# -*- coding: utf-8 -*-
"""
Created on Sun Jun 6 10:57:01 2021

@author: Ayush
"""

# Import Dependencies
import random
import numpy as np
import flappy_bird_gym
from collections import deque
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import load_model, save_model, Sequential
from tensorflow.keras.optimizers import RMSprop

# Neural Network for Agent


def NeuralNetwork(input_shape, output_shape):
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(Dense(512, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(64, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(output_shape, activation='linear',
                    kernel_initializer='he_uniform'))
    # 'lr' is deprecated in recent Keras releases; use 'learning_rate'
    model.compile(loss='mse', optimizer=RMSprop(
        learning_rate=0.0001, rho=0.95, epsilon=0.01), metrics=['accuracy'])
    model.summary()
    return model

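
# A minimal usage sketch, assuming flappy_bird_gym's "FlappyBird-v0" exposes a
# 2-float observation (horizontal and vertical distance to the next pipe) and
# a Discrete(2) action space, i.e. a (2,) -> 2 network:
#
#     model = NeuralNetwork(input_shape=(2,), output_shape=2)
#     q_values = model.predict(np.zeros((1, 2)))   # -> array of shape (1, 2)
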
# Brain of Agent || Blueprint of Agent

class DQNAgent:
    def __init__(self):
        self.env = flappy_bird_gym.make("FlappyBird-v0")
        self.episodes = 1000
        self.state_space = self.env.observation_space.shape[0]
        self.action_space = self.env.action_space.n
        self.memory = deque(maxlen=2000)

        # Hyperparameters
        self.gamma = 0.95
        self.epsilon = 1
        self.epsilon_decay = 0.9999
        self.epsilon_min = 0.01
        self.batch_number = 64  # 16, 32, 128, 256

        self.train_start = 1000
        self.jump_prob = 0.01
        self.model = NeuralNetwork(
            input_shape=(self.state_space,), output_shape=self.action_space)

    def act(self, state):
        # Exploit: pick the highest-valued action once epsilon has decayed
        if np.random.random() > self.epsilon:
            return np.argmax(self.model.predict(state))
        # Explore: flap (1) only with small probability, else do nothing (0)
        return 1 if np.random.random() < self.jump_prob else 0

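    # Exploration note: rather than sampling uniformly over {0, 1}, random
    # steps flap only with probability jump_prob (about 1% here). An assumption
    # behind this design: flapping on ~50% of random steps would crash the bird
    # almost immediately, so biased exploration sees far more of the pipe gap.
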
    def learn(self):
        # Make sure we have enough data
        if len(self.memory) < self.train_start:
            return

        # Create minibatch
        minibatch = random.sample(self.memory, min(
            len(self.memory), self.batch_number))
        # Variables to store minibatch info
        state = np.zeros((self.batch_number, self.state_space))
        next_state = np.zeros((self.batch_number, self.state_space))

        action, reward, done = [], [], []

        # Store data in variables
        for i in range(self.batch_number):
            state[i] = minibatch[i][0]
            action.append(minibatch[i][1])
            reward.append(minibatch[i][2])
            next_state[i] = minibatch[i][3]
            done.append(minibatch[i][4])

        # Predict y label
        target = self.model.predict(state)
        target_next = self.model.predict(next_state)

        for i in range(self.batch_number):
            if done[i]:
                target[i][action[i]] = reward[i]
            else:
                # Bellman update: bootstrap from the max Q-value of the next
                # state (np.amax), not its index (np.argmax)
                target[i][action[i]] = reward[i] + \
                    self.gamma * (np.amax(target_next[i]))

        print('training')
        self.model.fit(state, target, batch_size=self.batch_number, verbose=0)

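    # The target computed in learn() is the standard single-network DQN target:
    #     y_i = r_i                                   if the episode ended
    #     y_i = r_i + gamma * max_a' Q(s'_i, a')      otherwise
    # Note (an observation, not part of the original script): the online
    # network is reused to bootstrap its own targets; a separate, periodically
    # synced target network is the usual stabilization and is not implemented
    # here.
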
    def train(self):
        # n episode iterations for training
        for i in range(self.episodes):
            # Environment variables for training
            state = self.env.reset()
            state = np.reshape(state, [1, self.state_space])
            done = False
            score = 0
            # Decay epsilon, clamped at epsilon_min
            self.epsilon = max(self.epsilon * self.epsilon_decay,
                               self.epsilon_min)

            while not done:
                self.env.render()
                action = self.act(state)
                next_state, reward, done, info = self.env.step(action)

                # Reshape next_state
                next_state = np.reshape(next_state, [1, self.state_space])
                score += 1
                # Penalize dying so the agent learns to avoid it
                if done:
                    reward -= 100

                self.memory.append((state, action, reward, next_state, done))
                state = next_state

                if done:
                    print("Episode: {}\nScore: {}\nEpsilon: {:.2}".format(
                        i, score, self.epsilon))
                    # Save model (Keras models have no .save_model method;
                    # use the save_model function imported above)
                    if score >= 1000:
                        save_model(self.model, 'flappybrain.h5')
                self.learn()

    def perform(self):
        # Load the checkpoint written by train() and play greedily
        # (no exploration)
        self.model = load_model('flappybrain.h5')
        while True:
            state = self.env.reset()
            state = np.reshape(state, [1, self.state_space])
            done = False
            score = 0

            while not done:
                self.env.render()
                action = np.argmax(self.model.predict(state))
                next_state, reward, done, info = self.env.step(action)
                state = np.reshape(next_state, [1, self.state_space])
                score += 1

                print("Current Score: {}".format(score))

                if done:
                    print('DEAD')
                    break

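# Usage sketch: train() only checkpoints 'flappybrain.h5' once an episode
# survives 1000+ steps, so perform() will fail to load until such a file
# exists. Swap the calls below once training has produced a checkpoint:
#
#     agent = DQNAgent()
#     agent.perform()
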
if __name__ == '__main__':
    agent = DQNAgent()
    agent.train()
    # agent.perform()