Amazing-Python-Scripts
76 lines · 2.2 KB
# -*- coding: utf-8 -*-
"""
Deep Q-Network (DQN) agent for the Atari "SpaceInvaders-v0" gym environment.

Plays a few random-policy warm-up episodes, builds a convolutional
Q-network, wraps it in a keras-rl2 DQNAgent, trains it, and saves weights.

Created on Thu Jun 3 13:06:20 2021
@author: Ayush
"""

# Third-party: environment, numerics, RL framework, neural-network layers.
import gym
import numpy as np
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy
from tensorflow.keras.layers import Conv2D, Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# Atari Space Invaders environment (RGB frame observations, discrete actions).
env = gym.make("SpaceInvaders-v0")
16
episodes = 10

# Sanity-check the environment by playing episodes with a uniformly random
# policy before any training happens.
# NOTE(review): range(1, episodes) plays only episodes 1..9 — presumably the
# author intended 10; kept as-is to preserve original behavior.
for episode in range(1, episodes):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        # Sample a random action; accumulate the per-step reward into score.
        state, reward, done, info = env.step(env.action_space.sample())
        score += reward
    print('Episode: {}\nScore: {}'.format(episode, score))
env.close()
30
# Neural-network construction


def build_model(height, width, channels, actions):
    """Build the convolutional Q-network for the DQN agent.

    Args:
        height: Frame height of the environment observations.
        width: Frame width of the environment observations.
        channels: Number of color channels per frame.
        actions: Number of discrete actions (size of the output layer).

    Returns:
        An uncompiled ``Sequential`` Keras model mapping a stack of frames
        to one Q-value per action (linear output, one unit per action).
    """
    model = Sequential()
    # Leading 3 matches the agent's memory window_length=3 (stack of frames).
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu',
                     input_shape=(3, height, width, channels)))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(64, activation='relu'))
    # Linear head: raw Q-value estimates, one per action.
    model.add(Dense(actions, activation='linear'))
    return model
46
47
# Infer the network's input dimensions and output size from the environment.
height, width, channels = env.observation_space.shape
actions = env.action_space.n

model = build_model(height, width, channels, actions)
52
53
# keras-rl2 agent construction

def build_agent(model, actions):
    """Wrap a Q-network in a dueling DQN agent with an annealed eps-greedy policy.

    Args:
        model: Keras model produced by ``build_model``.
        actions: Number of discrete actions in the environment.

    Returns:
        An uncompiled ``DQNAgent`` ready for ``compile``/``fit``.
    """
    # Epsilon decays linearly 1.0 -> 0.1 over 10k steps (0.2 during testing).
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1., value_min=.1,
                                  value_test=.2, nb_steps=10000)
    # window_length=3 must match the leading 3 in the model's input_shape.
    memory = SequentialMemory(limit=2000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=1000)
    return dqn
63
64
dqn = build_agent(model, actions)

# `lr` is a deprecated alias in TF 2.x Keras optimizers; `learning_rate`
# is the supported keyword (same value, same behavior).
dqn.compile(Adam(learning_rate=0.001))

dqn.fit(env, nb_steps=40000, visualize=True, verbose=1)

# Evaluate the trained agent and report the mean episode reward.
scores = dqn.test(env, nb_episodes=10, visualize=True)
print(np.mean(scores.history['episode_reward']))

# NOTE(review): weights are saved and then immediately reloaded — the reload
# is redundant here and presumably only demonstrates the API; kept as-is.
dqn.save_weights('models/dqn.h5f')
dqn.load_weights('models/dqn.h5f')
77