-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
280 lines (209 loc) · 8.92 KB
/
main.py
File metadata and controls
280 lines (209 loc) · 8.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
import argparse
import json
import sys
from multiprocessing import Process, Manager, Lock
from statistics import mean
from typing import List, Tuple
import matplotlib.pyplot as plt
import numpy as np
from agent import Agent
from game import Game
from generate_game_rules import generate_game_rules
from rules import Rules
class Main(object):
    """Command-line dispatcher for the learn / play / test modes.

    Instantiating the class reads ``sys.argv``: the first positional
    argument selects the mode, and the method with the same name parses the
    remaining arguments and calls the matching ``*_mode`` function.
    """

    # Only these method names may be selected from the command line.
    COMMANDS = ('learn', 'play', 'test')

    def __init__(self):
        """Parse the mode from ``sys.argv[1]`` and invoke the matching method.

        Prints the help text and exits with status 1 when the mode is not
        one of ``COMMANDS``.
        """
        parser = argparse.ArgumentParser(
            description='Sparse q-learning',
            usage='''main <command> [<args>]
The commands are:
learn Let the agents learn a policy during n episodes
play Play the game with a learned policy
test Test the performance of the learning
''')
        parser.add_argument('mode', help='mode to run')
        # Parse only the mode here; each mode parses its own arguments.
        args = parser.parse_args(sys.argv[1:2])
        # A plain hasattr() check would also accept names such as
        # "__init__" or "__class__" (leading to unbounded recursion), so the
        # mode is validated against an explicit whitelist instead.
        if args.mode not in self.COMMANDS:
            print('Unrecognized mode')
            parser.print_help()
            sys.exit(1)
        # invoke method with same name
        getattr(self, args.mode)()

    def learn(self):
        """Parse the ``learn`` arguments and run ``learn_mode``."""
        parser = argparse.ArgumentParser(
            description='Let the agents learn a policy during n episodes')
        parser.add_argument('directory',
                            help="directory to store the rules file")
        parser.add_argument('-e', help="number of episode", default=100000, type=int)
        parser.add_argument('-g', help="grid size", default=4, type=int)
        args = parser.parse_args(sys.argv[2:])
        print('Running learn mode, episode={}, grid={}'.format(args.e, args.g))
        # The grid is square: the single -g value is used for both dimensions.
        learn_mode(args.e, (args.g, args.g), args.directory)

    def play(self):
        """Parse the ``play`` arguments and run ``play_mode``."""
        parser = argparse.ArgumentParser(
            description='Play the game with a learned policy')
        parser.add_argument('directory', help="directory of the rules file")
        parser.add_argument('name', help="name of the rules file")
        parser.add_argument('-g', help="grid size", default=4, type=int)
        args = parser.parse_args(sys.argv[2:])
        print('Running play mode, grid={}, directory={} name={}'.format(args.g, args.directory, args.name))
        # The grid is square: the single -g value is used for both dimensions.
        play_mode((args.g, args.g), args.directory, args.name)

    def test(self):
        """Parse the ``test`` arguments and run ``test_mode``."""
        parser = argparse.ArgumentParser(
            description='Test the performance of the learning')
        parser.add_argument('-e', help="number of episode", default=100000, type=int)
        parser.add_argument('-r', help="number of run", default=25, type=int)
        parser.add_argument('-g', help="grid size", default=4, type=int)
        parser.add_argument('-v', '--verbose', action='store_true')
        args = parser.parse_args(sys.argv[2:])
        print('Running test mode, grid={}, run={}, episode={}'.format(args.g, args.r, args.e))
        # The grid is square: the single -g value is used for both dimensions.
        test_mode(args.e, args.r, (args.g, args.g), args.verbose)
def learn_mode(n_episode, grid, directory):
    """Train two predator agents and save the resulting rules.

    :param n_episode: number of learning episodes to run
    :param grid: pair of grid dimensions
    :param directory: directory passed to ``rules.save_rules``
    """
    # NOTE(review): the first grid entry is passed first to Game here, while
    # generate_game_rules receives the second entry; other functions in this
    # file order them differently. Harmless for square grids -- confirm
    # against Game's signature before using non-square grids.
    first_dim, second_dim = grid

    board = Game(first_dim, second_dim)
    # context-specific rules shared by both predators
    learned_rules = generate_game_rules(second_dim)
    hunters = [Agent(pred_id, learned_rules) for pred_id in (0, 1)]

    run_episodes(n_episode, board, learned_rules, hunters)

    learned_rules.save_rules(
        directory=directory,
        name="{}_{}_grid".format(first_dim, second_dim),
    )
def play_mode(grid, directory, file_name):
    """Play one game step by step with a previously saved policy.

    After every step the grid is printed and the user is prompted; entering
    ``s`` stops immediately, ``n`` continues. The loop also ends once the
    game reports a capture.

    :param grid: pair of grid dimensions passed to ``Game``
    :param directory: directory passed to ``rules.load_rules``
    :param file_name: name of the rules file to load
    """
    first_dim, second_dim = grid
    board = Game(first_dim, second_dim)

    # load the learned rules shared by both predators
    policy = Rules()
    policy.load_rules(directory=directory, name=file_name)
    hunters = [Agent(pred_id, policy) for pred_id in (0, 1)]

    captured = False
    while not captured:
        current = board.state
        # purely greedy choice: exploration rate is 0
        moves = {
            hunter.pred_id: hunter.get_action_choice(current, 0.)
            for hunter in hunters
        }
        _, _, captured = board.play(moves)
        board.print()

        # keep asking until the user gives one of the two valid answers
        answer = ""
        while answer not in ("s", "n"):
            answer = input("n -> next episode, s -> stop : ")
            print(answer)
        if answer == "s":
            break
def _test_mode_run(run_times, test_games, size_interval, n_episode,
                   line_to_up, run, lock, verbose, grid):
    """Execute one independent learning run and record its capture times.

    Kept at module level (rather than nested in ``test_mode``) so it can be
    pickled as a ``Process`` target under the "spawn" start method.

    :param run_times: shared list; the list of mean capture times of this
        run is appended to it when the run finishes
    :param test_games: games used by ``make_capture_test``
    :param size_interval: number of learning episodes between two tests
    :param n_episode: total number of learning episodes
    :param line_to_up: number of terminal lines between the cursor and this
        run's progress line
    :param run: zero-based index of the run (displayed one-based)
    :param lock: lock serialising terminal output between processes
    :param verbose: whether to print progress
    :param grid: pair of grid dimensions
    """
    # create a specific context graph/rules
    rules = generate_game_rules(grid[0])
    # create predator agents
    predators = [Agent(0, rules), Agent(1, rules)]
    # game used to run episodes
    learn_game = Game(grid[0], grid[1])
    n_interval = n_episode // size_interval

    # baseline measurement before any learning, then one per interval
    times = [make_capture_test(predators, test_games)]
    for i in range(n_interval):
        run_episodes(size_interval, learn_game, rules, predators)
        times.append(make_capture_test(predators, test_games))
        if verbose:
            # "with" guarantees the lock is released even if writing fails
            with lock:
                # move the cursor up to this run's line, clear and rewrite it
                for _ in range(line_to_up):
                    sys.stdout.write("\033[F")
                sys.stdout.write("\033[K")
                sys.stdout.write("run {} : {} %".format((run + 1), ((i + 1) / n_interval) * 100))
                # move the cursor back down to where it started
                for _ in range(line_to_up):
                    print("\r")
    run_times.append(times)


def test_mode(n_episode: int, n_run: int, grid: Tuple[int, int], verbose=False, size_interval: int = 500):
    """Measure how capture time evolves with learning, averaged over runs.

    Starts ``n_run`` processes; each one learns for ``n_episode`` episodes
    and evaluates the greedy policy on the same 100 test games every
    ``size_interval`` episodes. The per-interval averages are plotted to
    ``images/plots/`` and dumped to ``json/``.

    :param n_episode: number of learning episodes per run
    :param n_run: number of independent runs (one process each)
    :param grid: pair of grid dimensions
    :param verbose: print a progress line per run
    :param size_interval: number of learning episodes between two tests
    """
    import os  # local import: only needed to create the output directories

    # generate 100 random initial game states
    # NOTE(review): Game receives the grid entries in swapped order here
    # compared with the learning game built in the worker; harmless for
    # square grids -- confirm against Game's signature.
    ncol, nrow = grid
    test_games = [Game(nrow, ncol) for _ in range(100)]

    with Manager() as manager:
        run_times = manager.list()  # <-- can be shared between processes.
        lock = Lock()
        processes = []
        for run in range(n_run):
            p = Process(target=_test_mode_run,
                        args=(run_times, test_games, size_interval, n_episode,
                              (n_run - run), run, lock, verbose, grid))
            if verbose:
                print("run {} : {} %".format((run + 1), 0))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
        # average the results over the runs (must happen while the manager
        # is still alive, since run_times is a proxy)
        avg = [float(sum(col)) / len(col) for col in zip(*run_times)]

    # One x value per measurement, so x and y always have the same length
    # even when n_episode is not a multiple of size_interval.
    episode = [i * size_interval for i in range(len(avg))]

    plot_dir = 'images/plots'
    json_dir = 'json'
    # Create the output directories so a long run is not lost at save time.
    os.makedirs(plot_dir, exist_ok=True)
    os.makedirs(json_dir, exist_ok=True)

    plt.plot(episode, avg)
    plt.xlabel("learning episode")
    plt.ylabel("capture/episode")
    plt.title("Evolution of cooperation")
    plt.savefig('images/plots/{}_{}_grid.png'.format(nrow, ncol))
    data = {"avg": avg, "episode": episode}
    with open('json/{}_{}_grid.json'.format(nrow, ncol), 'w') as outfile:
        json.dump(data, outfile)
def run_episodes(n_episode: int, game: Game, rules: Rules, predators: List[Agent]):
    """Run ``n_episode`` learning episodes and update the rule values.

    Each episode resets the game to a random state and plays until the game
    reports a capture; after every step ``rules.update_rule_values`` is
    called with the step's state, joint action, rewards, current Q-values
    and the Q-values of the next state (zeros once the prey is captured).

    :param n_episode: number of episodes to run
    :param game: game used to play the episodes
    :param rules: rules whose values are updated after each step
    :param predators: agents choosing the actions
    """
    # learning parameters
    alpha, gamma, epsilon = 0.3, 0.9, 0.2

    for _ in range(n_episode):
        # every episode starts from a random initial state
        game.reset(random_state=True)
        done = False
        while not done:
            current = game.state
            # epsilon-driven action choice of every predator
            joint_action = {
                agent.pred_id: agent.get_action_choice(current, epsilon)
                for agent in predators
            }
            # apply the joint action
            following, rewards, done = game.play(joint_action)

            current_q = {}
            for agent in predators:
                current_q[agent.pred_id] = agent.q_value(current)

            if done:
                # terminal step: nothing left to discount
                next_q = {agent.pred_id: 0 for agent in predators}
            else:
                next_q = {agent.pred_id: agent.q_value(following) for agent in predators}

            rules.update_rule_values(current, joint_action, rewards, current_q, next_q, alpha, gamma)
def make_capture_test(predators: List[Agent], test_games: List[Game]):
    """Return the mean number of rounds the predators need per test game.

    Every game is reset and then played with an exploration rate of 0 until
    it reports a capture; ``game.round`` is recorded at that point.

    :param predators: agents choosing the actions
    :param test_games: games to evaluate on
    :return: mean of the recorded ``game.round`` values
    """
    rounds_needed = []
    for trial in test_games:
        trial.reset()
        while True:
            observed = trial.state
            # greedy joint action of all predators
            joint_action = {
                agent.pred_id: agent.get_action_choice(observed, 0.)
                for agent in predators
            }
            _, _, caught = trial.play(joint_action)
            if caught:
                break
        rounds_needed.append(trial.round)
    return mean(rounds_needed)
# Run the command-line dispatcher only when executed as a script; importing
# the module must not parse sys.argv.
if __name__ == '__main__':
    Main()