# test_deap.py
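# Evolves fixed-length action sequences for a maze environment with DEAP.
# The evolutionary fitness is the maze reward; a novelty score for each
# playout's end position (via the local ns module) is computed and logged
# for debugging only.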
import random
from deap import creator, base, tools, algorithms
import ns
import maze
import copy
import numpy as np
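# Maze environment and novelty-search helper from the local maze/ns modules.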
env = maze.Maze(1)
n_s = ns.NoveltySearch(behavior_type='ad_hoc')
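# Playout counters; dummy_count is declared but not otherwise used.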
playout_count = 0
dummy_count = 0
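# Deep copies of the maze reused as grids for debug statistics
# (end-position counts, last visit order, last novelty scores).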
behv_state = copy.deepcopy(env)
behv_last_visit = copy.deepcopy(env)
behv_rewards = copy.deepcopy(env)
behv_acc_visit = copy.deepcopy(env)
def print_debug_states():
    global behv_state
    global behv_last_visit
    global behv_rewards
    global behv_acc_visit
    print('accumulative end positions of generation:')
    print(behv_state.render())
    print('accumulative end positions overall')
    print(behv_acc_visit.render())
    print('last visited places')
    print(behv_last_visit.render())
    print('last rewards gained')
    print(behv_rewards.render())
def clean_debug_states():
    global playout_count
    global behv_state
    global behv_last_visit
    global behv_rewards
    playout_count = 0
    behv_state = copy.deepcopy(env)
    behv_last_visit = copy.deepcopy(env)
    behv_rewards = copy.deepcopy(env)
def debug(r, p, state):
    """Record one playout: r is the novelty score, p the playout index,
    state the environment after the individual was executed."""
    global playout_count
    global behv_state
    global behv_last_visit
    global behv_rewards
    global behv_acc_visit
    behv_state.maze[state.posx, state.posy] += 1      # end positions this generation
    behv_acc_visit.maze[state.posx, state.posy] += 1  # end positions overall
    behv_last_visit.maze[state.posx, state.posy] = p  # most recent playout to end here
    behv_rewards.maze[state.posx, state.posy] = r     # last novelty score at this cell
def evalOneMax(individual):
    global playout_count
    global dummy_count
    run_env = copy.deepcopy(env)
    val_reward = 0
    # Execute the individual's action sequence in a fresh copy of the maze.
    for act in individual:
        _, val_reward, done = run_env.step(act)
        if done:
            break
    # Behavior descriptor: the final (x, y) position of the agent.
    behv = (run_env.posx, run_env.posy)
    if behv[0] == 0 and behv[1] == 0:
        r = 0
    else:
        r = n_s.get_approx_novelty(behv, k=5, done=True)
        # r = n_s.get_novelty_simple(behv, done=True)
    r = r * 10
    # n_s.set_behavior_in_archive(behv, n_s.behavior_archive, True)
    playout_count += 1
    debug(r, playout_count, run_env)
    # Fitness is the maze reward; the novelty score r is only logged via debug().
    return val_reward,
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
toolbox.register("attr_bool", random.randint, 0, 3)
toolbox.register("individual", tools.initRepeat,
creator.Individual, toolbox.attr_bool, n=10)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evalOneMax)
toolbox.register("mate", tools.cxOnePoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=10)
population = toolbox.population(n=100)
NGEN = 100
i = 0
# A ParetoFront hall of fame keeps the best non-dominated individuals of the run.
hof = tools.ParetoFront()
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean, axis=0)
stats.register("std", np.std, axis=0)
stats.register("min", np.min, axis=0)
stats.register("max", np.max, axis=0)
# Standard evolutionary loop: crossover prob. 0.7, mutation prob. 0.2, NGEN generations.
algorithms.eaSimple(population, toolbox, 0.7, 0.2, ngen=NGEN,
                    stats=stats, halloffame=hof, verbose=True)
print_debug_states()
"""
for gen in range(NGEN):
i += 1
offspring = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.07)
fits = toolbox.map(toolbox.evaluate, offspring)
for fit, ind in zip(fits, offspring):
ind.fitness.values = fit
population = toolbox.select(offspring, k=len(population))
print("################ Gen ", i, ' ################')
print_debug_states()
clean_debug_states()
"""
# Report the ten best individuals of the final population.
top10 = tools.selBest(population, k=10)
for i in range(len(top10)):
    print('it ', i, ': ', top10[i])
    print()