-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexpectimax.py
151 lines (129 loc) · 4.95 KB
/
expectimax.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Simulation to test the game
# This makes fully random moves till the game ends
from game import GameManager
from game import Moves
import random
from statistics import mean, variance, stdev, mode
import time
import sys
T=10
DEBUG = False
class ExpectimaxAgent():
def __init__(self, N=4):
self.scores = {}
self.N = N
self.depth = 2
def encode(self, grid):
key = ','.join([str(tile) for row in grid for tile in row])
return key
def simulation_random_move(self, sim_gm, depth, h_score):
tot_score = 0
if sim_gm.isOver():
return tot_score
empty_cell_list = sim_gm.getEmptyCells()
pcell = 1/len(empty_cell_list)
for pos in empty_cell_list:
#Try assigning 4 in current empty slot
grid = sim_gm.getCurrentState()
grid[pos[0]][pos[1]] = 4
tot_score = tot_score + 0.1*pcell*self.expectimax(grid, depth-1, False, h_score)
grid[pos[0]][pos[1]] = 0
grid[pos[0]][pos[1]] = 2
tot_score = tot_score + 0.9*pcell*self.expectimax(grid, depth-1, False, h_score)
grid[pos[0]][pos[1]] = 0
return tot_score
def heuristic(self, sim_gm, h_score):
#set using trail and error
w_freecell = 2.5
w_monotone = 0.75
w_maxtile = 1
w_smooth = 0.5
key = self.encode(sim_gm.getCurrentState())
if key in self.scores:
return self.scores[key]
final_score = 0 #h_score
final_score += w_freecell * sim_gm.evalFreeCells()
final_score += w_smooth * sim_gm.evalSmoothness()
final_score += w_monotone*sim_gm.evalMonotone_simple()
final_score += w_maxtile * sim_gm.evalMaxTile()
# final_score += sim_gm.evalMonotone()
if random.randint(0,100) == 0 and DEBUG == True:
sim_gm.printState()
print("Free Cell %.3f"%(sim_gm.evalFreeCells()))
print("Smoothness %.3f"%(sim_gm.evalSmoothness()))
print("Monotone Simple %.3f"%(sim_gm.evalMonotone_simple()))
print("Max Tile %d"%(sim_gm.evalMaxTile()))
print("Score %d"%(h_score))
self.scores[key] = final_score
return self.scores[key]
def expectimax(self, grid, depth, is_random, h_score):
sim_gm = GameManager(grid)
# sim_gm.enableRandomTile(False) # May not be needed
if depth == 0 or sim_gm.isOver():
return self.heuristic(sim_gm, h_score)
elif is_random:
return self.simulation_random_move(sim_gm, depth, h_score)
else:
# our move
max_score = float('-inf')
moves = sim_gm.getAvailableMoves()
for game_move in moves:
current_grid, current_score = sim_gm.tryMove(grid, game_move)
max_score = max(max_score, self.expectimax(current_grid, depth, True, h_score + current_score))
return max_score
return -1
def getNextMove(self, grid, moves):
best_move = moves[0]
best_sc = float('-inf')
sim_gm = GameManager(grid)
for move in moves:
current_grid, current_score = sim_gm.tryMove(grid, move)
depth = self.depth
if sim_gm.getNoOfEmptyCells() <= 2:
depth += 1
current_score = self.expectimax(current_grid, depth, True, current_score)
# print("Score %.3f:%s"%(current_score, move))
if current_score > best_sc:
best_move = move
best_sc = current_score
# print("")
return best_move
def run():
game = GameManager()
gt = game.getGameTracker()
ai = ExpectimaxAgent(4) # 4x4 tile game
while not game.isOver():
moves = game.getAvailableMoves()
# random.shuffle(moves)
next_move = ai.getNextMove(game.getCurrentState(), moves)
game.makeMove(next_move)
# print("Move made: %s"%(next_move))
# if not DEBUG:
# game.printState()
game.printState()
print("Final Score: %d"%(game.getScore()))
print("Max Tile: %d"%(gt.getMaxTile()))
print("No. Of Moves: %d"%(gt.getNoOfMoves()))
print("Time per move: %0.5f"%(gt.getTimePerMove()))
# print(ai.scores)
return gt
if __name__ == "__main__":
print("Expectimax Bot")
t_scores = []
ex_times = []
max_tile = []
num_moves = []
while T > 0:
gt = run()
t_scores.append(gt.getScore())
ex_times.append(gt.getTimePerMove())
max_tile.append(gt.getMaxTile())
num_moves.append(gt.getNoOfMoves())
T = T-1
print("Final Analysis:")
print("Mean Score: %.3f"%(mean(t_scores)))
print("Max Score: %d"%(max(t_scores)))
print("Std Deviation of Scores: %.3f"%(stdev(t_scores)))
print("Average Time taken per move(sec): %.4f"%(mean(ex_times)))
print("Average No. of moves: %.4f"%(mean(num_moves)))
print("Most common Max Tile reached: %d"%(mode(max_tile)))