Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Documenting main.py and MCTS args #300

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions MCTS.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,19 @@

import numpy as np

EPS = 1e-8
from utils import dotdict

EPS = 1e-8 # For numerical stability

log = logging.getLogger(__name__)

default_args = dotdict(
{
"numMCTSSims": 25, # Number of moves for MCTS to simulate.
"cpuct": 1.0, # PUCT exploration constant
}
)


class MCTS():
"""
Expand Down Expand Up @@ -38,7 +47,10 @@ def getActionProb(self, canonicalBoard, temp=1):
self.search(canonicalBoard)

s = self.game.stringRepresentation(canonicalBoard)
counts = [self.Nsa[(s, a)] if (s, a) in self.Nsa else 0 for a in range(self.game.getActionSize())]
counts = [
self.Nsa[(s, a)] if (s, a) in self.Nsa else 0
for a in range(self.game.getActionSize())
]

if temp == 0:
bestAs = np.array(np.argwhere(counts == np.max(counts))).flatten()
Expand All @@ -47,7 +59,7 @@ def getActionProb(self, canonicalBoard, temp=1):
probs[bestA] = 1
return probs

counts = [x ** (1. / temp) for x in counts]
counts = [x ** (1.0 / temp) for x in counts]
counts_sum = float(sum(counts))
probs = [x / counts_sum for x in counts]
return probs
Expand Down Expand Up @@ -92,7 +104,7 @@ def search(self, canonicalBoard):
# if all valid moves were masked make all valid moves equally probable

            # NB! All valid moves may be masked if either your NNet architecture is insufficient or you've got overfitting or something else.
# If you have got dozens or hundreds of these messages you should pay attention to your NNet and/or training process.
# If you have got dozens or hundreds of these messages you should pay attention to your NNet and/or training process.
log.error("All valid moves were masked, doing a workaround.")
self.Ps[s] = self.Ps[s] + valids
self.Ps[s] /= np.sum(self.Ps[s])
Expand All @@ -102,7 +114,7 @@ def search(self, canonicalBoard):
return -v

valids = self.Vs[s]
cur_best = -float('inf')
cur_best = -float("inf")
best_act = -1

# pick the action with the highest upper confidence bound
Expand Down Expand Up @@ -134,3 +146,4 @@ def search(self, canonicalBoard):

self.Ns[s] += 1
return -v

28 changes: 16 additions & 12 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,24 @@
coloredlogs.install(level='INFO') # Change this to DEBUG to see more info.

args = dotdict({
'numIters': 1000,
'numEps': 100, # Number of complete self-play games to simulate during a new iteration.
'tempThreshold': 15, #
'updateThreshold': 0.6, # During arena playoff, new neural net will be accepted if threshold or more of games are won.
'maxlenOfQueue': 200000, # Number of game examples to train the neural networks.
'numMCTSSims': 25, # Number of games moves for MCTS to simulate.
'arenaCompare': 40, # Number of games to play during arena play to determine if new net will be accepted.
'cpuct': 1,

'checkpoint': './temp/',
'numIters': 1000, # Number of training iterations
'numEps': 100, # Number of self-play games per training iteration
    'tempThreshold': 15,        # Number of self-play moves after which the MCTS temperature is set to 0 (greedy action selection)
'updateThreshold': 0.6, # Threshold win percentage of arena games to accept a new neural network
'maxlenOfQueue': 200_000, # Number of game examples to train the neural networks.
'numMCTSSims': 25, # Number of moves for MCTS to simulate.
'arenaCompare': 40, # Number of arena games to assess neural network for acceptance
'cpuct': 1, # PUCT exploration constant

'checkpoint': './temp/', # Folder name to save checkpoints
# Set True to load in the model weights from checkpoint and training
# examples from the load_folder_file
'load_model': False,
'load_folder_file': ('/dev/models/8x100x50','best.pth.tar'),
# Two-tuple of folder and filename where training examples are housed
'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
# Max amount of training examples to keep in the history, dropping the
# oldest example beyond that before adding a new one (like a FIFO queue)
'numItersForTrainExamplesHistory': 20,

})


Expand Down