Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Documenting main.py and MCTS args #300

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions MCTS.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,19 @@

import numpy as np

EPS = 1e-8
from utils import dotdict

EPS = 1e-8 # For numerical stability

log = logging.getLogger(__name__)

default_args = dotdict(
{
"numMCTSSims": 25, # Number of moves for MCTS to simulate.
"cpuct": 1.0, # PUCT exploration constant
}
)


class MCTS():
"""
Expand Down Expand Up @@ -38,7 +47,10 @@ def getActionProb(self, canonicalBoard, temp=1):
self.search(canonicalBoard)

s = self.game.stringRepresentation(canonicalBoard)
counts = [self.Nsa[(s, a)] if (s, a) in self.Nsa else 0 for a in range(self.game.getActionSize())]
counts = [
self.Nsa[(s, a)] if (s, a) in self.Nsa else 0
for a in range(self.game.getActionSize())
]

if temp == 0:
bestAs = np.array(np.argwhere(counts == np.max(counts))).flatten()
Expand All @@ -47,7 +59,7 @@ def getActionProb(self, canonicalBoard, temp=1):
probs[bestA] = 1
return probs

counts = [x ** (1. / temp) for x in counts]
counts = [x ** (1.0 / temp) for x in counts]
counts_sum = float(sum(counts))
probs = [x / counts_sum for x in counts]
return probs
Expand Down Expand Up @@ -92,7 +104,7 @@ def search(self, canonicalBoard):
# if all valid moves were masked make all valid moves equally probable

            # NB! All valid moves may be masked if either your NNet architecture is insufficient or you've got overfitting or something else.
# If you have got dozens or hundreds of these messages you should pay attention to your NNet and/or training process.
# If you have got dozens or hundreds of these messages you should pay attention to your NNet and/or training process.
log.error("All valid moves were masked, doing a workaround.")
self.Ps[s] = self.Ps[s] + valids
self.Ps[s] /= np.sum(self.Ps[s])
Expand All @@ -102,7 +114,7 @@ def search(self, canonicalBoard):
return -v

valids = self.Vs[s]
cur_best = -float('inf')
cur_best = -float("inf")
best_act = -1

# pick the action with the highest upper confidence bound
Expand Down Expand Up @@ -134,3 +146,4 @@ def search(self, canonicalBoard):

self.Ns[s] += 1
return -v

28 changes: 16 additions & 12 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,24 @@
coloredlogs.install(level='INFO') # Change this to DEBUG to see more info.

args = dotdict({
'numIters': 1000,
'numEps': 100, # Number of complete self-play games to simulate during a new iteration.
'tempThreshold': 15, #
'updateThreshold': 0.6, # During arena playoff, new neural net will be accepted if threshold or more of games are won.
'maxlenOfQueue': 200000, # Number of game examples to train the neural networks.
'numMCTSSims': 25, # Number of games moves for MCTS to simulate.
'arenaCompare': 40, # Number of games to play during arena play to determine if new net will be accepted.
'cpuct': 1,

'checkpoint': './temp/',
'numIters': 1000, # Number of training iterations
'numEps': 100, # Number of self-play games per training iteration
    'tempThreshold': 15,        # Number of self-play moves after which the MCTS temperature is set to 0 (greedy action selection)
'updateThreshold': 0.6, # Threshold win percentage of arena games to accept a new neural network
'maxlenOfQueue': 200_000, # Number of game examples to train the neural networks.
'numMCTSSims': 25, # Number of moves for MCTS to simulate.
'arenaCompare': 40, # Number of arena games to assess neural network for acceptance
'cpuct': 1, # PUCT exploration constant

'checkpoint': './temp/', # Folder name to save checkpoints
# Set True to load in the model weights from checkpoint and training
# examples from the load_folder_file
'load_model': False,
'load_folder_file': ('/dev/models/8x100x50','best.pth.tar'),
# Two-tuple of folder and filename where training examples are housed
'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
# Max amount of training examples to keep in the history, dropping the
# oldest example beyond that before adding a new one (like a FIFO queue)
'numItersForTrainExamplesHistory': 20,

})


Expand Down