diff --git a/dual_net.py b/dual_net.py
index a23902658..83007a2ac 100644
--- a/dual_net.py
+++ b/dual_net.py
@@ -284,10 +284,13 @@ def model_fn(features, labels, mode, params):
         train_op = optimizer.minimize(combined_cost, global_step=global_step)
 
     # Computations to be executed on CPU, outside of the main TPU queues.
-    def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor,
-                                  value_tensor, policy_cost, value_cost,
-                                  l2_cost, combined_cost, step,
-                                  est_mode=tf.estimator.ModeKeys.TRAIN):
+    def eval_metrics_host_call_fn(
+            avg_stones, avg_stones_delta,
+            policy_output, value_output,
+            pi_tensor, value_tensor,
+            policy_cost, value_cost,
+            l2_cost, combined_cost,
+            step, est_mode=tf.estimator.ModeKeys.TRAIN):
         policy_entropy = -tf.reduce_mean(tf.reduce_sum(
             policy_output * tf.log(policy_output), axis=1))
         # pi_tensor is one_hot when generated from sgfs (for supervised learning)
@@ -315,13 +318,16 @@ def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor,
             'l2_cost': tf.metrics.mean(l2_cost),
             'policy_entropy': tf.metrics.mean(policy_entropy),
             'combined_cost': tf.metrics.mean(combined_cost),
-            'avg_value_observed': tf.metrics.mean(avg_value_observed),
             'policy_accuracy_top_1': tf.metrics.mean(policy_output_in_top1),
             'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3),
             'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence),
+            'value_confidence': tf.metrics.mean(tf.abs(value_output)),
+
+            # Metrics about input data
             'policy_target_top_1_confidence': tf.metrics.mean(
                 policy_target_top_1_confidence),
-            'value_confidence': tf.metrics.mean(tf.abs(value_output)),
+            'avg_value_observed': tf.metrics.mean(avg_value_observed),
+            'avg_stones': tf.metrics.mean(tf.reduce_mean(avg_stones)),
         }
 
         if est_mode == tf.estimator.ModeKeys.EVAL:
@@ -339,6 +345,8 @@ def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor,
         for metric_name, metric_op in metric_ops.items():
             summary.scalar(metric_name, metric_op[1], step=eval_step)
 
+        summary.histogram("avg_stones_delta", avg_stones_delta)
+
         # Reset metrics occasionally so that they are mean of recent batches.
         reset_op = tf.variables_initializer(tf.local_variables('metrics'))
         cond_reset_op = tf.cond(
@@ -348,7 +356,14 @@ def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor,
         return summary.all_summary_ops() + [cond_reset_op]
 
+    # compute here to avoid sending all of features to cpu.
+    avg_stones_black = tf.reduce_sum(features[:,:,:,1], [1,2])
+    avg_stones_white = tf.reduce_sum(features[:,:,:,0], [1,2])
+    avg_stones = avg_stones_black + avg_stones_white
+    avg_stones_delta = avg_stones_black - avg_stones_white
+
     metric_args = [
+        avg_stones, avg_stones_delta,
         policy_output,
         value_output,
         labels['pi_tensor'],