google-research · zdebeurs · May 31, 2019 · May 31, 2019 · May 31, 2019 · May 31, 2019
diff --git a/.DS_Store b/.DS_Store
diff --git a/Figure4.gif b/Figure4.gif
diff --git a/exoplanet-ml/.DS_Store b/exoplanet-ml/.DS_Store
diff --git a/exoplanet-ml/astronet/.DS_Store b/exoplanet-ml/astronet/.DS_Store
diff --git a/exoplanet-ml/rv_net/.DS_Store b/exoplanet-ml/rv_net/.DS_Store
diff --git a/exoplanet-ml/rv_net/README.md b/exoplanet-ml/rv_net/README.md
@@ -0,0 +1,5 @@
+# Identifying Exoplanets with Deep Learning. IV. Removing Stellar Activity Signals from Radial Velocity Measurements Using Neural Networks
+
+![HARPS-N Observations Animated](pics/Figure4.gif)
+
+**Figure 1.** HARPS-N ΔCCFs -- Residual CCFs (ΔCCFs) computed by subtracting the mean CCF, highlighting differences in features between CCFs. For training the model, the ΔCCFs are the input and the RV from stellar activity is the output. Radial velocity is indicated by color (red = redshifted, blue = blueshifted).
diff --git a/exoplanet-ml/rv_net/__pycache__/data.cpython-36.pyc b/exoplanet-ml/rv_net/__pycache__/data.cpython-36.pyc
diff --git a/exoplanet-ml/rv_net/__pycache__/estimator_util.cpython-36.pyc b/exoplanet-ml/rv_net/__pycache__/estimator_util.cpython-36.pyc
diff --git a/exoplanet-ml/rv_net/__pycache__/rv_model.cpython-36.pyc b/exoplanet-ml/rv_net/__pycache__/rv_model.cpython-36.pyc
diff --git a/exoplanet-ml/rv_net/data.py b/exoplanet-ml/rv_net/data.py
@@ -0,0 +1,76 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+
+class DatasetBuilder(object):
+    """Dataset builder class."""
+
+    def __init__(self, file_pattern, hparams, mode, repeat=1):
+        """Initializes the dataset builder.
+
+        Args:
+          file_pattern: File pattern matching input file shards, e.g.
+            "/tmp/train-?????-of-00100".
+          hparams: A ConfigDict.
+          mode: A tf.estimator.ModeKeys.
+          repeat: The number of times to repeat the dataset. If None, the dataset
+            will repeat indefinitely.
+        """
+        valid_modes = [
+            tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL,
+            tf.estimator.ModeKeys.PREDICT
+        ]
+        if mode not in valid_modes:
+            raise ValueError("Expected mode in {}. Got: {}".format(valid_modes, mode))
+
+        self.file_pattern = file_pattern
+        self.hparams = hparams
+        self.mode = mode
+        self.repeat = repeat
+
+    def __call__(self):
+        is_training = self.mode == tf.estimator.ModeKeys.TRAIN
+
+        # Dataset of file names.
+        filename_dataset = tf.data.Dataset.list_files(self.file_pattern,
+                                                      shuffle=is_training)
+
+        # Dataset of serialized tf.Examples.
+        dataset = filename_dataset.flat_map(tf.data.TFRecordDataset)
+
+        # Shuffle in training mode.
+        if is_training:
+            dataset = dataset.shuffle(self.hparams.shuffle_values_buffer)
+
+        # Possibly repeat.
+        if self.repeat != 1:
+            dataset = dataset.repeat(self.repeat)
+
+        def _example_parser(serialized_example):
+            """Parses a single tf.Example into feature and label tensors."""
+            data_fields = {
+                self.hparams.ccf_feature_name: tf.FixedLenFeature([401], tf.float32),
+                self.hparams.label_feature_name: tf.FixedLenFeature([], tf.float32),
+            }
+            parsed_fields = tf.parse_single_example(serialized_example, features=data_fields)
+            ccf_data = parsed_fields[self.hparams.ccf_feature_name]
+            label = parsed_fields[self.hparams.label_feature_name]
+            label *= self.hparams.label_rescale_factor  # Rescale the label.
+            return {
+                "ccf_data": ccf_data,
+                "label": label,
+            }
+
+        # Map the parser over the dataset.
+        dataset = dataset.map(_example_parser, num_parallel_calls=4)
+
+        # Batch results by up to batch_size.
+        dataset = dataset.batch(self.hparams.batch_size)
+
+        # Prefetch a few batches.
+        dataset = dataset.prefetch(10)
+
+        return dataset
diff --git a/exoplanet-ml/rv_net/data_HARPS_N.py b/exoplanet-ml/rv_net/data_HARPS_N.py
@@ -0,0 +1,79 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+
+class DatasetBuilder(object):
+    """Dataset builder class."""
+
+    def __init__(self, file_pattern, hparams, mode, repeat=1):
+        """Initializes the dataset builder.
+
+        Args:
+          file_pattern: File pattern matching input file shards, e.g.
+            "/tmp/train-?????-of-00100".
+          hparams: A ConfigDict.
+          mode: A tf.estimator.ModeKeys.
+          repeat: The number of times to repeat the dataset. If None, the dataset
+            will repeat indefinitely.
+        """
+        valid_modes = [
+            tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL,
+            tf.estimator.ModeKeys.PREDICT
+        ]
+        if mode not in valid_modes:
+            raise ValueError("Expected mode in {}. Got: {}".format(valid_modes, mode))
+
+        self.file_pattern = file_pattern
+        self.hparams = hparams
+        self.mode = mode
+        self.repeat = repeat
+
+    def __call__(self):
+        is_training = self.mode == tf.estimator.ModeKeys.TRAIN
+
+        # Dataset of file names.
+        filename_dataset = tf.data.Dataset.list_files(self.file_pattern,
+                                                      shuffle=is_training)
+
+        # Dataset of serialized tf.Examples.
+        dataset = filename_dataset.flat_map(tf.data.TFRecordDataset)
+
+        # Shuffle in training mode.
+        if is_training:
+            dataset = dataset.shuffle(self.hparams.shuffle_values_buffer)
+
+        # Possibly repeat.
+        if self.repeat != 1:
+            dataset = dataset.repeat(self.repeat)
+
+        def _example_parser(serialized_example):
+            """Parses a single tf.Example into feature and label tensors."""
+            data_fields = {
+                self.hparams.ccf_feature_name: tf.io.FixedLenFeature([161], tf.float32),
+                self.hparams.label_feature_name: tf.io.FixedLenFeature([], tf.float32),
+                self.hparams.label_feature_name2: tf.io.FixedLenFeature([], tf.float32),
+            }
+            parsed_fields = tf.io.parse_single_example(serialized_example, features=data_fields)
+            ccf_data = parsed_fields[self.hparams.ccf_feature_name]
+            label = parsed_fields[self.hparams.label_feature_name]
+            label *= self.hparams.label_rescale_factor  # Rescale the label.
+            label2 = parsed_fields[self.hparams.label_feature_name2]
+            return {
+                "ccf_data": ccf_data,
+                "label": label,
+                "bjd": label2,
+            }
+
+        # Map the parser over the dataset.
+        dataset = dataset.map(_example_parser, num_parallel_calls=4)
+
+        # Batch results by up to batch_size.
+        dataset = dataset.batch(self.hparams.batch_size)
+
+        # Prefetch a few batches.
+        dataset = dataset.prefetch(10)
+
+        return dataset
diff --git a/exoplanet-ml/rv_net/estimator_util.py b/exoplanet-ml/rv_net/estimator_util.py
@@ -0,0 +1,94 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from astronet.ops import training
+
+def create_learning_rate(hparams, global_step):
+  """Creates a learning rate Tensor.
+
+  Args:
+    hparams: ConfigDict containing the learning rate configuration.
+    global_step: The global step Tensor.
+
+  Returns:
+    A learning rate Tensor.
+  """
+  if hparams.get("learning_rate_decay_steps"):
+    # Linear decay from hparams.learning_rate to 0.
+    learning_rate = tf.train.polynomial_decay(
+        learning_rate=hparams.learning_rate,
+        global_step=global_step,
+        decay_steps=hparams.learning_rate_decay_steps,
+        end_learning_rate=0,
+        power=1.0)
+  else:
+    learning_rate = tf.constant(hparams.learning_rate)
+
+  return learning_rate
+
+
+def sum_metric(values, name=None):
+  with tf.variable_scope(name, 'sum', (values,)):
+    values = tf.convert_to_tensor(values)
+    total = tf.get_variable(
+        'total',
+        initializer=tf.zeros([], dtype=values.dtype),
+        trainable=False,
+        collections=[tf.GraphKeys.LOCAL_VARIABLES,
+                     tf.GraphKeys.METRIC_VARIABLES])
+    update_total = tf.assign_add(total, tf.reduce_sum(values))
+    return total.value(), update_total
+
+
+class ModelFn(object):
+  """Class that acts as a callable model function for Estimator train / eval."""
+
+  def __init__(self, model_class, hparams):
+    """Initializes the model function.
+
+    Args:
+      model_class: Model class.
+      hparams: A HParams object containing hyperparameters for building and
+        training the model.
+    """
+    self.model_class = model_class
+    self.hparams = hparams
+
+  def __call__(self, features, mode):
+    """Builds the model and returns an EstimatorSpec."""
+    model = self.model_class(features, self.hparams, mode)
+    model.build()
+    print(model.summary)
+    print("___________ starting new epoch___________")
+    # Possibly create train_op.
+    train_op = None
+    if mode == tf.estimator.ModeKeys.TRAIN:
+      learning_rate = create_learning_rate(self.hparams, model.global_step)
+      optimizer = training.create_optimizer(self.hparams, learning_rate)
+      train_op = training.create_train_op(model, optimizer)
+
+    # Possibly create evaluation metrics.
+    eval_metrics = None
+    if mode == tf.estimator.ModeKeys.EVAL:
+      eval_metrics = {
+        "num_examples": sum_metric(tf.ones_like(model.label, dtype=tf.int32)),
+        "num_eval_batches": sum_metric(1),
+        "rmse": tf.metrics.root_mean_squared_error(
+            model.label, model.predicted_rv),
+        "root_mean_label": tf.metrics.root_mean_squared_error(
+            model.label, tf.zeros_like(model.label)),
+        "root_mean_pred": tf.metrics.root_mean_squared_error(
+            model.predicted_rv, tf.zeros_like(model.predicted_rv)),
+      }
+
+    return tf.estimator.EstimatorSpec(
+        mode=mode,
+        predictions={"ccf_data": model.ccf_data,
+  		     "label": model.label,
+                     "predicted_rv": model.predicted_rv},
+        loss=model.total_loss,
+        train_op=train_op,
+	eval_metric_ops=eval_metrics)
diff --git a/exoplanet-ml/rv_net/exoplanet-ml b/exoplanet-ml/rv_net/exoplanet-ml