diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..104eb05 Binary files /dev/null and b/.DS_Store differ diff --git a/Figure4.gif b/Figure4.gif new file mode 100644 index 0000000..adc21be Binary files /dev/null and b/Figure4.gif differ diff --git a/exoplanet-ml/.DS_Store b/exoplanet-ml/.DS_Store new file mode 100644 index 0000000..febeb2d Binary files /dev/null and b/exoplanet-ml/.DS_Store differ diff --git a/exoplanet-ml/astronet/.DS_Store b/exoplanet-ml/astronet/.DS_Store new file mode 100644 index 0000000..517b7a0 Binary files /dev/null and b/exoplanet-ml/astronet/.DS_Store differ diff --git a/exoplanet-ml/rv_net/.DS_Store b/exoplanet-ml/rv_net/.DS_Store new file mode 100644 index 0000000..c3bf109 Binary files /dev/null and b/exoplanet-ml/rv_net/.DS_Store differ diff --git a/exoplanet-ml/rv_net/README.md b/exoplanet-ml/rv_net/README.md new file mode 100644 index 0000000..c354f22 --- /dev/null +++ b/exoplanet-ml/rv_net/README.md @@ -0,0 +1,5 @@ +# Identifying Exoplanets with Deep Learning. IV. Removing Stellar Activity Signals from Radial Velocity Measurements Using Neural Networks + +![HARPS-N Observations Animated](pics/Figure4.gif) + +**Figure 1.** HARPS-N ΔCCFs -- Residual CCFs (ΔCCFs), computed by subtracting the mean CCF, highlight the differences in features between CCFs. For training the model, the ΔCCFs are the input and the RV from stellar activity is the output. Radial velocity is indicated by color (red = redshifted, blue = blueshifted). diff --git a/exoplanet-ml/rv_net/__pycache__/data.cpython-36.pyc b/exoplanet-ml/rv_net/__pycache__/data.cpython-36.pyc new file mode 100644 index 0000000..c9eaa8b Binary files /dev/null and b/exoplanet-ml/rv_net/__pycache__/data.cpython-36.pyc differ diff --git a/exoplanet-ml/rv_net/__pycache__/estimator_util.cpython-36.pyc b/exoplanet-ml/rv_net/__pycache__/estimator_util.cpython-36.pyc new file mode 100644 index 0000000..0b2de36 Binary files /dev/null and b/exoplanet-ml/rv_net/__pycache__/estimator_util.cpython-36.pyc differ diff --git a/exoplanet-ml/rv_net/__pycache__/rv_model.cpython-36.pyc b/exoplanet-ml/rv_net/__pycache__/rv_model.cpython-36.pyc new file mode 100644 index 0000000..adae7fa Binary files /dev/null and b/exoplanet-ml/rv_net/__pycache__/rv_model.cpython-36.pyc differ diff --git a/exoplanet-ml/rv_net/data.py b/exoplanet-ml/rv_net/data.py new file mode 100644 index 0000000..7b3ecc3 --- /dev/null +++ b/exoplanet-ml/rv_net/data.py @@ -0,0 +1,76 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + + +class DatasetBuilder(object): + """Dataset builder class.""" + + def __init__(self, file_pattern, hparams, mode, repeat=1): + """Initializes the dataset builder. + + Args: + file_pattern: File pattern matching input file shards, e.g. + "/tmp/train-?????-of-00100". + hparams: A ConfigDict. + mode: A tf.estimator.ModeKeys. + repeat: The number of times to repeat the dataset. If None, the dataset + will repeat indefinitely. + """ + valid_modes = [ + tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL, + tf.estimator.ModeKeys.PREDICT + ] + if mode not in valid_modes: + raise ValueError("Expected mode in {}. Got: {}".format(valid_modes, mode)) + + self.file_pattern = file_pattern + self.hparams = hparams + self.mode = mode + self.repeat = repeat + + def __call__(self): + is_training = self.mode == tf.estimator.ModeKeys.TRAIN + + # Dataset of file names.
+ filename_dataset = tf.data.Dataset.list_files(self.file_pattern, + shuffle=is_training) + + # Dataset of serialized tf.Examples. + dataset = filename_dataset.flat_map(tf.data.TFRecordDataset) + + # Shuffle in training mode. + if is_training: + dataset = dataset.shuffle(self.hparams.shuffle_values_buffer) + + # Possibly repeat. + if self.repeat != 1: + dataset = dataset.repeat(self.repeat) + + def _example_parser(serialized_example): + """Parses a single tf.Example into feature and label tensors.""" + data_fields = { + self.hparams.ccf_feature_name: tf.FixedLenFeature([401], tf.float32), + self.hparams.label_feature_name: tf.FixedLenFeature([], tf.float32), + } + parsed_fields = tf.parse_single_example(serialized_example, features=data_fields) + ccf_data = parsed_fields[self.hparams.ccf_feature_name] + label = parsed_fields[self.hparams.label_feature_name] + label *= self.hparams.label_rescale_factor # Rescale the label. + return { + "ccf_data": ccf_data, + "label": label, + } + + # Map the parser over the dataset. + dataset = dataset.map(_example_parser, num_parallel_calls=4) + + # Batch results by up to batch_size. + dataset = dataset.batch(self.hparams.batch_size) + + # Prefetch a few batches. + dataset = dataset.prefetch(10) + + return dataset diff --git a/exoplanet-ml/rv_net/data_HARPS_N.py b/exoplanet-ml/rv_net/data_HARPS_N.py new file mode 100644 index 0000000..05d72ca --- /dev/null +++ b/exoplanet-ml/rv_net/data_HARPS_N.py @@ -0,0 +1,79 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + + +class DatasetBuilder(object): + """Dataset builder class.""" + + def __init__(self, file_pattern, hparams, mode, repeat=1): + """Initializes the dataset builder. + + Args: + file_pattern: File pattern matching input file shards, e.g. + "/tmp/train-?????-of-00100". + hparams: A ConfigDict. + mode: A tf.estimator.ModeKeys. + repeat: The number of times to repeat the dataset. If None, the dataset + will repeat indefinitely. + """ + valid_modes = [ + tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL, + tf.estimator.ModeKeys.PREDICT + ] + if mode not in valid_modes: + raise ValueError("Expected mode in {}. Got: {}".format(valid_modes, mode)) + + self.file_pattern = file_pattern + self.hparams = hparams + self.mode = mode + self.repeat = repeat + + def __call__(self): + is_training = self.mode == tf.estimator.ModeKeys.TRAIN + + # Dataset of file names. + filename_dataset = tf.data.Dataset.list_files(self.file_pattern, + shuffle=is_training) + + # Dataset of serialized tf.Examples. + dataset = filename_dataset.flat_map(tf.data.TFRecordDataset) + + # Shuffle in training mode. + if is_training: + dataset = dataset.shuffle(self.hparams.shuffle_values_buffer) + + # Possibly repeat. + if self.repeat != 1: + dataset = dataset.repeat(self.repeat) + + def _example_parser(serialized_example): + """Parses a single tf.Example into feature and label tensors.""" + data_fields = { + self.hparams.ccf_feature_name: tf.io.FixedLenFeature([161], tf.float32), + self.hparams.label_feature_name: tf.io.FixedLenFeature([], tf.float32), + self.hparams.label_feature_name2: tf.io.FixedLenFeature([], tf.float32), + } + parsed_fields = tf.io.parse_single_example(serialized_example, features=data_fields) + ccf_data = parsed_fields[self.hparams.ccf_feature_name] + label = parsed_fields[self.hparams.label_feature_name] + label *= self.hparams.label_rescale_factor # Rescale the label. 
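+      # Also parse the observation time (BJD); it is carried through unscaled as a second label.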
+ label2 = parsed_fields[self.hparams.label_feature_name2] + return { + "ccf_data": ccf_data, + "label": label, + "bjd": label2, + } + + # Map the parser over the dataset. + dataset = dataset.map(_example_parser, num_parallel_calls=4) + + # Batch results by up to batch_size. + dataset = dataset.batch(self.hparams.batch_size) + + # Prefetch a few batches. + dataset = dataset.prefetch(10) + + return dataset diff --git a/exoplanet-ml/rv_net/estimator_util.py b/exoplanet-ml/rv_net/estimator_util.py new file mode 100644 index 0000000..00538bf --- /dev/null +++ b/exoplanet-ml/rv_net/estimator_util.py @@ -0,0 +1,94 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from astronet.ops import training + +def create_learning_rate(hparams, global_step): + """Creates a learning rate Tensor. + + Args: + hparams: ConfigDict containing the learning rate configuration. + global_step: The global step Tensor. + + Returns: + A learning rate Tensor. + """ + if hparams.get("learning_rate_decay_steps"): + # Linear decay from hparams.learning_rate to 0. + learning_rate = tf.train.polynomial_decay( + learning_rate=hparams.learning_rate, + global_step=global_step, + decay_steps=hparams.learning_rate_decay_steps, + end_learning_rate=0, + power=1.0) + else: + learning_rate = tf.constant(hparams.learning_rate) + + return learning_rate + + +def sum_metric(values, name=None): + with tf.variable_scope(name, 'sum', (values,)): + values = tf.convert_to_tensor(values) + total = tf.get_variable( + 'total', + initializer=tf.zeros([], dtype=values.dtype), + trainable=False, + collections=[tf.GraphKeys.LOCAL_VARIABLES, + tf.GraphKeys.METRIC_VARIABLES]) + update_total = tf.assign_add(total, tf.reduce_sum(values)) + return total.value(), update_total + + +class ModelFn(object): + """Class that acts as a callable model function for Estimator train / eval.""" + + def __init__(self, model_class, hparams): + """Initializes the model function. + + Args: + model_class: Model class. + hparams: A HParams object containing hyperparameters for building and + training the model. + """ + self.model_class = model_class + self.hparams = hparams + + def __call__(self, features, mode): + """Builds the model and returns an EstimatorSpec.""" + model = self.model_class(features, self.hparams, mode) + model.build() + print(model.summary) + print("___________ starting new epoch___________") + # Possibly create train_op. + train_op = None + if mode == tf.estimator.ModeKeys.TRAIN: + learning_rate = create_learning_rate(self.hparams, model.global_step) + optimizer = training.create_optimizer(self.hparams, learning_rate) + train_op = training.create_train_op(model, optimizer) + + # Possibly create evaluation metrics. 
+ eval_metrics = None + if mode == tf.estimator.ModeKeys.EVAL: + eval_metrics = { + "num_examples": sum_metric(tf.ones_like(model.label, dtype=tf.int32)), + "num_eval_batches": sum_metric(1), + "rmse": tf.metrics.root_mean_squared_error( + model.label, model.predicted_rv), + "root_mean_label": tf.metrics.root_mean_squared_error( + model.label, tf.zeros_like(model.label)), + "root_mean_pred": tf.metrics.root_mean_squared_error( + model.predicted_rv, tf.zeros_like(model.predicted_rv)), + } + + return tf.estimator.EstimatorSpec( + mode=mode, + predictions={"ccf_data": model.ccf_data, + "label": model.label, + "predicted_rv": model.predicted_rv}, + loss=model.total_loss, + train_op=train_op, + eval_metric_ops=eval_metrics) diff --git a/exoplanet-ml/rv_net/exoplanet-ml b/exoplanet-ml/rv_net/exoplanet-ml new file mode 160000 index 0000000..63c0c8d --- /dev/null +++ b/exoplanet-ml/rv_net/exoplanet-ml @@ -0,0 +1 @@ +Subproject commit 63c0c8db70ef7d3aa81465a4db1f77626678ce18 diff --git a/exoplanet-ml/rv_net/master_shifting.py b/exoplanet-ml/rv_net/master_shifting.py new file mode 100644 index 0000000..eddc374 --- /dev/null +++ b/exoplanet-ml/rv_net/master_shifting.py @@ -0,0 +1,196 @@ +import numpy as np +import matplotlib.pyplot as plt +import matplotlib as mpl +from astropy.io import fits +from astropy.io.fits import getheader +from scipy.optimize import curve_fit +from scipy.interpolate import interp1d +import mpyfit +from astropy.io import fits +from scipy.signal import argrelextrema +from scipy.optimize import curve_fit +from scipy.interpolate import interp1d +import csv +import pickle +import math as m +import pandas as pd +import os + + +# Create a master shifting function with a lot of flexibility +# Specifically, is able to: +# - shift to the same reference frame (option, can be disabled) +# - remove a specific planet signal (option, can be disabled) +# - shift to zero or median (both options) + +def master_shifting(bjd, ccfBary, rvh, + ref_frame_shift, # "off" or a specific value in km/s + removed_planet_rvs, # array of rv values for planet signal in km/s OR "NULL" + zero_or_median): # "zero" or "median" + number_of_ccfs = len(ccfBary) + + # HARPS direct data lists + BJD_list = [] + og_ccf_list = [] + rv_from_HARPS_list = [] + v_rad_raw_list = [] + + # mpyfit lists + mu_og_list = [] + mu_jup_list = [] + mu_planet_list = [] + mu_zero_list = [] + CCF_normalized_list = [] + + # CCF lists + compiled_ccf_list = [] + jup_shifted_CCF_data_list = [] + shifted_CCF_list = [] + final_ccf_list = [] + + spline_method = 'quadratic' + for i in range(0, number_of_ccfs): + day_of_observation = bjd[i] + BJD_list.append(day_of_observation) + + # extracts the CCF data and rv from fits + CCF_data = ccfBary[i] + og_ccf_list.append(CCF_data) + rv_from_HARPS = rvh[i] # - bsrv[i] + rv_from_HARPS_list.append(rv_from_HARPS) + + # Finds the local minima using a Gaussian fit + # Define the actual function where A = p[0], mu = p[1], sigma = p[2], c = p[3] + def gauss(x, p): + return -p[0] * np.exp(-(x - p[1]) ** 2 / (2. 
* p[2] ** 2)) + p[3] + + # A simple minimization function: + def least(p, args): + x, y = args + return gauss(x, p) - y + + parinfo = [{'fixed': False, 'step': 1e-6}, + {'fixed': False, 'step': 1e-4}, + {'fixed': False, 'step': 1e-14}, + {'fixed': False, 'step': 1e-9}] + + # no_shift fit + rv_data = np.linspace(-20, 20, 161) + p_no_shifted = [1., 0.1, 1., 0.5] + pfit_no_shift, results_no_shift = mpyfit.fit(least, p_no_shifted, (rv_data, CCF_data), parinfo) + mu_og = pfit_no_shift[1] + mu_og_list.append(mu_og) + compiled_ccf_list.append(CCF_data) + + # Add in reference frame shift + + if removed_planet_rvs[0] != "NULL": # Remove known planet signals *a priori* + jupiter_shift = removed_planet_rvs[i] + v_rad_raw = rvh[i] + removed_planet_rvs[i] + v_rad_raw_list.append(v_rad_raw) + + # planet removal shift + rv_data_jupiter_shift = rv_data + jupiter_shift # minus sign + f_jup = interp1d(rv_data_jupiter_shift, CCF_data, kind=spline_method, fill_value='extrapolate') + jupiter_shifted_CCF_data = f_jup(rv_data) + jup_shifted_CCF_data_list.append(jupiter_shifted_CCF_data) + compiled_ccf_list.append(jupiter_shifted_CCF_data) + + # fits the shifted by planets removed data + p_shifted_jup = [1., 0.1 + jupiter_shift, 1., 0.5] + pfit_jup, results_jup = mpyfit.fit(least, p_shifted_jup, (rv_data, jupiter_shifted_CCF_data), parinfo) + m = pfit_jup[1] + mu_jup_list.append(m) + + if zero_or_median == "zero": + # Shift to zero + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + + shift_to_zero = -(rv_from_HARPS) + rv_data_shifted = rv_data + shift_to_zero + + f = interp1d(rv_data_shifted, ccf_to_use, kind='cubic', fill_value='extrapolate') + shifted_CCF_data = f(rv_data) + shifted_CCF_list.append(shifted_CCF_data) + compiled_ccf_list.append(shifted_CCF_data) + + # fits the shifted data + p_shifted = [1., 0.1 - shift_to_zero, 1., 0.5] + pfit, results = mpyfit.fit(least, p_shifted, (rv_data, shifted_CCF_data), parinfo) + m_zero = pfit[1] + mu_zero_list.append(m_zero) # -0.1) + else: # shifted to median instead + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + shift_to_median = (np.mean(rvh) - rv_from_HARPS) + rv_data_shifted = rv_data + shift_to_median + + f = interp1d(rv_data_shifted, ccf_to_use, kind='cubic', fill_value='extrapolate') + shifted_CCF_data = f(rv_data) + shifted_CCF_list.append(shifted_CCF_data) + compiled_ccf_list.append(shifted_CCF_data) + + # fits the shifted data + p_shifted = [1., 0.1 - shift_to_median, 1., 0.5] + pfit, results = mpyfit.fit(least, p_shifted, (rv_data, shifted_CCF_data), parinfo) + m_zero = pfit[1] + mu_zero_list.append(m_zero) # -0.1) + else: # Do not remove any planet signals *a priori* + if zero_or_median == "zero": + # Shift to zero + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + + shift_to_zero = -(rv_from_HARPS) + rv_data_shifted = rv_data + shift_to_zero + + f = interp1d(rv_data_shifted, ccf_to_use, kind='cubic', fill_value='extrapolate') + shifted_CCF_data = f(rv_data) + shifted_CCF_list.append(shifted_CCF_data) + compiled_ccf_list.append(shifted_CCF_data) + + # fits the shifted data + p_shifted = [1., 0.1 - shift_to_zero, 1., 0.5] + pfit, results = mpyfit.fit(least, p_shifted, (rv_data, shifted_CCF_data), parinfo) + m_zero = pfit[1] + mu_zero_list.append(m_zero) # -0.1) + else: # shifted to median instead + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + shift_to_median = (np.mean(rvh) - rv_from_HARPS) + rv_data_shifted = rv_data + shift_to_median + + f = interp1d(rv_data_shifted, ccf_to_use, kind='cubic', 
fill_value='extrapolate') + shifted_CCF_data = f(rv_data) + shifted_CCF_list.append(shifted_CCF_data) + compiled_ccf_list.append(shifted_CCF_data) + + # fits the shifted data + p_shifted = [1., 0.1 - shift_to_median, 1., 0.5] + pfit, results = mpyfit.fit(least, p_shifted, (rv_data, shifted_CCF_data), parinfo) + m_zero = pfit[1] + mu_zero_list.append(m_zero) # -0.1) + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + final_ccf_list.append(ccf_to_use) + + # normalize the CCFs + x_left = ccf_to_use[0:40] + x_right = ccf_to_use[121:161] + x_norm_range = list(x_left) + list(x_right) + CCF_normalized = ccf_to_use * (1 / np.mean(x_norm_range)) + CCF_normalized_list.append(CCF_normalized) + + # Create a dataframe + d = {'BJD': BJD_list, + 'vrad_star': rvh, + 'og_ccf_list': og_ccf_list, + 'jup_shifted_CCF_data_list': jup_shifted_CCF_data_list, + 'zero_shifted_CCF_list': shifted_CCF_list, + 'CCF_normalized_list': CCF_normalized_list, + 'mu_og_list': mu_og_list, + 'mu_jup_list': mu_jup_list, + 'mu_zero_list': mu_zero_list + } + df = pd.DataFrame(data=d) + + return df + +if __name__ == '__main__': + master_shifting() \ No newline at end of file diff --git a/exoplanet-ml/rv_net/master_shifting_planet.py b/exoplanet-ml/rv_net/master_shifting_planet.py new file mode 100644 index 0000000..3d84e48 --- /dev/null +++ b/exoplanet-ml/rv_net/master_shifting_planet.py @@ -0,0 +1,232 @@ +import numpy as np +import matplotlib.pyplot as plt +import matplotlib as mpl +from astropy.io import fits +from astropy.io.fits import getheader +from scipy.optimize import curve_fit +from scipy.interpolate import interp1d +import mpyfit +from astropy.io import fits +from scipy.signal import argrelextrema +from scipy.optimize import curve_fit +from scipy.interpolate import interp1d +import csv +import pickle +import math as m +import pandas as pd +import os + + +# Create a master shifting function with a lot of flexibility +# Specifically, is able to: +# - shift to the same reference frame (option, can be disabled) +# - remove a specific planet signal (option, can be disabled) +# - inject a specific planet signal (option, can be disabled) +# - shift to zero or median (both options) + +def master_shifting_planet(bjd, ccfBary, rvh, + ref_frame_shift, # "off" or a specific value in km/s + removed_planet_rvs, # array of rv values for planet signal in km/s OR "NULL" + injected_planet_params, # array of amplitude (km/s), 2pi/period, phase + zero_or_median): # "zero" or "median" + number_of_ccfs = len(ccfBary) + + # HARPS direct data lists + BJD_list = [] + og_ccf_list = [] + rv_from_HARPS_list = [] + v_rad_raw_list = [] + + # injected planet list + planet_signal_list = [] + + # mpyfit lists + mu_og_list = [] + mu_jup_list = [] + mu_planet_list = [] + mu_zero_list = [] + CCF_normalized_list = [] + + # CCF lists + compiled_ccf_list = [] + jup_shifted_CCF_data_list = [] + planet_shifted_CCF_data_list = [] + shifted_CCF_list = [] + final_ccf_list = [] + + def planet_signal(x): + return injected_planet_params[0] * np.sin(injected_planet_params[1] * x + injected_planet_params[2]) + + spline_method = 'quadratic' + for i in range(0, number_of_ccfs): + day_of_observation = bjd[i] + BJD_list.append(day_of_observation) + + # extracts the CCF data and rv from fits + CCF_data = ccfBary[i] + og_ccf_list.append(CCF_data) + rv_from_HARPS = rvh[i] # - bsrv[i] + rv_from_HARPS_list.append(rv_from_HARPS) + + # Finds the local minima using a Gaussian fit + # Define the actual function where A = p[0], mu = p[1], sigma = p[2], c = p[3] + def 
gauss(x, p): + return -p[0] * np.exp(-(x - p[1]) ** 2 / (2. * p[2] ** 2)) + p[3] + + # A simple minimization function: + def least(p, args): + x, y = args + return gauss(x, p) - y + + parinfo = [{'fixed': False, 'step': 1e-6}, + {'fixed': False, 'step': 1e-4}, + {'fixed': False, 'step': 1e-14}, + {'fixed': False, 'step': 1e-9}] + + # no_shift fit + rv_data = np.linspace(-20, 20, 161) + p_no_shifted = [1., 0.1, 1., 0.5] + pfit_no_shift, results_no_shift = mpyfit.fit(least, p_no_shifted, (rv_data, CCF_data), parinfo) + mu_og = pfit_no_shift[1] + mu_og_list.append(mu_og) + compiled_ccf_list.append(CCF_data) + + # Add in reference frame shift + + if removed_planet_rvs[0] != "NULL": + jupiter_shift = removed_planet_rvs[i] + v_rad_raw = rvh[i] + removed_planet_rvs[i] + v_rad_raw_list.append(v_rad_raw) + + # planet removal shift + rv_data_jupiter_shift = rv_data + jupiter_shift # minus sign + f_jup = interp1d(rv_data_jupiter_shift, CCF_data, kind=spline_method, fill_value='extrapolate') + jupiter_shifted_CCF_data = f_jup(rv_data) + jup_shifted_CCF_data_list.append(jupiter_shifted_CCF_data) + compiled_ccf_list.append(jupiter_shifted_CCF_data) + + # fits the shifted by jupiter data + p_shifted_jup = [1., 0.1 + jupiter_shift, 1., 0.5] + pfit_jup, results_jup = mpyfit.fit(least, p_shifted_jup, (rv_data, jupiter_shifted_CCF_data), parinfo) + m = pfit_jup[1] + mu_jup_list.append(m) + + if injected_planet_params[0] != "NULL": + # inject a planet (k=0.3 m/s, p = 365.24d) + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + + bjd_array = np.asarray(day_of_observation) + inj_planet_shift = planet_signal(bjd_array) # km/s + planet_signal_list.append(inj_planet_shift) + rv_data_planet_shift = rv_data + inj_planet_shift + f_planet = interp1d(rv_data_planet_shift, ccf_to_use, kind='cubic', fill_value='extrapolate') + planet_shifted_CCF_data = f_planet(rv_data) + planet_shifted_CCF_data_list.append(planet_shifted_CCF_data) + compiled_ccf_list.append(planet_shifted_CCF_data) + + # fits the shifted by planet data + p_shifted_planet = [1., 0.1 + inj_planet_shift, 1., 0.5] + pfit_planet, results_planet = mpyfit.fit(least, p_shifted_planet, (rv_data, planet_shifted_CCF_data), + parinfo) + + m = pfit_planet[1] + mu_planet_list.append(m) + + if zero_or_median == "zero": + # Shift to zero, after planet shift + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + + shift_to_zero = -(rv_from_HARPS + inj_planet_shift) + rv_data_shifted = rv_data + shift_to_zero + + f = interp1d(rv_data_shifted, ccf_to_use, kind='cubic', fill_value='extrapolate') + shifted_CCF_data = f(rv_data) + shifted_CCF_list.append(shifted_CCF_data) + compiled_ccf_list.append(shifted_CCF_data) + + # fits the shifted data + p_shifted = [1., 0.1 - shift_to_zero, 1., 0.5] + pfit, results = mpyfit.fit(least, p_shifted, (rv_data, shifted_CCF_data), parinfo) + m_zero = pfit[1] + mu_zero_list.append(m_zero) # -0.1) + else: + # Shift to median, after planet shift + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + + shift_to_zero = ((np.mean(rvh) - rv_from_HARPS) - inj_planet_shift) + rv_data_shifted = rv_data + shift_to_zero + + f = interp1d(rv_data_shifted, ccf_to_use, kind='cubic', fill_value='extrapolate') + shifted_CCF_data = f(rv_data) + shifted_CCF_list.append(shifted_CCF_data) + compiled_ccf_list.append(shifted_CCF_data) + + # fits the shifted data + p_shifted = [1., 0.1 - shift_to_zero, 1., 0.5] + pfit, results = mpyfit.fit(least, p_shifted, (rv_data, shifted_CCF_data), parinfo) + m_zero = pfit[1] + 
mu_zero_list.append(m_zero) # -0.1) + else: + if zero_or_median == "zero": + # Shift to zero + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + + shift_to_zero = -(rv_from_HARPS) + rv_data_shifted = rv_data + shift_to_zero + + f = interp1d(rv_data_shifted, ccf_to_use, kind='cubic', fill_value='extrapolate') + shifted_CCF_data = f(rv_data) + shifted_CCF_list.append(shifted_CCF_data) + compiled_ccf_list.append(shifted_CCF_data) + + # fits the shifted data + p_shifted = [1., 0.1 - shift_to_zero, 1., 0.5] + pfit, results = mpyfit.fit(least, p_shifted, (rv_data, shifted_CCF_data), parinfo) + m_zero = pfit[1] + mu_zero_list.append(m_zero) # -0.1) + else: # shifted to median instead + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + shift_to_median = (np.mean(rvh) - rv_from_HARPS) + rv_data_shifted = rv_data + shift_to_median + + f = interp1d(rv_data_shifted, ccf_to_use, kind='cubic', fill_value='extrapolate') + shifted_CCF_data = f(rv_data) + shifted_CCF_list.append(shifted_CCF_data) + compiled_ccf_list.append(shifted_CCF_data) + + # fits the shifted data + p_shifted = [1., 0.1 - shift_to_median, 1., 0.5] + pfit, results = mpyfit.fit(least, p_shifted, (rv_data, shifted_CCF_data), parinfo) + m_zero = pfit[1] + mu_zero_list.append(m_zero) # -0.1) + ccf_to_use = compiled_ccf_list[len(compiled_ccf_list) - 1] + final_ccf_list.append(ccf_to_use) + + # normalize the CCFs + x_left = ccf_to_use[0:40] + x_right = ccf_to_use[121:161] + x_norm_range = list(x_left) + list(x_right) + CCF_normalized = ccf_to_use * (1 / np.mean(x_norm_range)) + CCF_normalized_list.append(CCF_normalized) + + # Create a dataframe + d = {'BJD': BJD_list, + 'vrad_star': rvh, + 'vrad_plan_star': rvh + planet_signal_list, + 'og_ccf_list': og_ccf_list, + 'jup_shifted_CCF_data_list': jup_shifted_CCF_data_list, + 'planet_shifted_CCF_data_list': planet_shifted_CCF_data_list, + 'zero_shifted_CCF_list': shifted_CCF_list, + 'CCF_normalized_list': CCF_normalized_list, + 'mu_og_list': mu_og_list, + 'mu_jup_list': mu_jup_list, + 'mu_planet_list': mu_planet_list, + 'mu_zero_list': mu_zero_list + } + df = pd.DataFrame(data=d) + + return df + + +if __name__ == '__main__': + master_shifting_planet() diff --git a/exoplanet-ml/rv_net/mu_plotting.py b/exoplanet-ml/rv_net/mu_plotting.py new file mode 100644 index 0000000..9a93608 --- /dev/null +++ b/exoplanet-ml/rv_net/mu_plotting.py @@ -0,0 +1,28 @@ +import numpy as np +import matplotlib.pyplot as plt +import matplotlib as mpl +from astropy.io import fits +from astropy.io.fits import getheader +from scipy.optimize import curve_fit +from scipy.interpolate import interp1d +import mpyfit +from astropy.io import fits +from scipy.signal import argrelextrema +from scipy.optimize import curve_fit +from scipy.interpolate import interp1d +import csv +import pickle +import math as m +import pandas as pd +import os + +def mu_plotting(df, lists, names): + fig, ax = plt.subplots(1, 1, figsize=(8, 4)) + + for i in np.arange(0, len(lists)): + plt.plot(df["BJD"], df[lists[i]], ".", label=names[i]) + plt.legend() + + +if __name__ == '__main__': + mu_plotting() diff --git a/exoplanet-ml/rv_net/pics/CCF_residuals_9_18.gif b/exoplanet-ml/rv_net/pics/CCF_residuals_9_18.gif new file mode 100644 index 0000000..8136afa Binary files /dev/null and b/exoplanet-ml/rv_net/pics/CCF_residuals_9_18.gif differ diff --git a/exoplanet-ml/rv_net/pics/CCF_residuals_H-N.gif b/exoplanet-ml/rv_net/pics/CCF_residuals_H-N.gif new file mode 100644 index 0000000..0208a5b Binary files /dev/null and 
b/exoplanet-ml/rv_net/pics/CCF_residuals_H-N.gif differ diff --git a/exoplanet-ml/rv_net/pics/Figure4.gif b/exoplanet-ml/rv_net/pics/Figure4.gif new file mode 100644 index 0000000..adc21be Binary files /dev/null and b/exoplanet-ml/rv_net/pics/Figure4.gif differ diff --git a/exoplanet-ml/rv_net/residual_plot.py b/exoplanet-ml/rv_net/residual_plot.py new file mode 100644 index 0000000..147f329 --- /dev/null +++ b/exoplanet-ml/rv_net/residual_plot.py @@ -0,0 +1,52 @@ +import numpy as np +import matplotlib.pyplot as plt +import matplotlib as mpl +from astropy.io import fits +from astropy.io.fits import getheader +from scipy.optimize import curve_fit +from scipy.interpolate import interp1d +import mpyfit +from astropy.io import fits +from scipy.signal import argrelextrema +from scipy.optimize import curve_fit +from scipy.interpolate import interp1d +import csv +import pickle +import math as m +import pandas as pd +import os + + + +def residual_plot(rv_list, ccfs_of_interest, num_ref_ccf, plot_title): + # create color scheme + min_rv = np.min(rv_list) + max_rv = np.max(rv_list) + cscale_residuals = (np.array(rv_list - min_rv) / (max_rv - min_rv)) + print(np.min(cscale_residuals), np.max(cscale_residuals)) + + col = plt.cm.jet([0.25, 0.75]) + n = len(ccfs_of_interest) + colors = plt.cm.bwr(cscale_residuals) + + # Create the residual plot by looping through the list of CCFs ordered by date + fig, ax = plt.subplots(1, 1, figsize=(8, 4)) + num = 0 + for i in np.arange(0, len(ccfs_of_interest)): + if num_ref_ccf == "median": + plt.plot(ccfs_of_interest[i] - np.median(list(ccfs_of_interest), axis=0), color=colors[num]) + else: + if i != num_ref_ccf: + plt.plot(ccfs_of_interest[i] - ccfs_of_interest[num_ref_ccf], color=colors[num]) + num += 1 + + plt.title(plot_title) + # make color bar + cmap = mpl.cm.bwr + norm = mpl.colors.Normalize(vmin=(min_rv - np.median(rv_list)), vmax=(max_rv - np.median(rv_list))) + cb = plt.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap), orientation="vertical", pad=-0.0001) + cb.set_label(label='Stellar Activity Signal (km/s)', size=16, rotation=270, labelpad=20) + + +if __name__ == '__main__': + residual_plot() \ No newline at end of file diff --git a/exoplanet-ml/rv_net/rv_model.py b/exoplanet-ml/rv_net/rv_model.py new file mode 100644 index 0000000..3f4393e --- /dev/null +++ b/exoplanet-ml/rv_net/rv_model.py @@ -0,0 +1,148 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + + +class RvModel(object): + """A TensorFlow model for predicting radial velocities.""" + + def __init__(self, features, hparams, mode): + """Basic setup. + + The actual TensorFlow model is constructed in build(). + + Args: + features: Dictionary containing "ccf_data" and "label". + hparams: A ConfigDict of hyperparameters for building the model. + mode: A tf.estimator.ModeKeys to specify whether the graph should be built + for training, evaluation or prediction. + + Raises: + ValueError: If mode is invalid. + """ + valid_modes = [ + tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL, + tf.estimator.ModeKeys.PREDICT + ] + if mode not in valid_modes: + raise ValueError("Expected mode in {}. 
Got: {}".format(valid_modes, mode)) + + self.features = features + self.hparams = hparams + self.mode = mode + + self.ccf_data = self.features["ccf_data"] + self.label = self.features["label"] + self.total_loss = None + + def build_network(self): + """Builds the neural network.""" + net = self.ccf_data + + # Reshape [length] -> [length, 1]. + net = tf.expand_dims(net, -1) + + # create summary object + summary = [] + + for i in self.hparams.conv_block_filters: + for _ in range(self.hparams.conv_layers_per_block): + input_shape = net.shape.as_list() + conv_op = tf.keras.layers.Conv1D(filters=i, kernel_size=self.hparams.kernel_size, padding='same', + activation=tf.nn.relu) + net = conv_op(net) + summary.append("Conv1D-{}-{}. Input shape: {}. Output shape: {}".format(self.hparams.kernel_size, i, input_shape, + net.shape.as_list())) + pool_size = 2 + strides = 2 + max_pool = tf.keras.layers.MaxPool1D(pool_size=pool_size, strides=strides) + net = max_pool(net) + summary.append("MaxPool1D-{}. Pool Size: {}. Strides: {}".format(self.hparams.kernel_size, pool_size, strides)) + + for i in self.hparams.final_conv_num_filters: + conv_op = tf.keras.layers.Conv1D(filters=i, kernel_size=self.hparams.kernel_size, padding='same', + activation=tf.nn.relu) + net = conv_op(net) + flatten = tf.keras.layers.Flatten() + net = flatten(net) + + for i in self.hparams.dense_num_layers: + dense = tf.keras.layers.Dense(i, activation=tf.nn.relu) + net = dense(net) + + # output layer + output = tf.keras.layers.Dense(1) + net = tf.squeeze(output(net)) + + self.summary = "\n".join(summary) + self.predicted_rv = net + + def build_losses(self): + """Builds the training losses.""" + self.batch_losses = tf.squared_difference(self.predicted_rv, self.label) + self.total_loss = tf.reduce_mean(self.batch_losses) + + def build(self): + """Creates all ops for training, evaluation or inference.""" + self.global_step = tf.train.get_or_create_global_step() + self.build_network() + if self.mode != tf.estimator.ModeKeys.PREDICT: + self.build_losses() + +class RvLinearModel(object): + """A TensorFlow model for predicting radial velocities.""" + + def __init__(self, features, hparams, mode): + """Basic setup. + + The actual TensorFlow model is constructed in build(). + + Args: + features: Dictionary containing "ccf_data" and "label". + hparams: A ConfigDict of hyperparameters for building the model. + mode: A tf.estimator.ModeKeys to specify whether the graph should be built + for training, evaluation or prediction. + + Raises: + ValueError: If mode is invalid. + """ + valid_modes = [ + tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL, + tf.estimator.ModeKeys.PREDICT + ] + if mode not in valid_modes: + raise ValueError("Expected mode in {}. Got: {}".format(valid_modes, mode)) + + self.features = features + self.hparams = hparams + self.mode = mode + + self.ccf_data = self.features["ccf_data"] + self.label = self.features["label"] + self.total_loss = None + + def build_network(self): + """Builds linear model.""" + # create summary object + + summary = [] + + dense_layer = tf.keras.layers.Dense(1) + summary.append("Dense-{}-{}. Input shape: {}. 
Output shape: {}".format(self.hparams.kernel_size, 1, 401,1)) + + self.summary = "\n".join(summary) + self.predicted_rv = dense_layer(self.ccf_data) + + def build_losses(self): + """Builds the training losses.""" + self.batch_losses = tf.squared_difference(self.predicted_rv, self.label) + self.total_loss = tf.reduce_mean(self.batch_losses) + + def build(self): + """Creates all ops for training, evaluation or inference.""" + self.global_step = tf.train.get_or_create_global_step() + self.build_network() + if self.mode != tf.estimator.ModeKeys.PREDICT: + self.build_losses() diff --git a/exoplanet-ml/rv_net/train_with_estimator.py b/exoplanet-ml/rv_net/train_with_estimator.py new file mode 100644 index 0000000..466846e --- /dev/null +++ b/exoplanet-ml/rv_net/train_with_estimator.py @@ -0,0 +1,16 @@ +import os.path +import numpy as np +import tensorflow as tf +import matplotlib.pyplot as plt +import git + +# Get the astronet source code +git clone https://github.com/zdebeurs/exoplanet-ml.git + +import sys +sys.path.append("exoplanet-ml/exoplanet-ml/") +from astronet.ops import training +from tf_util import config_util +from tf_util import configdict +from tf_util import estimator_runner +from rv_net import data, rv_model, estimator_util \ No newline at end of file