Commit

Get training working (#128)
* Verbosity flag

* Log accuracy of frequency informed guess

* Fix mypy

* Add type

* Update gitignore

* Add reduce lr on plateau (see the callback sketch after this list)

* Fix mypy

* Make sure best model is loaded for evaluation (see the callback sketch after this list)

* Update batch size for experiments

* Add commands for sweeps

* Update commands

* Update commands filepath

* Add interleave cli command

* Update CPU profiling

* Changes to get model working

* Add jsonify dict

* Add val metrics

* Mypy and linting

* Linting

* Setup sweep

* Fix sweep

* Update evaluation code

* Run training

* Fix fingerprint indices

* Add assert statement to make sure df and fp are same size

* Make sure train_fraction is taken on the training set only

* Change the frequency informed guess to be based on the training set instead of the training and validation set (see the baseline sketch after this list)

* Change seed

* Linting
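For reference, the "reduce lr on plateau" and "best model is loaded for evaluation" bullets above map onto standard Keras callbacks. A minimal sketch of that combination, with toy data standing in for the fingerprint datasets (the shapes, layer sizes, and hyperparameters here are illustrative assumptions, not taken from this commit):

import numpy as np
import tensorflow as tf

# Toy stand-ins for the real fingerprint datasets
x = np.random.rand(256, 16).astype("float32")
y = np.random.randint(0, 2, size=(256,))
train_ds = tf.data.Dataset.from_tensor_slices((x[:192], y[:192])).batch(32)
val_ds = tf.data.Dataset.from_tensor_slices((x[192:], y[192:])).batch(32)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dense(2, activation="softmax"),
])
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

callbacks = [
    # Halve the learning rate after 3 epochs without val_loss improvement
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3),
    # Keep the best weights seen so far on disk
    tf.keras.callbacks.ModelCheckpoint("best.weights.h5", monitor="val_loss",
                                       save_best_only=True, save_weights_only=True),
]
model.fit(train_ds, validation_data=val_ds, epochs=10, callbacks=callbacks, verbose=0)

# Reload the best checkpoint before evaluating, so final metrics reflect
# the best epoch rather than the last one.
model.load_weights("best.weights.h5")
model.evaluate(val_ds, verbose=0)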
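The "frequency informed guess" bullets describe a majority-class baseline; after this commit its statistics come from the training set only. A minimal sketch of such a baseline, using made-up solvent labels purely for illustration:

import pandas as pd

# Toy training/validation labels (values invented for illustration)
train = pd.Series(["THF", "THF", "DMF", "MeOH", "THF", "DMF"])
val = pd.Series(["THF", "DMF", "THF", "EtOH"])

# The guess is informed by label frequencies in the training set only
most_frequent = train.value_counts().idxmax()  # "THF"

# Accuracy of always predicting the most frequent training label
print((val == most_frequent).mean())  # 0.5: 2 of 4 validation labels are THF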
marcosfelt authored Jun 5, 2023
1 parent a22d8e5 commit 99dbf8e
Showing 14 changed files with 579 additions and 133 deletions.
31 changes: 23 additions & 8 deletions Makefile
@@ -312,19 +312,34 @@ with_trust_no_map_train_20:
with_trust_with_map_train_20:
python -m condition_prediction --train_data_path="data/orderly/datasets/orderly_with_trust_with_map_train.parquet" --test_data_path="data/orderly/datasets/orderly_with_trust_with_map_test.parquet" --output_folder_path="models/with_trust_with_map_20" --train_fraction=0.2 --train_val_split=0.8 --overwrite=False --epochs=20 --evaluate_on_test_data=True --early_stopping_patience=5 --wandb_entity=WANDB_ENTITY


# Sweeps
sweep_no_trust_no_map_train:
python -m sweep sweeps/no_trust_no_map_train.yaml --max_parallel 1
TRAIN_FRACS = 0.2 0.4 0.6 0.8 1.0
DATASETS_PATH = /project/studios/orderly-preprocessing/ORDerly/data/orderly/datasets/
DATASETS = no_trust_no_map no_trust_with_map with_trust_no_map with_trust_with_map
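# Note: $${var} escapes the dollar sign so the shell, not Make, expands the loop variables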
dataset_size_sweep:
@for dataset in ${DATASETS}; \
do \
for train_frac in ${TRAIN_FRACS}; \
do \
rm -rf .tf_cache* && python -m condition_prediction --train_data_path=${DATASETS_PATH}/orderly_$${dataset}_train.parquet --test_data_path=${DATASETS_PATH}/orderly_$${dataset}_test.parquet --output_folder_path=models/$${dataset} --train_fraction=$${train_frac} --train_val_split=0.8 --overwrite=True --batch_size=512 --epochs=100 --early_stopping_patience=0 --evaluate_on_test_data=True --wandb_entity="ceb-sre"; \
done \
done


sweep_no_trust_no_map_train_commands:
python -m sweep sweeps/no_trust_no_map_train.yaml --dry_run

sweep_no_trust_with_map_train:
python -m sweep sweeps/no_trust_with_map_train.yaml --max_parallel 1
sweep_no_trust_with_map_train_commands:
python -m sweep sweeps/no_trust_with_map_train.yaml --dry_run

sweep_with_trust_no_map_train:
python -m sweep sweeps/with_trust_no_map_train.yaml --max_parallel 1
sweep_with_trust_no_map_train_commands:
python -m sweep sweeps/with_trust_no_map_train.yaml --dry_run

sweep_with_trust_with_map_train:
python -m sweep sweeps/with_trust_with_map_train.yaml --max_parallel 1
sweep_with_trust_with_map_train_commands:
python -m sweep sweeps/with_trust_with_map_train.yaml --dry_run

sweep_all: sweep_no_trust_no_map_train_commands sweep_no_trust_with_map_train_commands sweep_with_trust_no_map_train_commands sweep_with_trust_with_map_train_commands

train_all: no_trust_no_map_train no_trust_with_map_train with_trust_no_map_train with_trust_with_map_train no_trust_no_map_train_20 no_trust_with_map_train_20 with_trust_no_map_train_20 with_trust_with_map_train_20

49 changes: 38 additions & 11 deletions condition_prediction/condition_prediction/data_generator.py
@@ -39,6 +39,10 @@ class GenerateData:
mol4: NDArray[np.float32]
mol5: NDArray[np.float32]

# def __post_init__(self):
# initializer = lambda: signal.signal(signal.SIGINT, signal.SIG_IGN)
# self.pool = multiprocessing.Pool(os.cpu_count(), initializer)

def map_idx_to_data(self, idx):
idx = idx.numpy()
if self.product_fp is None and self.rxn_diff_fp is None:
@@ -192,6 +196,10 @@ def get_dataset(
# Construct outputs
if fp is None and df is None:
raise ValueError("Must provide either df or fp")
elif fp is not None and df is not None and fp.shape[0] != df.shape[0]:
raise ValueError(
f"Fingerprint ({fp.shape}) and dataframe ({df.shape}) not the same size"
)

if fp is not None:
product_fp = fp[:, : fp.shape[1] // 2]
@@ -244,15 +252,6 @@ def map_func(idx):
# num_parallel_calls=os.cpu_count(), deterministic=False
)

if cache_data:
cache_dir = Path(cache_dir)
if not cache_dir.exists():
cache_dir.mkdir(exist_ok=True)
# # Read through dataset once to cache it
# print("Caching dataset")
# [1 for _ in dataset.as_numpy_iterator()]
dataset = dataset.cache(filename=str(cache_dir / "fps"))

# ensures shape is correct after batching
# See https://github.com/tensorflow/tensorflow/issues/32912#issuecomment-550363802
def _fixup_shape(X, Y):
@@ -264,6 +263,26 @@ def _fixup_shape(X, Y):

dataset = dataset.map(_fixup_shape)

if cache_data:
cache_dir = Path(cache_dir)
if not cache_dir.exists():
cache_dir.mkdir(exist_ok=True)
# Read through dataset once to cache it
print("Caching dataset")
[1 for _ in dataset.as_numpy_iterator()]
# dataset = dataset.cache(filename=str(cache_dir / "fps"))
dataset = dataset.cache()

if cache_data:
cache_dir = Path(cache_dir)
if not cache_dir.exists():
cache_dir.mkdir(exist_ok=True)
# Read through dataset once to cache it
print("Caching dataset")
[1 for _ in dataset.as_numpy_iterator()]
dataset = dataset.cache(filename=str(cache_dir / "fps"))
# dataset = dataset.cache()

if interleave:
dataset = tf.data.Dataset.range(len(dataset)).interleave(
lambda _: dataset,
@@ -275,6 +294,7 @@ def _fixup_shape(X, Y):
if prefetch_buffer_size is None:
prefetch_buffer_size = AUTOTUNE
dataset = dataset.prefetch(buffer_size=prefetch_buffer_size)
print("Prefetch buffer size:", prefetch_buffer_size)
return dataset
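
The cache → interleave → prefetch ordering above follows the usual tf.data pattern. A self-contained sketch of the same pipeline shape, with random tensors standing in for the fingerprint batches (shapes are arbitrary assumptions):

import tensorflow as tf

AUTOTUNE = tf.data.AUTOTUNE

dataset = tf.data.Dataset.from_tensor_slices(tf.random.uniform((64, 8))).batch(16)
dataset = dataset.cache()  # in-memory; pass filename=... for an on-disk cache

# Interleaving a dataset with itself, as above, overlaps the production of
# consecutive elements across parallel calls.
dataset = tf.data.Dataset.range(len(dataset)).interleave(
    lambda _: dataset,
    num_parallel_calls=AUTOTUNE,
    deterministic=False,
)

# Overlap preparation of the next batch with consumption of the current one
dataset = dataset.prefetch(buffer_size=AUTOTUNE)

for batch in dataset.take(1):
    print(batch.shape)  # (16, 8)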


@@ -294,6 +314,7 @@ def get_datasets(
cache_train_data: bool = False,
cache_val_data: bool = False,
cache_test_data: bool = False,
interleave: bool = False,
):
"""
Get data generators for train, val and test
@@ -390,13 +411,15 @@ def get_datasets(
train_mol4,
train_mol5,
df=df.iloc[train_idx],
fp=train_val_fp,
fp=train_val_fp[train_idx] if train_val_fp is not None else None,
mode=train_mode,
fp_size=fp_size,
shuffle=True,
batch_size=batch_size,
shuffle_buffer_size=shuffle_buffer_size,
cache_data=cache_train_data,
prefetch_buffer_size=prefetch_buffer_size,
interleave=interleave,
cache_dir=".tf_cache_train/",
)
val_dataset = get_dataset(
@@ -406,12 +429,14 @@ def get_datasets(
val_mol4,
val_mol5,
df=df.iloc[val_idx],
fp=train_val_fp,
fp=train_val_fp[val_idx] if train_val_fp is not None else None,
mode=val_mode,
fp_size=fp_size,
shuffle=False,
batch_size=batch_size,
shuffle_buffer_size=shuffle_buffer_size,
prefetch_buffer_size=prefetch_buffer_size,
interleave=interleave,
cache_data=cache_val_data,
cache_dir=".tf_cache_val/",
)
@@ -428,6 +453,8 @@ def get_datasets(
shuffle=False,
batch_size=batch_size,
shuffle_buffer_size=shuffle_buffer_size,
prefetch_buffer_size=prefetch_buffer_size,
interleave=interleave,
cache_data=cache_test_data,
cache_dir=".tf_cache_test/",
)
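
The indexing changes above (the size check in get_dataset and the switch to fp=train_val_fp[train_idx] / fp=train_val_fp[val_idx]) all serve to keep the fingerprint matrix row-aligned with the dataframe. A minimal sketch of the same bookkeeping with random data, assuming the concatenated fingerprint layout implied by the slicing in get_dataset (first half product fingerprint, second half reaction-difference fingerprint):

import numpy as np

rng = np.random.default_rng(0)
n_rows, fp_size = 100, 32
fp = rng.random((n_rows, 2 * fp_size), dtype=np.float32)

# Size check mirroring the ValueError added in get_dataset
assert fp.shape[0] == n_rows, "Fingerprint and dataframe must be the same size"

# Split the concatenated fingerprint into its two halves
product_fp = fp[:, : fp.shape[1] // 2]
rxn_diff_fp = fp[:, fp.shape[1] // 2 :]

# Index rows with the same train/val indices used for the dataframe, so that
# df.iloc[train_idx] and fp[train_idx] stay aligned.
idx = rng.permutation(n_rows)
train_idx, val_idx = idx[:80], idx[80:]

# train_fraction is applied to the training indices only, never to validation
train_fraction = 0.2
train_idx = train_idx[: int(train_fraction * len(train_idx))]

print(fp[train_idx].shape, fp[val_idx].shape)  # (16, 64) (20, 64)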
(diff for the remaining 12 changed files not shown)
