-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_experiment.py
76 lines (60 loc) · 2.69 KB
/
run_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from dataset import cMRIDataset
from transforms import get_train_transforms, get_val_test_transforms, calculate_stats
from train_and_evaluate import train_and_evaluate
# Experiment hyperparameters (fixed for the training-size sweep below).
MODEL_ARCHITECTURE = 'resnet50'  # backbone name, resolved inside train_and_evaluate
LOSS_FUNCTION = 'huber'          # robust regression loss for the sphericity target
OPTIMIZER = 'adamw'
LEARNING_RATE = 0.001
WEIGHT_DECAY = 0.001
BATCH_SIZE = 16
EPOCHS = 100
def prepare_data(df, val_size=50, test_size=50):
    """Split *df* into train/val/test partitions.

    Parameters
    ----------
    df : pandas.DataFrame
        Full labelled dataset.
    val_size, test_size : int
        Absolute number of rows (not fractions) held out for the
        validation and test sets, respectively.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, val, test)`` as independent copies, so callers can add
        columns (e.g. a 'split' tag) without triggering pandas'
        SettingWithCopyWarning on a slice of the original frame.
    """
    # Fixed random_state keeps the held-out sets identical across runs.
    train_val, test = train_test_split(df, test_size=test_size, random_state=48)
    train, val = train_test_split(train_val, test_size=val_size, random_state=48)
    # .copy() decouples the returned frames from `df` — the caller mutates them.
    return train.copy(), val.copy(), test.copy()
if __name__ == "__main__":
    # Load labels; only the sample index and the regression target are needed.
    df = pd.read_csv("data/labels.csv")[['idx', 'sphericity_index']]
    # Split into a training pool plus fixed val/test sets (50 rows each).
    train_pool, val, test = prepare_data(df, val_size=50, test_size=50)
    # Tag each partition so cMRIDataset can select rows by split name.
    # NOTE(review): these frames are slices produced by train_test_split;
    # adding a column here can raise pandas' SettingWithCopyWarning —
    # consider returning .copy()s from prepare_data.
    train_pool['split'] = 'train'
    val['split'] = 'val'
    test['split'] = 'test'
    # Training-set sizes to sweep for the learning-curve experiment.
    train_sizes = [350]  # Modify as needed
    # Train and evaluate once per training size, collecting metrics.
    results = []
    for size in train_sizes:
        # Subsample the training pool with a fixed seed for reproducibility.
        train_subset = train_pool.sample(n=size, random_state=24)
        # Recombine with the fixed val/test rows into one frame keyed by 'split'.
        df_subset = pd.concat([train_subset, val, test])
        # Compute normalisation statistics from the raw (untransformed) training images.
        raw_train_dataset = cMRIDataset(df_subset, split='train', transform=None)
        mean, std = calculate_stats(raw_train_dataset)
        print(f"Calculated mean: {mean}, std: {std}")
        # Build datasets with split-appropriate transforms
        # (augmentation on train only, presumably — confirm in transforms.py).
        train_dataset = cMRIDataset(df_subset, split='train', transform=get_train_transforms(mean, std))
        val_dataset = cMRIDataset(df_subset, split='val', transform=get_val_test_transforms(mean, std))
        test_dataset = cMRIDataset(df_subset, split='test', transform=get_val_test_transforms(mean, std))
        # NOTE(review): `datasets` (and the three dataset objects above) are
        # never passed to train_and_evaluate below — it receives only
        # df_subset. Either train_and_evaluate rebuilds the datasets
        # internally (duplicating the stats computation) or this is dead
        # code; confirm and pass `datasets` through if the API supports it.
        datasets = {
            'train': train_dataset,
            'val': val_dataset,
            'test': test_dataset
        }
        # Train the model and collect its evaluation metrics for this size.
        result = train_and_evaluate(df_subset,
                                    MODEL_ARCHITECTURE, LOSS_FUNCTION, OPTIMIZER,
                                    LEARNING_RATE, WEIGHT_DECAY, BATCH_SIZE, EPOCHS)
        # Record the training-set size alongside the returned metrics
        # (result is assumed to be a dict with at least an 'r2' key).
        result['train_size'] = size
        results.append(result)
        print(f"Training Size: {size}, R2 Score: {result['r2']:.4f}")
    # Persist the full sweep for later analysis/plotting.
    results_df = pd.DataFrame(results)
    results_df.to_csv('train_size_experiment_results.csv', index=False)