#!/usr/bin/env python

# Edit this script to add your team's code. Some functions are *required*, but you can edit most parts of the required functions,
# change or remove non-required functions, and add your own functions.

################################################################################
#
# Optional libraries, functions, and variables. You can change or remove them.
#
################################################################################

import joblib
import numpy as np
import os
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import sys

from helper_code import *

################################################################################
#
# Required functions. Edit these functions to add your code, but do not change the arguments for the functions.
#
################################################################################

# Train your models. This function is *required*. You should edit this function to add your code, but do *not* change the arguments
# of this function. If you do not train one of the models, then you can return None for the model.

# Train your model.
def train_model(data_folder, model_folder, verbose):
    # Find the data files.
    if verbose:
        print('Finding the Challenge data...')

    records = find_records(data_folder)
    num_records = len(records)

    if num_records == 0:
        raise FileNotFoundError('No data were provided.')

    # Extract the features and labels from the data.
    if verbose:
        print('Extracting features and labels from the data...')

    features = np.zeros((num_records, 6), dtype=np.float64)
    labels = np.zeros(num_records, dtype=bool)

    # Iterate over the records.
    for i in range(num_records):
        if verbose:
            width = len(str(num_records))
            print(f'- {i+1:>{width}}/{num_records}: {records[i]}...')

        record = os.path.join(data_folder, records[i])
        features[i] = extract_features(record)
        labels[i] = load_label(record)

    # Train the models.
    if verbose:
        print('Training the model on the data...')

    # This very simple example trains a random forest classifier on very simple features.

    # Define the parameters for the random forest classifier.
    n_estimators = 12  # Number of trees in the forest.
    max_leaf_nodes = 34  # Maximum number of leaf nodes in each tree.
    random_state = 56  # Random state; set for reproducibility.

    # Fit the model.
    model = RandomForestClassifier(
        n_estimators=n_estimators, max_leaf_nodes=max_leaf_nodes, random_state=random_state).fit(features, labels)

    # Create a folder for the model if it does not already exist.
    os.makedirs(model_folder, exist_ok=True)

    # Save the model.
    save_model(model_folder, model)

    if verbose:
        print('Done.')
        print()

# Load your trained models. This function is *required*. You should edit this function to add your code, but do *not* change the
# arguments of this function. If you do not train one of the models, then you can return None for the model.
def load_model(model_folder, verbose):
    model_filename = os.path.join(model_folder, 'model.sav')
    model = joblib.load(model_filename)
    return model

# Run your trained model. This function is *required*. You should edit this function to add your code, but do *not* change the
# arguments of this function.
def run_model(record, model, verbose):
    # Load the model.
    model = model['model']

    # Extract the features.
    features = extract_features(record)
    features = features.reshape(1, -1)

    # Get the model outputs.
    binary_output = model.predict(features)[0]
    probability_output = model.predict_proba(features)[0][1]

    return binary_output, probability_output

################################################################################
#
# Optional functions. You can change or remove these functions and/or add new functions.
#
################################################################################

# Extract your features.
def extract_features(record):
    header = load_header(record)
    age = get_age(header)
    sex = get_sex(header)

    one_hot_encoding_sex = np.zeros(3, dtype=bool)
    if sex == 'Female':
        one_hot_encoding_sex[0] = 1
    elif sex == 'Male':
        one_hot_encoding_sex[1] = 1
    else:
        one_hot_encoding_sex[2] = 1

    signal, fields = load_signals(record)

    # TO-DO: Update to compute per-lead features; see the extract_per_lead_features sketch below.
    # Check the lead order and use functions for reordering leads as needed.

    # Count the finite (non-NaN, non-infinite) samples so the summary statistics are only computed when they are defined.
    num_finite_samples = np.sum(np.isfinite(signal))
    if num_finite_samples > 0:
        signal_mean = np.nanmean(signal)
    else:
        signal_mean = 0.0
    if num_finite_samples > 1:
        signal_std = np.nanstd(signal)
    else:
        signal_std = 0.0

    features = np.concatenate(([age], one_hot_encoding_sex, [signal_mean, signal_std]))

    return np.asarray(features, dtype=np.float32)
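
# The function below is an illustrative, unused sketch of the per-lead statistics mentioned in the
# TO-DO above. It assumes that load_signals returns `signal` as a (num_samples, num_leads) array;
# the function name and the fixed mean/std feature layout are placeholder choices, and nothing else
# in this script calls it.
def extract_per_lead_features(signal):
    signal = np.asarray(signal, dtype=np.float64)
    num_leads = signal.shape[1]
    means = np.zeros(num_leads)
    stds = np.zeros(num_leads)
    for lead in range(num_leads):
        lead_signal = signal[:, lead]
        finite = np.isfinite(lead_signal)
        # Only compute statistics over finite samples; leave zeros for leads with no usable samples.
        if np.any(finite):
            means[lead] = np.mean(lead_signal[finite])
            stds[lead] = np.std(lead_signal[finite])
    return np.concatenate((means, stds))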

# Save your trained model.
def save_model(model_folder, model):
    d = {'model': model}
    filename = os.path.join(model_folder, 'model.sav')
    joblib.dump(d, filename, protocol=0)
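
# The block below is an optional, illustrative sketch of how the required functions above fit
# together in a quick local check; it is not an official Challenge entry point, and the
# 'training_data' and 'model' folder names are placeholders.
if __name__ == '__main__':
    data_folder = 'training_data'  # Placeholder path to a folder with training records.
    model_folder = 'model'         # Placeholder path for the saved model.
    verbose = True

    train_model(data_folder, model_folder, verbose)
    model = load_model(model_folder, verbose)

    # Run the model on the first training record as a smoke test.
    records = find_records(data_folder)
    if records:
        record = os.path.join(data_folder, records[0])
        binary_output, probability_output = run_model(record, model, verbose)
        print(f'{records[0]}: label={binary_output}, probability={probability_output:.3f}')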