Skip to content

Commit

Permalink
Updating Midterm 2 solutions
Browse files Browse the repository at this point in the history
  • Loading branch information
nathanielburbank authored Oct 30, 2017
1 parent 10e1d13 commit 03d28ff
Showing 1 changed file with 0 additions and 122 deletions.
122 changes: 0 additions & 122 deletions Midterms/2016 Midterm 2/2016_Midterm_2_solutions.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1658,128 +1658,6 @@
"source": [
"Yay! We beat the benchmarks on the observed data and did pretty well on the test data!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Autograding function"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def accuracy(y_true, y_pred):\n",
"    \"\"\"Return the fraction of entries where `y_pred` equals `y_true`.\"\"\"\n",
"    n_correct = ((y_true - y_pred) == 0).sum()\n",
"    return (n_correct * 1.) / len(y_true)\n",
"\n",
"\n",
"def beat_benchmark(flu_predict=None, pred_y_file_name=None, data_preprocessing=False):\n",
"    \"\"\"Check whether a set of flu predictions beats our benchmarks on the test set.\n",
"\n",
"    Supply the predictions in one of two ways: pass the prediction function\n",
"    the student wrote as `flu_predict`, or pass the name of a CSV file of\n",
"    already-predicted labels as `pred_y_file_name`. If both are given,\n",
"    `flu_predict` is used.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    flu_predict : callable (array -> array), optional\n",
"        Called once on the test features, i.e. every column of\n",
"        'data/flu_test.csv' except the last two.\n",
"    pred_y_file_name : str, optional\n",
"        CSV file whose last column holds the predicted labels, one per\n",
"        test row that has a non-missing 'flu' value.\n",
"    data_preprocessing : bool\n",
"        Only used together with `flu_predict`. If False the test features\n",
"        are fed to `flu_predict` with no processing; if True, object-typed\n",
"        columns are label-encoded and NaNs are filled with 0 first.\n",
"\n",
"    Returns\n",
"    -------\n",
"    str\n",
"        A message saying which benchmarks were beaten, or\n",
"        'params ill-specified' when no prediction source was given.\n",
"        The two per-class accuracies are printed as a side effect.\n",
"    \"\"\"\n",
"    # Per-class accuracy thresholds (class 0, class 1) of the benchmark models.\n",
"    acc_bm_0 = .69\n",
"    acc_bm_1 = .60\n",
"\n",
"    # Weaker baseline thresholds (presumably a random-guess model -- TODO confirm).\n",
"    acc_rm_0 = .5\n",
"    acc_rm_1 = .5\n",
"\n",
"    df_test = pd.read_csv('data/flu_test.csv')\n",
"    # Keep only rows with a known 'flu' value. Copy so that the column\n",
"    # assignment below acts on an independent frame, not on a slice.\n",
"    df_test = df_test[df_test['flu'].notna()].copy()\n",
"    df_test['flutype'] = df_test['flutype'] - 1\n",
"\n",
"    # The true labels are read from the second-to-last column.\n",
"    y_true = df_test.values[:, -2]\n",
"\n",
"    if flu_predict is not None:\n",
"        if data_preprocessing:\n",
"            encode = preprocessing.LabelEncoder()\n",
"            for column in df_test.columns:\n",
"                # `np.object` was removed in NumPy 1.24; the builtin `object` is equivalent.\n",
"                if df_test[column].dtype == object:\n",
"                    df_test.loc[:, column] = encode.fit_transform(df_test[column])\n",
"\n",
"            df_test = df_test.fillna(0)\n",
"\n",
"        x = df_test.values[:, :-2]\n",
"\n",
"        y_pred = flu_predict(x)\n",
"\n",
"    elif pred_y_file_name is not None:\n",
"        df_y_pred = pd.read_csv(pred_y_file_name)\n",
"        y_pred = df_y_pred.values[:, -1]\n",
"\n",
"    else:\n",
"        return 'params ill-specified'\n",
"\n",
"    # Accept list-like predictions too: the boolean-mask indexing below needs an array.\n",
"    y_pred = np.asarray(y_pred)\n",
"\n",
"    acc_0 = accuracy(y_true[y_true == 0], y_pred[y_true == 0])\n",
"    acc_1 = accuracy(y_true[y_true == 1], y_pred[y_true == 1])\n",
"\n",
"    print ('accuracies: {}, {}'.format(acc_0, acc_1))\n",
"\n",
"    # Test the stricter benchmark thresholds first. Checking the weaker\n",
"    # baseline thresholds first would report 'beats all benchmarks' for any\n",
"    # model above 0.5 and make the 'only baseline' branch unreachable.\n",
"    if acc_0 > acc_bm_0 and acc_1 > acc_bm_1:\n",
"        return \"accuracy: beats all benchmarks :)\"\n",
"    elif acc_0 > acc_rm_0 and acc_1 > acc_rm_1:\n",
"        return \"accuracy: beats only baseline models :/\"\n",
"    else:\n",
"        return \"accuracy: beats no benchmarks :(\""
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracies: 1.0, 0.0\n",
"accuracy: beats no benchmarks :(\n"
]
}
],
"source": [
"# Example 0: grade a constant predictor that labels every row as class 0\n",
"\n",
"def flu_predict(x):\n",
"    \"\"\"Predict class 0 for each row of `x`.\"\"\"\n",
"    n_rows = len(x)\n",
"    return np.array([0] * n_rows)\n",
"\n",
"print(beat_benchmark(flu_predict=flu_predict, data_preprocessing=False))"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracies: 0.7191558441558441, 0.6112956810631229\n",
"accuracy: beats all benchmarks :)\n"
]
}
],
"source": [
"# Example 1: grade predictions saved in a CSV file (labels are read from its last column)\n",
"print(\n",
"    beat_benchmark(\n",
"        flu_predict=None,\n",
"        pred_y_file_name='data/Example_1.csv',\n",
"    )\n",
")"
]
}
],
"metadata": {
Expand Down

0 comments on commit 03d28ff

Please sign in to comment.