-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_HPOFunc.py
299 lines (239 loc) · 14.4 KB
/
test_HPOFunc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
import pandas as pd
import re
import numpy as np
import itertools
import math
from pyhpo.ontology import Ontology
import HPOFunc
NullList=["none", "none documented", "nil", "(borderline)", "no concerns"]
def test_check_and_set_nan():
for item in NullList:
output = HPOFunc.check_and_set_nan(item, NullList)
expect = ""
assert output == expect, "Expected: " + expect + " Got: " + output
test2 = "Something else"
output2 = HPOFunc.check_and_set_nan(test2, NullList)
expect2 = "Something else"
assert output2 == expect2, "Expected: " + expect2 + " Got: " + output2
print("Testing check_and_set_nan")
test_check_and_set_nan()
def test_drop_leading_hp():
test1 = "HP:0000001"
test2 = "0000001"
test3 = "HP0000001"
output1 = HPOFunc.drop_leading_hp(test1)
output2 = HPOFunc.drop_leading_hp(test2)
output3 = HPOFunc.drop_leading_hp(test3)
expect1 = ":0000001"
expect2 = "0000001"
expect3 = "0000001"
assert output1 == expect1, "Expected: " + expect1 + " Got: " + output1
assert output2 == expect2, "Expected: " + expect2 + " Got: " + output2
assert output3 == expect3, "Expected: " + expect3 + " Got: " + output3
print("Testing drop_leading_hp")
test_drop_leading_hp()
def test_HPOSorter():
test1 = "None"
test2 = "Allergy"
test3 = "HP:0012393"
test4 = "Allergy, HP:0500093"
test5 = "HP:0500093 Food allergy Nystagmus HP:0000639"
test6 = "Weird edge case"
output1 = HPOFunc.HPOSorter(test1)
output2 = HPOFunc.HPOSorter(test2)
output3 = HPOFunc.HPOSorter(test3)
output4 = HPOFunc.HPOSorter(test4)
output5 = HPOFunc.HPOSorter(test5)
output6 = HPOFunc.HPOSorter(test6)
# Outputs look like numeric (no HP) values and non numeric values
# should be strings inside lists, as a tuple
expect1 = ([], [])
expect2 = ([], ["Allergy"])
expect3 = (['0012393'], [])
expect4 = (["0500093"], ['Allergy'])
expect5 = (["0500093", "0000639"], ['Food allergy Nystagmus '])
expect6 = ([], ['Weird edge case'])
assert output1 == expect1, "Expected: " + str(expect1) + " Got: " + str(output1)
assert output2 == expect2, "Expected: " + str(expect2) + " Got: " + str(output2)
assert output3 == expect3, "Expected: " + str(expect3) + " Got: " + str(output3)
assert output4 == expect4, "Expected: " + str(expect4) + " Got: " + str(output4)
assert output5 == expect5, "Expected: " + str(expect5) + " Got: " + str(output5)
assert output6 == expect6, "Expected: " + str(expect6) + " Got: " + str(output6)
print("Testing HPOSorter")
test_HPOSorter()
print("Loading Ontology")
ontology=Ontology()
def test_get_hpo_or_error():
# Need to redo these tests
# Want a standard HPO code or two
# Want viable outputs for Process_Type = "None", "Non_numeric" and "Numeric"
# Also want a test case for if I put the wrong process type in
# Then I want to try lots of different HPO codes, not going to call from the ontology for the expected
# Running stuff without errors
test_case_easy1 = "HP:0001166"
test_case_easy2 = "Arachnodactyly"
expected_easy = "HP:0001166 | Arachnodactyly"
output_easy1 = HPOFunc.get_hpo_or_error(test_case_easy1, "Numeric")
output_easy1a = HPOFunc.get_hpo_or_error(test_case_easy1, "None")
output_easy2 = HPOFunc.get_hpo_or_error(test_case_easy2, "Non_numeric")
output_easy2a = HPOFunc.get_hpo_or_error(test_case_easy2, "None")
assert str(output_easy1) == expected_easy, "Expected:" + expected_easy + "Got: " + output_easy1
assert str(output_easy1a) == expected_easy, "Expected:" + expected_easy + "Got: " + output_easy1a
assert str(output_easy2) == expected_easy, "Expected:" + expected_easy + "Got: " + output_easy2
assert str(output_easy2a) == expected_easy, "Expected:" + expected_easy + "Got: " + output_easy2a
# Next to just check if I give it a nonsense process type it won't return something
nonsense_test = HPOFunc.get_hpo_or_error(test_case_easy1, "supercalifragilisticexpelidocious")
assert nonsense_test is None, "Expected: None Got: " + str(nonsense_test)
# Now to test how it handles errors
test_none = "Not a HPO code"
output_none = HPOFunc.get_hpo_or_error(test_none, "None")
assert output_none == "", "Expected: '' Got: " + output_none
test_none_num = "123456789"
output_numeric = HPOFunc.get_hpo_or_error(test_none_num, "Numeric")
assert output_numeric == f"Error: HP:{test_none_num}", "Expected: '' Got: " + output_numeric
test_lower = "arachnodactyly"
output_lower = HPOFunc.get_hpo_or_error(test_lower, "Non_numeric")
assert str(output_lower) == expected_easy, "Expected:" + expected_easy + "Got: " + str(output_lower)
# arguably non-numeric could be tested more thoroughly but happy with this for now
print("Testing get_hpo_or_error")
test_get_hpo_or_error()
def test_list_to_csv():
test1 = ["HP:0000001", "HP:0000002", "HP:0000003"]
output1 = HPOFunc.list_to_csv(test1)
expect1 = "HP:0000001; HP:0000002; HP:0000003"
assert output1 == expect1, "Expected: " + expect1 + " Got: " + output1
print("Testing list_to_csv")
test_list_to_csv()
def test_HPOOutPutter():
# Need quite a few test cases here
# Test case 1: No numeric values, no non-numeric values
num_test1 = []
non_num_test1 = []
output1a, output1b, prob1 = HPOFunc.HPOOutPutter(num_test1, non_num_test1)
assert output1a == [], "Expected: '[]' Got: " + output1a
assert output1b == [], "Expected:" + '[]' + "Got: " + output1b
assert prob1 == "", "Expected:" + "" + "Got: " + str(prob2)
# Test case 2a: No numeric values, some non-numeric values
num_test2 = ["0000077", "0000890"]
non_num_test2 = []
output2a, output2b, prob2 = HPOFunc.HPOOutPutter(num_test2, non_num_test2)
expected2 = ["HP:0000077 | Abnormality of the kidney", "HP:0000890 | Long clavicles"]
assert output2a == expected2, "Expected: " + str(expected2) + " Got: " + str(output2a)
assert output2b == [], "Expected:" + '[]' + "Got: " + str(output2b)
assert prob2 == "", "Expected:" + "" + "Got: " + str(prob2)
#Test 2b: No numeric values, some non-numeric values, some error non-numeric errors
num_test2 = ["0000077", "0000890"]
non_num_test2 = ["Pizza"]
output2a, output2b, prob2 = HPOFunc.HPOOutPutter(num_test2, non_num_test2)
expected2 = ["HP:0000077 | Abnormality of the kidney", "HP:0000890 | Long clavicles"]
assert output2a == expected2, "Expected: " + str(expected2) + " Got: " + str(output2a)
assert output2b == [], "Expected:" + '[]' + "Got: " + str(output2b)
assert prob2 == "Error: Pizza", "Expected:" + "" + "Got: " + str(prob2)
# Test case 3a: Some numeric values, no non-numeric values
num_test3 = []
non_num_test3 = ["Long clavicles", "Abnormality of the kidney"]
output3a, output3b, prob3 = HPOFunc.HPOOutPutter(num_test3, non_num_test3)
expected3 = ["HP:0000890 | Long clavicles", "HP:0000077 | Abnormality of the kidney"]
assert output3a == [], "Expected: '[]' Got: " + str(output3a)
assert output3b == expected3, "Expected:" + str(expected3) + "Got: " + str(output3b)
assert prob3 == "", "Expected:" + "" + "Got: " + str(prob3)
# Test case 3b: Some numeric values, some non-numeric values, some error numeric errors
num_test3 = ["1218476100"]
non_num_test3 = ["Long clavicles", "Abnormality of the kidney"]
output3a, output3b, prob3 = HPOFunc.HPOOutPutter(num_test3, non_num_test3)
expected3 = ["HP:0000890 | Long clavicles", "HP:0000077 | Abnormality of the kidney"]
assert output3a == [], "Expected: '[]' Got: " + str(output3a)
assert output3b == expected3, "Expected:" + str(expected3) + "Got: " + str(output3b)
assert prob3 == "Error: HP:1218476100", "Expected:" + "" + "Got: " + str(prob3)
# Test case 4: Some numeric values, some non-numeric values
num_test4 = ["0002401", "0000717"]
non_num_test4 = ["Stroke-like episode", "Increased vertebral height"]
expected_num4 = ["HP:0002401 | Stroke-like episode", "HP:0000717 | Autism"]
expected_non_num4 = ["HP:0002401 | Stroke-like episode", "HP:0004570 | Increased vertebral height"]
output4a, output4b, prob4 = HPOFunc.HPOOutPutter(num_test4, non_num_test4)
assert output4a == expected_num4, "Expected: " + str(expected_num4) + " Got: " + str(output4a)
assert output4b == expected_non_num4, "Expected:" + str(expected_non_num4) + "Got: " + str(output4b)
assert prob4 == "", "Expected:" + "" + "Got: " + str(prob4)
# Test case 5: Some numeric values, some non-numeric values, some errors
num_test5 = ["0002401", "0000717", "123456"]
non_num_test5 = ["Stroke-like episode", "Increased vertebral height", "Pizza"]
expected_num5 = ["HP:0002401 | Stroke-like episode", "HP:0000717 | Autism"]
expected_non_num5 = ["HP:0002401 | Stroke-like episode", "HP:0004570 | Increased vertebral height"]
expected_prob5 = "Error: HP:123456; Error: Pizza"
output5a, output5b, prob5 = HPOFunc.HPOOutPutter(num_test5, non_num_test5)
assert output5a == expected_num5, "Expected: " + str(expected_num5) + " Got: " + str(output5a)
assert output5b == expected_non_num5, "Expected:" + str(expected_non_num5) + "Got: " + str(output5b)
assert prob5 == expected_prob5, "Expected:" + expected_prob5 + "Got: " + str(expected_prob5)
print("Testing HPOOutPutter")
test_HPOOutPutter()
def test_HPOSquisher():
# Fairly routine? This function isn't designed to deal with errors, just to squash things together
# Test case 1: No numeric values, no non-numeric values
num_test1 = []
non_num_test1 = []
output1 = HPOFunc.HPOSquisher(num_test1, non_num_test1)
expect1 = ""
assert output1 == expect1, "Expected: " + expect1 + " Got: " + output1
# Test case 2: No numeric values, some non-numeric values
num_test2 = []
non_num_test2 = ["HP:0012170 | Nail-biting", "HP:0007302 | Bipolar affective disorder"]
output2 = HPOFunc.HPOSquisher(num_test2, non_num_test2)
expect2 = "HP:0007302 | Bipolar affective disorder; HP:0012170 | Nail-biting"
assert str(output2) == expect2, "Expected: " + expect2 + " Got: " + str(output2)
# Test case 3: Some numeric values, no non-numeric values
num_test3 = ["HP:0007302 | Bipolar affective disorder", "HP:0012170 | Nail-biting"]
non_num_test3 = []
output3 = HPOFunc.HPOSquisher(num_test3, non_num_test3)
expect3 = "HP:0007302 | Bipolar affective disorder; HP:0012170 | Nail-biting"
assert str(output3) == expect3, "Expected: " + str(expect3) + " Got: " + str(output3)
# Test case 4: Some numeric values, some non-numeric values, make sure there's intercepts
num_test4 = ["HP:0007302 | Bipolar affective disorder", "HP:0012170 | Nail-biting"]
non_num_test4 = ["HP:0002401 | Stroke-like episode", "HP:0007302 | Bipolar affective disorder"]
output4 = HPOFunc.HPOSquisher(num_test4, non_num_test4)
expect4 = "HP:0002401 | Stroke-like episode; HP:0007302 | Bipolar affective disorder; HP:0012170 | Nail-biting"
assert str(output4) == expect4, "Expected: " + str(expect4) + " Got: " + str(output4)
print("Testing HPOSquisher")
test_HPOSquisher()
def test_process_column():
# So I want to test this on cell entries but then I want to test it on a whole column of a df
# Test case 1: Single cell entry, no numeric values, no non-numeric values
test1 = pd.NA
termlist1, problems1 = HPOFunc.process_column(test1)
expect1 = ("", "")
assert termlist1 == expect1[0], "Expected: " + str(expect1[0]) + " Got: " + str(termlist1)
assert problems1 == expect1[1], "Expected: " + str(expect1[1]) + " Got: " + str(problems1)
# Test case 2: Single cell entry, no numeric values, some non-numeric values
test2 = "Nail-biting, Bipolar affective disorder"
termlist2, problems2 = HPOFunc.process_column(test2)
expect2 = ("HP:0007302 | Bipolar affective disorder; HP:0012170 | Nail-biting", "")
assert termlist2 == expect2[0], "Expected: " + str(expect2[0]) + " Got: " + str(termlist2)
assert problems2 == expect2[1], "Expected: " + str(expect2[1]) + " Got: " + str(problems2)
# Test case 3: Single cell entry, some numeric values, no non-numeric values
test3 = "HP:0007302, HP:0012170"
termlist3, problems3 = HPOFunc.process_column(test3)
expect3 = ("HP:0007302 | Bipolar affective disorder; HP:0012170 | Nail-biting", "")
assert termlist3 == expect3[0], "Expected: " + str(expect3[0]) + " Got: " + str(termlist3)
assert problems3 == expect3[1], "Expected: " + str(expect3[1]) + " Got: " + str(problems3)
# Test case 4: Single cell entry, some numeric values, some non-numeric values
test4 = "HP:0007302, Nail-biting"
termlist4, problems4 = HPOFunc.process_column(test4)
expect4 = ("HP:0007302 | Bipolar affective disorder; HP:0012170 | Nail-biting", "")
assert termlist4 == expect4[0], "Expected: " + str(expect4[0]) + " Got: " + str(termlist4)
assert problems4 == expect4[1], "Expected: " + str(expect4[1]) + " Got: " + str(problems4)
# Test case 5: Single cell entry, some numeric values, some non-numeric values, some errors
test5 = "HP:0007302 | Bipolar affective disorder, Nail-biting, Pizza"
termlist5, problems5 = HPOFunc.process_column(test5)
expect5 = ("HP:0007302 | Bipolar affective disorder; HP:0012170 | Nail-biting", "Error: Pizza")
assert termlist5 == expect5[0], "Expected: " + str(expect5[0]) + " Got: " + str(termlist5)
assert problems5 == expect5[1], "Expected: " + str(expect5[1]) + " Got: " + str(problems5)
# Test case 6: Whole column of a dataframe and populate it with entries from previous tests
df = pd.DataFrame({"Test": [test1, test2, test3, test4, test5],
"Expected": [expect1[0], expect2[0], expect3[0], expect4[0], expect5[0]],
"Problems": [expect1[1], expect2[1], expect3[1], expect4[1], expect5[1]],
})
df["Output"], df["Output_Problems"] = zip(*df["Test"].map(HPOFunc.process_column))
assert df["Output"].equals(df["Expected"]), "Expected: " + str(df["Expected"]) + " Got: " + str(df["Output"])
print("Testing process_column")
test_process_column()
# Should probably write some tests for the scoring functions but uh, in the mean time, good job me
print("All your tests have passed! You are a super star!")