forked from cmap/merino
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_parse_data.py
114 lines (84 loc) · 3.9 KB
/
test_parse_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import setup_logger
import logging
import unittest
import ConfigParser
import parse_data as pd
# Module-level logger, looked up under the name that setup_logger defines.
logger = logging.getLogger(setup_logger.LOGGER_NAME)
# Path of the input file handed to pd.read_data in test_read_data.
test_file = "functional_tests/test_prism_metadata/prism_cell_tsv.txt"
# Section names passed to ConfigParser.items() after the tests read
# prism_pipeline.cfg; each section supplies the header pairs given to
# pd.generate_header_map.
_prism_cell_config_file_section = "PrismCell column headers"
_perturbagen_CM_input_config_file_section = "Perturbagen CM input column headers"
class Dummy(object):
    """Empty attribute container used as the target class for pd.parse_data.

    Its text form lists every instance attribute as ``name:value``,
    separated by single spaces, which makes debug log output readable.
    """

    def __repr__(self):
        """Return the instance attributes rendered as space-separated ``name:value`` pairs."""
        rendered = []
        for attr_name, attr_value in self.__dict__.items():
            rendered.append("{}:{}".format(str(attr_name), str(attr_value)))
        return " ".join(rendered)

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return self.__repr__()
class TestParseData(unittest.TestCase):
    """Unit tests for the ``parse_data`` module (imported in this file as ``pd``).

    Checks use ``unittest`` assertion methods rather than bare ``assert``
    statements so that they still run under ``python -O`` and report the
    offending values on failure.
    """

    _config_path = "prism_pipeline.cfg"

    def _read_header_pairs(self, section):
        """Return the ``(option, value)`` pairs of ``section`` from the pipeline config.

        Fails the calling test with an explicit message when the config file
        cannot be read, instead of surfacing later as a missing-section error.
        """
        cp = ConfigParser.RawConfigParser()
        # RawConfigParser.read silently skips unreadable files and returns
        # the list of files it actually parsed.
        files_read = cp.read(self._config_path)
        self.assertTrue(
            files_read, "could not read config file: {}".format(self._config_path))
        return cp.items(section)

    def test_read_data(self):
        """read_data returns headers containing ``pool_id`` and a non-empty data collection."""
        (h, d) = pd.read_data(test_file)

        self.assertIsNotNone(h)
        logger.debug("h: {}".format(h))
        self.assertIn("pool_id", h)

        self.assertIsNotNone(d)
        logger.debug("d: {}".format(d))
        self.assertGreater(len(d), 0)

    def test_generate_header_map(self):
        """generate_header_map drops or keeps unknown headers according to its third argument."""
        headers = ["pool_id", "analyte", "strippedname", "extra_header"]
        internal_header_file_header_pairs = self._read_header_pairs(
            _prism_cell_config_file_section)

        # happy path ignore extra field
        r = pd.generate_header_map(headers, internal_header_file_header_pairs, False)
        logger.debug("r: {}".format(r))
        self.assertEqual(len(r), 3)
        self.assertNotIn("extra_header", r)
        self.assertIn("pool_id", r)
        self.assertEqual(r["pool_id"], 0)

        # happy path include extra field
        r = pd.generate_header_map(headers, internal_header_file_header_pairs, True)
        logger.debug("r: {}".format(r))
        self.assertEqual(len(r), 4)
        self.assertIn("extra_header", r)
        self.assertEqual(r["extra_header"], 3)

    def test__parse_data(self):
        """parse_data builds one object per row and converts numeric-looking values."""
        headers = ["pool_id", "analyte", "strippedname"]
        header_map = pd.generate_header_map(
            headers, self._read_header_pairs(_prism_cell_config_file_section), False)

        data = [["1", "analyte 2", "my cell's name"], ["3", "analyte 5", "autre cell nom"]]
        r = pd.parse_data(header_map, data, Dummy)
        logger.debug("r: {}".format(r))
        self.assertEqual(len(r), len(data))

        # A mapped header whose column index lies beyond every row must not
        # change the number of parsed objects.
        header_map["extra header that doesn't have data in any row"] = 10
        r = pd.parse_data(header_map, data, Dummy)
        logger.debug("r: {}".format(r))
        self.assertEqual(len(r), len(data))

        # An empty cell is expected to come back as None
        # (presumably the "analyte" column maps to analyte_id via the config).
        data.append(["7", "", "blah"])
        r = pd.parse_data(header_map, data, Dummy)
        self.assertIsNone(r[2].analyte_id)

        headers = ["well_position", "compound_well_mmoles_per_liter", "dilution_factor"]
        header_map = pd.generate_header_map(
            headers, self._read_header_pairs(_perturbagen_CM_input_config_file_section), False)

        data = [["A01", "1.010101", "2"], ["B07", "3.030303", "5"]]
        r = pd.parse_data(header_map, data, Dummy)
        logger.debug("r: {}".format(r))
        self.assertEqual(len(r), len(data))

        self.assertTrue(hasattr(r[0], "compound_well_mmoles_per_liter"), r[0].__dict__)
        self.assertIsInstance(r[0].compound_well_mmoles_per_liter, float)
        self.assertEqual(r[0].compound_well_mmoles_per_liter, 1.010101)

        self.assertIsInstance(r[0].dilution_factor, int)
        self.assertEqual(r[0].dilution_factor, 2)

        self.assertIsInstance(r[1].compound_well_mmoles_per_liter, float)
        self.assertIsInstance(r[1].dilution_factor, int)

    def test__parse_raw_value(self):
        """parse_raw_value maps "" to None, numeric text to numbers, and keeps other text."""
        r = pd.parse_raw_value("")
        self.assertIsNone(r)

        r = pd.parse_raw_value("6")
        self.assertEqual(r, 6)

        r = pd.parse_raw_value("6.1")
        self.assertEqual(r, 6.1)

        r = pd.parse_raw_value("hello world")
        self.assertEqual(r, "hello world")
if __name__ == "__main__":
    # Script entry point: turn on verbose logging, then hand control to
    # unittest's command-line runner.
    setup_logger.setup(verbose=True)

    unittest.main()