forked from cmap/merino
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathassemble_no_davepool.py
62 lines (46 loc) · 3.02 KB
/
assemble_no_davepool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import argparse
import merino
import prism_metadata
import assemble
import setup_logger
import logging
import davepool_data
import sys
import cmapPy.pandasGEXpress.write_gct as write_gct
# Module-level logger, looked up under the name that setup_logger defines.
logger = logging.getLogger(setup_logger.LOGGER_NAME)
def build_parser():
    """Create the command-line argument parser for this script.

    Positional arguments, in order: ``prism_replicate_name``,
    ``plate_map_path``, ``csv_filepath``.
    Optional arguments: ``-verbose``/``-v``, ``-config_filepath``,
    ``-plate_map_type``/``-pmt``, ``-cell_set_definition_file``/``-csdf``.

    Returns:
        argparse.ArgumentParser: the configured parser; its help formatter
        appends each argument's default value to the help text.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Optional flags controlling logging and configuration lookup.
    cli.add_argument(
        "-verbose", '-v',
        action="store_true",
        default=False,
        help="Whether to print a bunch of output",
    )
    cli.add_argument(
        "-config_filepath",
        type=str,
        default=merino.default_config_filepath,
        help="path to the location of the configuration file",
    )

    # Required positionals; declaration order fixes the command-line order.
    cli.add_argument(
        "prism_replicate_name",
        type=str,
        help="name of the prism replicate that is being processed",
    )
    cli.add_argument(
        "plate_map_path",
        type=str,
        help="path to file containing plate map describing perturbagens used",
    )
    cli.add_argument(
        "csv_filepath",
        type=str,
        help="path to csv file containing data",
    )

    # Optional overrides for plate map interpretation and cell set definition.
    cli.add_argument(
        "-plate_map_type", "-pmt",
        choices=prism_metadata.plate_map_types,
        default=prism_metadata.plate_map_type_CM,
        help="type of the plate map",
    )
    cli.add_argument(
        "-cell_set_definition_file", "-csdf",
        type=str,
        default=None,
        help="file containing cell set definition to use, overriding config file",
    )

    return cli
def main(args):
    """Assemble the median and count outputs for one replicate and write them.

    Reads the csv data, the cell metadata and the plate-map perturbagens,
    builds per-cell median and count data, then for each of the two builds
    an object via ``assemble.build_gctoo`` and writes it with
    ``write_gct.write`` to ``<prism_replicate_name>_MEDIAN.gct`` and
    ``<prism_replicate_name>_COUNT.gct`` respectively.

    Args:
        args: parsed arguments providing ``csv_filepath``, ``config_filepath``,
            ``cell_set_definition_file``, ``plate_map_path``,
            ``plate_map_type`` and ``prism_replicate_name``.

    Returns:
        tuple: ``(median_gctoo, count_gctoo)`` as returned by
        ``assemble.build_gctoo``.
    """
    # Load the measured data from the csv file.
    davepool = davepool_data.read_data(args.csv_filepath)

    # Load cell metadata using the config file and the optional
    # cell-set-definition override.
    cells = prism_metadata.read_prism_cell_from_file(
        args.config_filepath, args.cell_set_definition_file, 'cell_set_definition')
    logger.info("len(prism_cell_list): {}".format(len(cells)))

    # Build the perturbagen list from the plate map file.
    perturbagens = prism_metadata.build_perturbagens_from_file(
        args.plate_map_path, args.plate_map_type, args.config_filepath)
    logger.info("len(perturbagen_list): {}".format(len(perturbagens)))

    median_data_by_cell, count_data_by_cell = assemble.build_data_by_cell(cells, davepool)

    replicate = args.prism_replicate_name
    # Median is built and written first, then count, matching the return order.
    outputs = (
        (median_data_by_cell, "_MEDIAN.gct"),
        (count_data_by_cell, "_COUNT.gct"),
    )

    built = []
    for data_by_cell, suffix in outputs:
        gctoo = assemble.build_gctoo(replicate, perturbagens, data_by_cell)
        write_gct.write(gctoo, replicate + suffix, data_null=assemble._NaN,
                        filler_null=assemble._null)
        built.append(gctoo)

    return tuple(built)
if __name__ == "__main__":
    # Script entry point: parse the command line, configure logging from the
    # verbosity flag, then run the assembly.
    cli_parser = build_parser()
    args = cli_parser.parse_args(sys.argv[1:])

    setup_logger.setup(verbose=args.verbose)
    logger.debug("args: {}".format(args))

    main(args)