-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlaughing-nemesis.py
executable file
·93 lines (75 loc) · 3.52 KB
/
laughing-nemesis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
__author__='pier-luc'
__date__ = '2014-07-09'
__version__ = 0.1
import sys
from Modules.OptionParser import OptionParser
if __name__ == "__main__":
    # Command-line driver.  The first argument selects the sub-command
    # ("prepare", "lca", "identify" or "plot"); every other option is read
    # from the mapping returned by OptionParser.getArguments().
    parser = OptionParser(sys.argv[1:])
    args = parser.getArguments()

    # Indexing sys.argv[1] directly raises IndexError when the script is
    # started without any argument; fail with a readable message instead.
    if len(sys.argv) < 2:
        sys.stderr.write("No command given. Possible commands are:\n"
                         " prepare, lca, identify, plot\n")
        sys.exit(1)
    command = sys.argv[1]

    ######## PREPARE #################
    if command == "prepare":
        # Wildcard imports are kept at module level on purpose: they are a
        # syntax error inside a function on Python 3.
        from FindLastCommonAncester import *
        # Nothing is done unless the 't', 'f' and 'n' options are all set.
        if args['t'] and args['f'] and args['n']:
            prepareData(args)

    ######## LCA ###################
    # The LCA command only runs when both the 'd' and 'c' options are set.
    if command == "lca" and args['d'] and args['c']:
        from FindLastCommonAncester import *

        # 'r' is optional; when given it must be a known taxonomic level.
        if args['r'] is not None and args['r'] not in possible_taxonomic_level:
            sys.stderr.write("Bad taxonomic level. Possible choices are:\n %s\n" % possible_taxonomic_level)
            # This is an error path, so report it with a non-zero status
            # (the previous exit status of 0 signalled success).
            sys.exit(1)

        sys.stderr.write("Loading tree of life\n")
        tree = prepareTreeOfLife()
        sys.stderr.write("Tree of life loaded!\n")

        sys.stderr.write("Loading genome to taxon converter\n")
        converter = prepareGenomeToTaxonConverter()
        sys.stderr.write("Genome to taaxon converter loaded\n")

        sys.stderr.write("Searching best matches for contigs\n")
        contigs = findContigsID(args)
        sys.stderr.write("Searching is done!\n")

        sys.stderr.write("Searching LCA\n")
        contigs = executeLCA(contigs, tree, converter, args["v"])

        # With a taxonomic level the output goes through out_by_max_depth,
        # otherwise through out_by_contig.
        if "r" in args and args['r'] is not None:
            out_by_max_depth(contigs, tree, args["r"])
        else:
            out_by_contig(contigs)

    ################ IDENTIFY ###################
    # NOTE(review): this import runs for every command, as in the original
    # layout; the plot section below may rely on names it provides - confirm
    # before moving it inside the "identify" branch.
    from FindContigsIdWithBiologicalAbundance import *
    if command == "identify":
        directory = args["d"]
        numberOfBestMatch = args["b"]

        # First step: get the contig identification file paths, from the
        # 'path' option when it is given, otherwise from the directory.
        if args["path"] is not None:
            contigIdentificationsFiles = readPathsFile(args["path"])
        else:
            contigIdentificationsFiles = getPathsFromDirectory(directory)

        # Second step: read the Contigs.tsv file from _DeNovoAssembly directory.
        biologicalAbundanceContigs = readContigsTSVfile(directory)

        # Third step: for each ContigsIdentification.tsv file, read each line
        # and put it at the right place.
        for files in contigIdentificationsFiles:
            try:
                readContigIdentificationFiles(files, biologicalAbundanceContigs)
            except Exception:
                # Best effort: an unreadable file is reported and skipped.
                # Catching Exception (not a bare except) lets Ctrl-C and
                # SystemExit still stop the program.
                warning("Unable to read: " + files)

        # Fourth step: calculate PL-values.
        for contigs in biologicalAbundanceContigs:
            biologicalAbundanceContigs[contigs].calculatePLvalues()

        # Fifth step: keep only the best identifications of each contig (the
        # 'b' option is passed through); contigs without any identification
        # are left untouched.
        for contigs in biologicalAbundanceContigs:
            if len(biologicalAbundanceContigs[contigs].contigIdentifications) > 0:
                biologicalAbundanceContigs[contigs].selectBestIdentifications(numberOfBestMatch)

        # Last step: write to stdout!
        showTSV(biologicalAbundanceContigs)

    ############ PLOT_single #########################
    if command == "plot":
        from PlotBiologicalAbundance import *

        # The 't' option selects the reader used for the file given by 'f'.
        if args["t"] == "taxonomy":
            data = read_taxonomy_file(args["f"], args["m"])
        elif args["t"] == "db":
            data = read_db_file(args["f"])
        else:
            raise ValueError("The file type specified is invalid. Must be 'taxonomy' or 'db'")

        stacked_bar_plot_single_simple(data, args["value"])