-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprediction_stat.py
39 lines (30 loc) · 1.24 KB
/
prediction_stat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import re
from Bio import SeqIO
from ass_jobs_stat import get_time_from_log
import busco_resolver
def get_gene_num(pred_aa):
    """Return the number of sequence records in a FASTA file.

    Args:
        pred_aa: FASTA input handed unchanged to ``SeqIO.parse``.

    Returns:
        int: Number of records the parser yields.
    """
    # Only the count is needed, so stream the records instead of keeping
    # every SeqRecord alive in a dict. Unlike SeqIO.to_dict, this does not
    # raise ValueError when two records share an id; each record is counted.
    return sum(1 for _ in SeqIO.parse(pred_aa, 'fasta'))
def get_interproscan_item_num(ips_tsv):
    """Return the number of lines in the file at ``ips_tsv``.

    Every line is counted, including blank ones; no header is skipped.

    Args:
        ips_tsv: Path of the file to count (presumably InterProScan TSV
            output, judging by the name -- verify against the caller).

    Returns:
        int: Total line count; 0 for an empty file.
    """
    with open(ips_tsv, 'r') as f:
        # Iterate the handle lazily rather than loading all lines into a
        # list with readlines() just to measure its length.
        return sum(1 for _ in f)
def get_running_time(braker_log):
    """Return what ``get_time_from_log`` reports for ``braker_log``.

    Thin wrapper: the argument is forwarded unchanged to
    ``get_time_from_log`` (imported from ``ass_jobs_stat``) and the
    helper's result is returned as-is.
    """
    elapsed = get_time_from_log(braker_log)
    return elapsed
# Deprecated: get the running time from the snakemake log.
#
# def get_running_time(snakemake_dic : dict, ass, rule) -> int:
# # {jobid : (rule, input_ass, start_date_parse, finish_date_parse)}
# for job, info in snakemake_dic.items():
# if ass in info[1] and info[0] == rule:
# return str((info[3] - info[2]).total_seconds())
def get_matches_num():
    """Placeholder with no implementation yet; always returns ``None``."""
    return None
def main():
    """Ad-hoc driver: resolve one BUSCO summary file and print its count.

    NOTE(review): the original called the imported ``busco_resolver``
    module as if it were a function (``TypeError: 'module' object is not
    callable``) and used ``get_busco_count`` without defining or importing
    it (``NameError``). Both are now looked up as attributes of the
    module. This assumes ``busco_resolver`` exposes a function of the same
    name plus ``get_busco_count`` -- TODO confirm against
    busco_resolver.py.
    """
    # Deprecated snakemake-log usage, kept for reference:
    # snakemake_dic = log2dic(r'D:\new_ncbi_dataset\result\sel_Haptophyta_2830\.snakemake\log\2022-03-03T231159.055539.snakemake.log')
    # get_running_time(snakemake_dic, 'GCA_019693415.1', 'braker2')
    busco_summary = r'D:\new_ncbi_dataset\tmp_code\sel_Rhodophyta\short_summary.specific.chlorophyta_odb10.GCA_001275005.1_augustus.ab_initio.aa.busco.txt'
    res_lis = busco_resolver.busco_resolver(busco_summary)
    print(busco_resolver.get_busco_count(res_lis))
# Run the driver only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()