-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathselect_ass_to_lineage.py
39 lines (31 loc) · 1.29 KB
/
select_ass_to_lineage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
from ncbi_datasets_resolver import resolve_ass_json
from ncbi_datasets_resolver import get_ass_list_from_json
import shutil
# Root directories. Per the original note, store_top may point at either
# genomes_store or genomes_rec.
store_top = r'/mnt/d/new_ncbi_dataset/genomes_store'
rec_top = r'/mnt/d/new_ncbi_dataset/genomes_rec_part'

# Every entry of store_top is treated as a lineage directory whose report
# lives at <lineage>/ncbi_dataset/data/assembly_data_report.jsonl
lineage_lis = os.listdir(store_top)

# Lineage directory name -> assembly list extracted from that lineage's report.
line_ass_dic = {
    lineage: get_ass_list_from_json(
        resolve_ass_json(
            os.path.join(
                store_top,
                lineage,
                'ncbi_dataset',
                'data',
                'assembly_data_report.jsonl',
            )
        )
    )
    for lineage in lineage_lis
}
def parse_full_lineage(part):
    """Return the first key of ``line_ass_dic`` that contains ``part``.

    ``part`` is converted with ``str()`` and tested as a plain substring of
    each key, in the dictionary's iteration order. Because the test is a
    substring check, ``2870`` also matches a key containing ``28701``.

    Returns ``None`` when no key contains ``part``.
    """
    needle = str(part)
    return next((name for name in line_ass_dic if needle in name), None)
def select_lineage(taxid):
    """Copy every assembly directory of the lineage matching ``taxid``.

    The lineage is resolved once via ``parse_full_lineage``. Its assemblies
    (looked up in ``line_ass_dic``) are copied from
    ``/mnt/d/new_ncbi_dataset/genomes_rec/<lineage>/<assembly>`` into
    ``<rec_top>/sel_<lineage>/<assembly>``. The target directory name and the
    assembly list are printed before copying.

    Args:
        taxid: Value whose string form identifies the lineage directory.

    Raises:
        KeyError: If no lineage directory name contains ``taxid``.
        FileExistsError: If the ``sel_<lineage>`` directory, or one of the
            per-assembly destinations, already exists.
    """
    # Resolve once: the lookup is loop-invariant, and a missing match would
    # otherwise surface later as an uninformative ``KeyError: None``.
    lineage = parse_full_lineage(taxid)
    if lineage is None:
        raise KeyError(
            'no lineage directory matches taxid {!r}'.format(taxid)
        )

    select_ass_lis = line_ass_dic[lineage]
    dir_name = 'sel_' + str(lineage)
    print(dir_name + ' ' + str(len(select_ass_lis)))
    print(select_ass_lis)

    dest_root = os.path.join(rec_top, dir_name)
    # Deliberately no exist_ok: an existing target directory is an error,
    # exactly as before.
    os.makedirs(dest_root)

    source_root = os.path.join(r'/mnt/d/new_ncbi_dataset/genomes_rec', lineage)
    for ass in select_ass_lis:
        shutil.copytree(
            os.path.join(source_root, ass),
            os.path.join(dest_root, ass),
        )
# Script entry: select and copy the assemblies of the lineage matching taxid 2870.
select_lineage(2870)