-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathensembl_connector.py
123 lines (97 loc) · 3.07 KB
/
ensembl_connector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
## code and functions for
## calling Ensembl APIs
# %% IMPORTS
import requests, json
import sys
# %% ENSEMBL API
scheme = 'https'
host = 'rest.ensembl.org'
ENSE_API = '{}://{}'.format(scheme, host)
# %% FUNCTIONS
def get_seq(id, flank, ENSE_API, endpoint = 'sequence'):
'''
https://rest.ensembl.org/documentation/info/sequence_id
'''
ENSE_SEQ_QUERY = ENSE_API + f"/sequence/{endpoint}/id/{id}"
print(f"\nQuery:{ENSE_SEQ_QUERY}")
r = requests.get(ENSE_SEQ_QUERY, headers={ "Content-Type" : "text/plain"})
if not r.ok:
r.raise_for_status()
sys.exit()
else:
seq = r.text
print(f"ID:{id}")
print(f"Seq snippet:{seq[:100]}")
return seq
def ense_info(id, ENSE_API, endpoint = 'lookup'):
'''
gets information for given id
'''
ENSE_ID_QUERY = ENSE_API + f"/{endpoint}/id/{id}"
print(f"\nQuery:{ENSE_ID_QUERY}")
r = requests.get(ENSE_ID_QUERY, headers={ "Content-Type" : "application/json"})
if not r.ok:
r.raise_for_status()
sys.exit()
else:
decoded = r.json()
print(repr(decoded))
return decoded
def check_assembly(id, ENSE_API, endpoint = 'lookup'):
'''
https://rest.ensembl.org/documentation/info/assembly_info
'''
ENSE_QUERY = ENSE_API + f"/{endpoint}/id/{id}"
print(f"\nQuery:{ENSE_QUERY}")
r = requests.get(ENSE_QUERY, headers={ "Content-Type" : "application/json"})
if not r.ok:
r.raise_for_status()
sys.exit()
else:
decoded = r.json()
print(repr(decoded))
return decoded
def extract_seq(info, ENSE_API, endpoint = 'region'):
'''
https://rest.ensembl.org/documentation/info/sequence_region
"/sequence/region/human/X:1000000..1000100:1?expand_5prime=60;expand_3prime=60"
'''
id = info.get('id')
species = info.get('species')
chr = info.get('seq_region_name')
start = info.get('start')
end = info.get('end')
strand = info.get('strand')
if None in [species, chr, start, end, strand]:
print(f"Info:{info}")
print(f" There is a '`None` in required info")
sys.exit()
if strand == -1:
## reverse strand
pass
else:
pass
ENSE_REGION_QUERY = ENSE_API + f"/sequence/{endpoint}/{species}/{chr}:{start}..{end}:{strand}?"
print(f"\nQuery:{ENSE_REGION_QUERY}")
r = requests.get(ENSE_REGION_QUERY, headers={ "Content-Type" : "text/plain"})
if not r.ok:
r.raise_for_status()
sys.exit()
else:
seq = r.text
print(f"ID:{id}")
print(f"Seq snippet:{seq[:100]}")
return seq
# %% TEST
qids = ['ENSMUSG00000000001','ENSMUSG00000000028'] ## test ensembl ids
flank = 100
for qid in qids:
# seq = get_promotors(qid, flank, ENSE_API)
# dct = ense_info(qid, ENSE_API)
inf = check_assembly(qid, ENSE_API)
reg = extract_seq(inf, ENSE_API)
# %% TEST
# %% CHANGELOG
# %% TO DO
## Check is negative strand is reversed and excised or we need to compute coords from left hand
## see: https://www.biostars.org/p/182727/