-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfasta-to-report.py
68 lines (59 loc) · 1.68 KB
/
fasta-to-report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import argparse as ap
import csv
import loader
from Bio import SeqIO
from ga4gh.core import sha512t24u
from app.utils import ga4gh_to_trunc512
from hashlib import md5
import sys
def run():
    """Write a CSV checksum report for every record of a FASTA file.

    Command-line options are obtained from ``parse_args()``. For each record
    parsed from the ``--fasta`` input one CSV row is written containing:

    * ``id``    -- the record identifier,
    * ``ga4gh`` -- ``"SQ."`` followed by the ``sha512t24u`` digest of the
      ASCII-encoded sequence,
    * ``md5``   -- the hexadecimal MD5 digest of the same bytes,
    * ``trunc512`` -- only when ``--trunc512`` is set; produced by passing
      the sha512t24u digest to ``ga4gh_to_trunc512``.

    The report goes to stdout when ``--output`` is ``-``; otherwise it is
    written to the named file, which is always closed before returning,
    including when an error interrupts processing.

    Raises:
        UnicodeEncodeError: if a sequence contains non-ASCII characters.
        OSError: if the output file cannot be opened for writing.
    """
    args = parse_args()
    fasta = args.fasta

    to_stdout = args.output == "-"
    if to_stdout:
        output = sys.stdout
    else:
        # The csv module emits its own "\r\n" terminators; newline="" stops
        # the text layer from translating them again (avoids "\r\r\n" on
        # platforms whose line separator is not "\n").
        output = open(args.output, mode="w", newline="")

    try:
        writer = csv.writer(output)

        header = ["id", "ga4gh", "md5"]
        if args.trunc512:
            header.append("trunc512")
        writer.writerow(header)

        # NOTE(review): 'filname' spelling is kept exactly as the original
        # call; presumably it matches the loader module's function name.
        _open = loader.guess_parser_from_filname(fasta)
        with _open(fasta) as f:
            for record in SeqIO.parse(f, "fasta"):
                record_id = record.id
                # Both digests are computed over the same ASCII bytes.
                seq = str(record.seq).encode("ASCII")
                sha512sum = sha512t24u(seq)
                md5sum = md5(seq).hexdigest()
                row = [record_id, f"SQ.{sha512sum}", md5sum]
                if args.trunc512:
                    trunc512sum = ga4gh_to_trunc512(sha512sum)
                    row.append(trunc512sum)
                writer.writerow(row)
    finally:
        # Only close handles this function opened; stdout belongs to the
        # interpreter and must stay usable for the caller.
        if not to_stdout:
            output.close()
def parse_args():
    """Parse the command line of the FASTA report generator.

    Options:
        -f / --fasta  (required) path of the FASTA file to read.
        --trunc512    flag; when present, trunc512 checksums are requested.
        --output      destination path; defaults to ``-`` (stdout).

    Returns:
        The ``argparse.Namespace`` with attributes ``fasta``, ``trunc512``
        and ``output``.
    """
    parser = ap.ArgumentParser(description="FASTA report generator")

    # Required options live in their own group so the help text lists them
    # separately from the optional ones.
    mandatory = parser.add_argument_group("required named arguments")
    mandatory.add_argument(
        "-f",
        "--fasta",
        required=True,
        type=str,
        help="Input FASTA file to load. Supports compressed and uncompressed",
    )

    parser.add_argument(
        "--trunc512",
        action="store_true",
        help="Output trunc512 checksums",
    )
    parser.add_argument(
        "--output",
        default="-",
        type=str,
        help="Output location. Use - for stdout",
    )

    namespace = parser.parse_args()
    return namespace
# Script entry point: generate the report only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()