-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrunner.py
82 lines (70 loc) · 3.42 KB
/
runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
'''
@author: alisoncallahan
'''
import argparse
from scripts.PMCReferenceExtractor import run as run_pmc_extractor
from scripts.PubmedPapersQuery import run as run_pubmed_query
from scripts.PubMedPapersProcessor import run as run_pubmed_processor
from scripts.ResourceNameExtractor import run as run_toolname_extractor
from scripts.DBLoader import run as run_db_loader
# Horizontal rule printed above and below every stage title.
_BANNER_RULE = "########################################################"


def build_parser():
    """Build the command-line parser for the uIndex data generator.

    Options:
        -i / --indir   (required) stored as ``input_directory``.
        -o / --outdir  (required) stored as ``output_directory``.
        -db_host       stored as ``db_host``; defaults to ``'localhost'``.
        -sql_port      stored as ``sql_port``; defaults to ``3306``.
        -db_cnf        stored as ``db_cnf``; defaults to ``'~/.my.cnf'``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Run uIndex data generator.")
    parser.add_argument(
        '-i',
        '--indir',
        dest='input_directory',
        help='Directory where citing universe PMC XML files are located.',
        required=True,
    )
    parser.add_argument(
        '-o',
        '--outdir',
        dest='output_directory',
        help='Directory where output will be written.',
        required=True,
    )
    parser.add_argument(
        '-db_host',
        dest='db_host',
        help='SQL database host (e.g. localhost)',
        default='localhost',
        required=False,
    )
    parser.add_argument(
        '-sql_port',
        dest='sql_port',
        help='Port where SQL server is running (default: 3306)',
        # Without type=int a port given on the command line would be a str
        # while the default is an int; convert so the loader always gets an int.
        type=int,
        default=3306,
        required=False,
    )
    parser.add_argument(
        '-db_cnf',
        dest='db_cnf',
        help='Configuration file where SQL username and password are stored.',
        default='~/.my.cnf',
        required=False,
    )
    return parser


def _print_banner(title_line):
    """Print ``title_line`` framed by a rule line above and below it."""
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print(_BANNER_RULE)
    print(title_line)
    print(_BANNER_RULE)


def main(argv=None):
    """Run every stage of the pipeline in order.

    Each stage's return values are passed on to later stages, and the final
    stage hands all collected results to the database loader.

    Args:
        argv: Optional list of command-line arguments. When ``None``,
            argparse reads ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    _print_banner("### Processing PMC records to build citing universe. ###")
    # The *_fp names suggest these are file paths -- confirm against the
    # extractor module.
    pmc_article_fp, pmc_reference_fp, pmc_sections_fp = run_pmc_extractor(
        args.input_directory, args.output_directory)

    _print_banner("### Downloading informatics resource PubMed IDs. ###")
    # NOTE(review): limit=1 is hard-coded; looks like a debugging cap -- confirm.
    pmids_fp = run_pubmed_query(args.output_directory, limit=1)

    _print_banner("### Processing informatics resource PubMed records. ###")
    titles_fp, dates_fp = run_pubmed_processor(args.output_directory, pmids_fp)

    _print_banner("### Extracting informatics resource names. ###")
    resource_names_fp = run_toolname_extractor(args.output_directory, titles_fp)

    _print_banner("### Creating and loading u-Index database. ###")
    run_db_loader(
        args.db_host,
        args.sql_port,
        args.db_cnf,
        titles_fp,
        dates_fp,
        resource_names_fp,
        pmc_article_fp,
        pmc_reference_fp,
        pmc_sections_fp,
    )


if __name__ == '__main__':
    main()