-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathjobinfo_all
executable file
·150 lines (110 loc) · 4.09 KB
/
jobinfo_all
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
# coding: utf-8
import argparse
import csv
import glob
import os
import sys
def main():
    """Collect summary data from all Q-Chem ``.out`` files and report it.

    Parses the command line (``-v/--verbose [N]`` and ``-s/--save [NAME]``),
    scans every file returned by ``getFileList()``, prints a warning for each
    file whose number of "Convergence criterion met" lines differs from its
    job count, then prints the collected table and optionally saves it.

    pandas is used for display and CSV export when it is installed. Otherwise
    the table is printed as plain text and saved with the standard-library
    ``csv`` module (tab-delimited, the same layout the old fallback wrote).
    """
    parser = argparse.ArgumentParser(description='Q-CHEM JOB INFO\nSearch recursively for all .out files and get general info.')
    parser.add_argument("-v", '--verbose', nargs='?', const=0, type=int, help="Print more. Number of rows optional. 0 prints everything")
    parser.add_argument("-s", "--save", nargs='?', const="data.csv", type=str, help="Save output. Name is optional. Defaults to 'data.csv'.")
    args = parser.parse_args()

    records = []
    for filename in getFileList():
        with open(filename, errors='ignore') as f:
            file_records, converged, num_jobs = _parse_output(f, filename)
        records.extend(file_records)

        # check convergence for my multijob
        if converged != num_jobs:
            print("\n!!!!!!!!!")
            print("Convergence error or convergence not yet reached in {}.".format(filename))
            print("\n!!!!!!!!!")

    columns = _column_order(records)
    save_loc = os.path.join(os.path.realpath("."), args.save) if args.save else None

    # Only the import is guarded, so a ModuleNotFoundError cannot be confused
    # with an error raised while building or printing the table.
    try:
        import pandas as pd
    except ModuleNotFoundError:
        pd = None

    if pd is not None:
        _report_with_pandas(pd, records, columns, args.verbose, save_loc)
    else:
        _report_plain(records, columns, args.verbose, save_loc)


def _parse_output(lines, filename):
    """Parse one output file into records plus its convergence counters.

    Args:
        lines: iterable of text lines (e.g. an open file).
        filename: value stored in each record's ``"filename"`` field.

    Returns:
        ``(records, converged, num_jobs)`` where ``records`` holds one dict
        per "Total energy in the final basis set" line, ``converged`` counts
        "Convergence criterion met" lines, and ``num_jobs`` is the last value
        read from a "User input: ... of N" line (1 if none was seen).
    """
    records = []
    rem_settings = {}
    in_rem = False
    converged = 0
    num_jobs = 1
    # Initialised per file so a missing line yields an empty cell instead of
    # a NameError or a value left over from the previous file.
    nbas = None
    walltime = None

    for line in lines:
        # only entered while inside a $rem block
        if in_rem:
            if line.startswith("$end"):
                in_rem = False
            else:
                tokens = line.split()
                # Blank or one-token lines carry no key/value pair.
                if len(tokens) >= 2:
                    rem_settings[tokens[0]] = tokens[1]

        if line.startswith("$rem"):
            in_rem = True
        elif line.startswith("User input:") and "of" in line:
            # get number of jobs in multijob file
            try:
                num_jobs = int(line.split()[-1])
            except (ValueError, IndexError):
                # Not a "<i> of <n>" header; keep the current job count.
                pass
        elif "Convergence criterion met" in line:
            converged += 1
        elif "shells and" in line and "basis functions" in line:
            nbas = line.split("shells and")[1].split("basis functions")[0].strip()
        elif line.split()[:3] == ["SCF", "time:", "CPU"] and "wall" in line:
            # Token comparison tolerates variable padding between the words.
            # The trailing character (the unit suffix) is dropped, as before.
            walltime = line.split("wall")[1].strip()[:-1]
        elif " Total energy in the final basis set =" in line:
            energy = line.split("=")[1].strip()
            # First job has no scf guess; record an explicit placeholder.
            if "scf_guess" not in rem_settings:
                rem_settings["scf_guess"] = "default"
            record = {"filename": filename, "energy": energy, "nbas": nbas, "time": walltime}
            # Keyed storage keeps every value under its own column even when
            # files list different $rem keys; the fixed fields win on a clash.
            for key, value in rem_settings.items():
                record.setdefault(key, value)
            records.append(record)

    return records, converged, num_jobs


def _column_order(records):
    """Return the four fixed columns followed by every other key, in first-seen order."""
    columns = ["filename", "energy", "nbas", "time"]
    seen = set(columns)
    for record in records:
        for key in record:
            if key not in seen:
                seen.add(key)
                columns.append(key)
    return columns


def _report_with_pandas(pd, records, columns, verbose, save_loc):
    """Print the table as a DataFrame and, if ``save_loc`` is set, write it as CSV."""
    df = pd.DataFrame(records, columns=columns)

    # ``verbose`` is None when -v is absent and 0 when -v is given without a
    # number, so it must be compared against None rather than tested for truth.
    if verbose is None:
        print(df)
    elif verbose == 0:
        pd.set_option('display.max_rows', None)
        print(df)
    else:
        print(df.head(verbose))

    if save_loc is not None:
        df.to_csv(save_loc, index=False)
        print("\nSaved data to {}.".format(save_loc))


def _report_plain(records, columns, verbose, save_loc):
    """Print the table as space-joined text and, if ``save_loc`` is set, write it tab-delimited."""
    header = list(columns)
    rows = [["" if record.get(col) is None else record[col] for col in columns]
            for record in records]

    if verbose is None:
        # Default view: header plus the first nine records (ten lines), as before.
        shown = rows[:9]
    elif verbose == 0:
        shown = rows
    else:
        shown = rows[:verbose]

    print("")
    print('\n'.join(' '.join(str(x) for x in row) for row in [header] + shown))

    if save_loc is not None:
        # Always save the full table; the row limit applies to display only.
        with open(save_loc, "w", newline="") as handle:
            writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
            writer.writerows([header] + rows)
        print("\nSaved data to {}.".format(save_loc))
def getFileList():
    """Return every ``.out`` path found recursively below the current directory.

    Exits the program via ``sys.exit`` with a message when nothing matches.
    """
    matches = list(glob.glob('./**/*.out', recursive=True))
    if not matches:
        sys.exit("No '.out' files found.")
    return matches
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()