"""Builder for the recent-collaborators (COA) spreadsheet."""

import datetime as dt
import os
import sys
import openpyxl

from regolith.builders.basebuilder import BuilderBase
from regolith.dates import month_to_int
from regolith.sorters import doc_date_key, ene_date_key, position_key
from regolith.tools import all_docs_from_collection, filter_publications, \
    month_and_year, fuzzy_retrieval, is_since
from copy import copy
from dateutil.relativedelta import relativedelta
from operator import itemgetter


# Look-back window, in months, that defines a "recent" collaborator.
NUM_MONTHS = 48

# Fields searched when matching a name against a database document.
_LOOKUP_FIELDS = ["name", "aka", "_id"]


def mdy_date(month, day, year, **kwargs):
    """Return ``datetime.date(year, month, day)``.

    ``month`` may be an integer or a month string understood by
    ``month_to_int``.  Extra keyword arguments are accepted and ignored so
    a whole date-bearing document can be splatted into the call.
    """
    if isinstance(month, str):
        month = month_to_int(month)
    return dt.date(year, month, day)


def mdy(month, day, year, **kwargs):
    """Format a date as ``MM/DD/YY``; extra keyword arguments are ignored."""
    return "{}/{}/{}".format(
        str(month_to_int(month)).zfill(2), str(day).zfill(2), str(year)[-2:]
    )


def _last_first(full_name):
    """Reformat ``"First Middle Last"`` as ``"Last, First Middle"``.

    Names with fewer than two tokens are returned stripped and unchanged so
    that no dangling ``", "`` is produced and empty names do not raise.
    """
    parts = full_name.split()
    if len(parts) < 2:
        return full_name.strip()
    return ", ".join([parts[-1], " ".join(parts[:-1])])


class RecentCollaboratorsBuilder(BuilderBase):
    """Build recent collaborators from database entries"""

    btype = "recent-collabs"
    needed_dbs = ['citations', 'people', 'contacts', 'institutions']

    def __init__(self, rc):
        super().__init__(rc)
        self.template = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), "templates",
            "coa_template.xlsx"
        )
        self.cmds = ["excel"]

    def construct_global_ctx(self):
        """Load the people, contacts, institutions and citations collections
        into the global context."""
        super().construct_global_ctx()
        gtx = self.gtx
        rc = self.rc

        gtx["people"] = sorted(
            all_docs_from_collection(rc.client, "people"),
            key=position_key,
            reverse=True,
        )
        gtx["contacts"] = sorted(
            all_docs_from_collection(rc.client, "contacts"),
            key=position_key,
            reverse=True,
        )
        gtx["institutions"] = all_docs_from_collection(rc.client,
                                                       "institutions")
        gtx["citations"] = all_docs_from_collection(rc.client, "citations")
        gtx["all_docs_from_collection"] = all_docs_from_collection

    def excel(self):
        """Write ``coa_table.xlsx`` listing the recent co-authors of the
        person selected with ``--people``.

        Exits the program (``sys.exit``) when no person was given or the
        given name cannot be matched in the people collection.
        """
        rc = self.rc
        since_date = dt.date.today() - relativedelta(months=NUM_MONTHS)
        if isinstance(rc.people, str):
            rc.people = [rc.people]
        if not rc.people:
            sys.exit("please rerun specifying --people PERSON")
        target = fuzzy_retrieval(
            list(all_docs_from_collection(rc.client, "people")),
            ['aka', 'name', '_id'], rc.people[0], case_sensitive=False)
        if not target:
            sys.exit("please rerun specifying --people PERSON")
        # The selected person is held in its own name: the collaborator
        # lookups below must never overwrite it.
        owner = next(
            (p for p in self.gtx["people"] if p["_id"] == target["_id"]),
            None)
        if owner is None:
            return
        coauthors = self._recent_coauthors(owner, since_date)
        self._write_workbook(self._collaborator_rows(coauthors))

    def _recent_coauthors(self, person, since_date):
        """Return the author names of ``person``'s publications dated on or
        after ``since_date`` (duplicates kept, publication order)."""
        my_names = frozenset(person.get("aka", []) + [person["name"]])
        pubs = filter_publications(self.gtx["citations"], my_names,
                                   reverse=True, bold=False)
        coauthors = []
        for pub in pubs:
            if not is_since(pub.get("year"), since_date.year,
                            pub.get("month", 1), since_date.month):
                continue
            month = pub.get("month")
            if not month:
                print("WARNING: {} is missing month".format(pub["_id"]))
            elif str(month).casefold() == "tbd":
                # casefold the stored value, not the literal
                print("WARNING: month in {} is tbd".format(pub["_id"]))
            coauthors.extend(pub.get('author', []))
        return coauthors

    def _collaborator_rows(self, coauthors):
        """Resolve names to unique ``(\"Last, First\", institution)`` tuples,
        sorted by name.

        Each name is looked up in ``people`` first and ``contacts`` second;
        names found in neither are reported and skipped.
        """
        rc = self.rc
        # Materialise each collection once instead of once per coauthor.
        people_docs = list(all_docs_from_collection(rc.client, "people"))
        contact_docs = list(all_docs_from_collection(rc.client, "contacts"))
        inst_docs = list(all_docs_from_collection(rc.client, "institutions"))
        rows = set()
        # dict.fromkeys de-duplicates while keeping first-seen order, so each
        # name is resolved (and warned about) only once.
        for coauthor in dict.fromkeys(coauthors):
            doc = fuzzy_retrieval(people_docs, _LOOKUP_FIELDS, coauthor)
            if doc:
                # An absent or empty employment list, or an entry without an
                # organization, all fall back to "missing".
                employment = doc.get("employment") or [{}]
                inst_key = employment[0].get("organization", "missing")
            else:
                doc = fuzzy_retrieval(contact_docs, _LOOKUP_FIELDS, coauthor)
                if not doc:
                    print(
                        "WARNING: {} not found in contacts. Check aka".format(
                            coauthor))
                    continue
                inst_key = doc.get("institution", "missing")
            inst = fuzzy_retrieval(inst_docs, _LOOKUP_FIELDS, inst_key)
            if inst:
                inst_name = inst["name"]
            else:
                inst_name = inst_key
                print("WARNING: {} missing from institutions".format(
                    inst_key))
            rows.add((_last_first(doc["name"]), inst_name))
        return sorted(rows, key=itemgetter(0))

    def _write_workbook(self, rows):
        """Fill the COA template with ``rows`` and save it to the build
        directory as ``coa_table.xlsx``."""
        wb = openpyxl.load_workbook(self.template)
        ws = wb.worksheets[0]
        ws.delete_rows(52, amount=3)  # removing the example rows
        # Push everything below the table down to make room for the rows.
        ws.move_range("A52:E66", rows=len(rows), cols=0, translate=True)
        style_ref_cell = ws["B51"]
        style = {
            attr: copy(getattr(style_ref_cell, attr))
            for attr in ("font", "border", "fill", "alignment")
        }
        for row_num, (name, institution) in enumerate(rows, start=52):
            try:
                ws.unmerge_cells("A{0}:E{0}".format(row_num))
            except (ValueError, KeyError):
                # The row was not merged; nothing to undo.
                pass
            for col in "ABCDE":
                cell = ws["{}{}".format(col, row_num)]
                cell.font = style["font"]
                cell.border = style["border"]
                cell.fill = style["fill"]
                cell.alignment = style["alignment"]
            ws["A{}".format(row_num)].value = "A:"
            ws["B{}".format(row_num)].value = name
            ws["C{}".format(row_num)].value = institution
        ws.delete_rows(51)  # deleting the reference row
        wb.save(os.path.join(self.bldir, "coa_table.xlsx"))
"""Builder for recent-collaborator lists."""
import os
import datetime as dt
import sys
from copy import copy
from dateutil.relativedelta import relativedelta

try:
    from bibtexparser.bwriter import BibTexWriter
    from bibtexparser.bibdatabase import BibDatabase

    HAVE_BIBTEX_PARSER = True
except ImportError:
    HAVE_BIBTEX_PARSER = False

from regolith.tools import all_docs_from_collection, filter_publications, \
    is_since, fuzzy_retrieval
from regolith.sorters import doc_date_key, ene_date_key, position_key
from regolith.builders.basebuilder import LatexBuilderBase, latex_safe

LATEX_OPTS = ["-halt-on-error", "-file-line-error"]


class RecentCollabsBuilder(LatexBuilderBase):
    """Render the list of a person's recent co-authors."""

    btype = "recent-collabs"
    needed_dbs = ['citations', 'people', 'contacts', 'institutions']

    def construct_global_ctx(self):
        """Load the people, contacts, institutions and citations collections
        into the global context."""
        super().construct_global_ctx()
        gtx = self.gtx
        rc = self.rc

        gtx["people"] = sorted(
            all_docs_from_collection(rc.client, "people"),
            key=position_key,
            reverse=True,
        )
        gtx["contacts"] = sorted(
            all_docs_from_collection(rc.client, "contacts"),
            key=position_key,
            reverse=True,
        )
        gtx["institutions"] = all_docs_from_collection(rc.client,
                                                       "institutions")
        gtx["citations"] = all_docs_from_collection(rc.client, "citations")
        gtx["all_docs_from_collection"] = all_docs_from_collection

    def latex(self):
        """Render ``recentcollabs.csv`` for the person given by ``--people``.

        Exits the program (``sys.exit``) when no person was given or the
        given name cannot be matched in the people collection.
        """
        rc = self.rc
        since_date = dt.date.today() - relativedelta(months=48)
        if isinstance(rc.people, str):
            rc.people = [rc.people]
        if not rc.people:
            sys.exit("please rerun specifying --people PERSON")
        target = fuzzy_retrieval(
            list(all_docs_from_collection(rc.client, "people")),
            ['aka', 'name', '_id'], rc.people[0], case_sensitive=False)
        if not target:
            sys.exit("please rerun specifying --people PERSON")
        # Captured once: the collaborator lookups use their own variables and
        # cannot clobber the id being searched for.
        target_id = target["_id"]
        for p in self.gtx["people"]:
            if p["_id"] != target_id:
                continue
            my_names = frozenset(p.get("aka", []) + [p["name"]])
            pubs = filter_publications(self.gtx["citations"], my_names,
                                       reverse=True, bold=False)
            my_collabs = self._recent_coauthors(pubs, since_date)
            ppl_names = self._resolve_collaborators(my_collabs)
            print(set(ppl_names))
            emp = p.get("employment", [{"organization": "missing",
                                        "begin_year": 2019}])
            emp.sort(key=ene_date_key, reverse=True)
            # names/bibfile were previously undefined here (NameError).
            # names is the author-name set used to select the publications;
            # bibfile is dropped because no bib file is produced.  The
            # template reads ``my_collabs``, so it is passed under that name
            # as well as the original ``collabs``.
            self.render(
                "recentcollabs.csv",
                p["_id"] + ".csv",
                p=p,
                title=p.get("name", ""),
                pubs=pubs,
                names=my_names,
                employment=emp,
                collabs=my_collabs,
                my_collabs=my_collabs,
            )
            self.pdf(p["_id"])

    def _recent_coauthors(self, pubs, since_date):
        """Return the author names of the publications in ``pubs`` dated on
        or after ``since_date`` (duplicates kept)."""
        coauthors = []
        for pub in pubs:
            if not is_since(pub.get("year"), since_date.year,
                            pub.get("month", 1), since_date.month):
                continue
            month = pub.get("month")
            if not month:
                print("WARNING: {} is missing month".format(pub["_id"]))
            elif str(month).casefold() == "tbd":
                # casefold the stored value, not the literal
                print("WARNING: month in {} is tbd".format(pub["_id"]))
            coauthors.extend(pub.get('author', []))
        return coauthors

    def _resolve_collaborators(self, coauthors):
        """Map names to ``(name, institution)`` tuples via the people,
        contacts and institutions collections; unknown names are reported
        and skipped."""
        rc = self.rc
        fields = ["name", "aka", "_id"]
        # Materialise each collection once instead of once per coauthor.
        people_docs = list(all_docs_from_collection(rc.client, "people"))
        contact_docs = list(all_docs_from_collection(rc.client, "contacts"))
        inst_docs = list(all_docs_from_collection(rc.client, "institutions"))
        resolved = []
        # Each distinct name is resolved (and warned about) once.
        for coauthor in dict.fromkeys(coauthors):
            doc = fuzzy_retrieval(people_docs, fields, coauthor)
            if doc:
                # Absent/empty employment or an entry without an
                # organization all fall back to "missing".
                employment = doc.get("employment") or [{}]
                inst_key = employment[0].get("organization", "missing")
            else:
                doc = fuzzy_retrieval(contact_docs, fields, coauthor)
                if not doc:
                    print(
                        "WARNING: {} not found in contacts. Check aka".format(
                            coauthor))
                    continue
                inst_key = doc.get("institution", "missing")
            inst = fuzzy_retrieval(inst_docs, fields, inst_key)
            if inst:
                inst_name = inst["name"]
            else:
                inst_name = inst_key
                print("WARNING: {} missing from institutions".format(
                    inst_key))
            resolved.append((doc["name"], inst_name))
        return resolved

    def filter_publications(self, authors, reverse=False):
        """Return the citations written by any of ``authors``, sorted by
        date, with the matching author names wrapped in ``\\textbf{}``.

        Citations without an ``author`` field are skipped.  Each returned
        entry is a shallow copy, so the source documents are not modified.
        """
        rc = self.rc
        authors = frozenset(authors)
        pubs = []
        for pub in all_docs_from_collection(rc.client, "citations"):
            pub_authors = pub.get("author", [])
            if authors.isdisjoint(pub_authors):
                continue
            bold_self = [
                "\\textbf{" + a + "}" if a in authors else a
                for a in pub_authors
            ]
            pubs.append(dict(pub, author=bold_self))
        pubs.sort(key=doc_date_key, reverse=reverse)
        return pubs

    def make_bibtex_file(self, pubs, pid, person_dir="."):
        """Write ``pubs`` to ``<person_dir>/<pid>.bib`` and return the path.

        Returns ``None`` without writing anything when ``bibtexparser`` is
        not installed.
        """
        if not HAVE_BIBTEX_PARSER:
            return None
        # Nothing else in this class creates these, so make them on demand.
        if getattr(self, "bibdb", None) is None:
            self.bibdb = BibDatabase()
        if getattr(self, "bibwriter", None) is None:
            self.bibwriter = BibTexWriter()
        entries = []
        for pub in pubs:
            ent = dict(pub)
            ent["ID"] = ent.pop("_id")
            ent["ENTRYTYPE"] = ent.pop("entrytype")
            ent["author"] = " and ".join(ent["author"])
            entries.append(ent)
        self.bibdb.entries = entries
        fname = os.path.join(person_dir, pid) + ".bib"
        with open(fname, "w", encoding='utf-8') as f:
            f.write(self.bibwriter.write(self.bibdb))
        return fname
def _advisor_names(info):
    """Collect the non-empty advisor/mentor names recorded for one person.

    ``education`` and ``employment`` may each be a single entry (dict) or a
    list of entries; absent sections and absent keys are treated as empty.
    """
    names = []
    for section in ("education", "employment"):
        entries = info.get(section) or []
        if isinstance(entries, dict):
            entries = [entries]
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            # "advisor" is the key this function has always read; "mentor"
            # is the key the education/employment schema declares.
            for key in ("advisor", "mentor"):
                name = entry.get(key)
                if name:
                    names.append(name)
    return names


def filter_advisors(input_contacts, advisors, positions=("PhD", "post-doc")):
    """Filter for PhD and post-docs advisors.

    Parameters
    ----------
    input_contacts : list of dict
        The contacts information.  Each dict maps a person key to that
        person's info dict.
    advisors : list of str
        The advisors to be filtered for
    positions : sequence of str, optional
        The positions to be filtered for

    Returns
    -------
    filtered_contacts : list of dicts
        One dict per entry of ``input_contacts`` holding only the people
        whose ``position`` is in ``positions`` and who list one of
        ``advisors`` in their education or employment.  People with missing
        ``position``, ``education`` or ``employment`` data are left out
        rather than raising ``KeyError``.
    """
    output_contacts = []
    for contacts in input_contacts:
        # Resolve each person's advisors once, not once per advisor/position.
        advised_by = {
            person: _advisor_names(info) for person, info in contacts.items()
        }
        filtered_contacts = {}
        # Loop nesting is kept so the insertion order of the result matches
        # the historical advisor -> position -> person order.
        for advisor in advisors:
            for position in positions:
                for person, info in contacts.items():
                    if "position" not in info:
                        continue
                    if info["position"] == position \
                            and advisor in advised_by[person]:
                        filtered_contacts[person] = info
        output_contacts.append(filtered_contacts)
    return output_contacts
diff --git a/requirements/run.txt b/requirements/run.txt index 52eac1ac9..68892a44d 100644 --- a/requirements/run.txt +++ b/requirements/run.txt @@ -7,3 +7,4 @@ xonsh rever openpyxl nameparser +python-dateutil diff --git a/tests/test_builders.py b/tests/test_builders.py index faa1a11b4..a879aae1c 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -19,6 +19,8 @@ "preslist", "reimb", "figure", +# "review-man", + "recent-collabs" ] xls_check = ("B17", "B20", "B36") @@ -49,7 +51,7 @@ def test_builder(bm, make_db): prep_figure() if bm == "html": os.makedirs("templates/static", exist_ok=True) - if bm == "reimb": + if bm == "reimb" or bm == "recent-collabs": subprocess.run(["regolith", "build", bm, "--no-pdf", "--people", "scopatz"], check=True, cwd=repo ) else: @@ -93,7 +95,7 @@ def test_builder_python(bm, make_db): prep_figure() if bm == "html": os.makedirs("templates/static", exist_ok=True) - if bm == "reimb": + if bm == "reimb" or bm == "recent-collabs": main(["build", bm, "--no-pdf", "--people", "scopatz"]) else: main(["build", bm, "--no-pdf"])