Merge pull request #32 from st3107/caobuilder

DEV: Coabuilder
sbillinge · Apr 8, 2020 · 13a36ad · 13a36ad
2 parents 2533b34 + 643258a
commit 13a36ad
Show file tree

Hide file tree

Showing 4 changed files with 100 additions and 27 deletions.
diff --git a/regolith/builders/coabuilder.py b/regolith/builders/coabuilder.py
@@ -110,16 +110,20 @@ def get_since_date(rc):
     return since_date
 
 
-def get_coauthors_from_pubs(pubs):
-    """Get co-authors' names from the publication."""
-    my_collabs = []
+def get_coauthors_from_pubs(pubs, not_person):
+    """Get co-authors' names from the publication. Not include the person itself."""
+    my_collabs = list()
     for pub in pubs:
         my_collabs.extend(
             [
                 collabs for collabs in
                 (names for names in pub.get('author', []))
             ]
         )
+    my_collabs = set(my_collabs)
+    for name in ([not_person['_id'], not_person['name']] + not_person['aka']):
+        if name in my_collabs:
+            my_collabs.remove(name)
     return my_collabs
 
 
@@ -128,15 +132,15 @@ def get_recent_org(person_info):
     if "employment" in person_info:
         employment = person_info.get("employment", []) + person_info.get("education", [])
         if len(employment) == 0:
-            return "missing"
+            return ""
         # sort by end_year
         employment = sorted(
             employment,
             key=lambda d: d.get("end_year", float('inf')),
             reverse=True)
-        organization = employment[0].get('organization', 'missing')
+        organization = employment[0].get('organization', '')
     else:
-        organization = "missing"
+        organization = ""
     return organization
 
 
@@ -186,7 +190,13 @@ def query_people_and_institutions(rc, names):
 
 def get_inst_name(person, rc):
     """Get the name of instituion of the person's lastest employment."""
-    person_inst_abbr = person.get("employment")[0]["organization"]
+    if 'employment' in person:
+        org = get_recent_org(person)
+        person_inst_abbr = org
+    elif 'institution' in person:
+        person_inst_abbr = person.get('institution')
+    else:
+        person_inst_abbr = ''
     person_inst = fuzzy_retrieval(all_docs_from_collection(
         rc.client, "institutions"), ["name", "aka", "_id"],
         person_inst_abbr, case_sensitive=False)
@@ -210,6 +220,14 @@ def get_person_pubs(coll, person):
     return pubs
 
 
+def make_person_3tups(person, rc):
+    if 'name' not in person:
+        print("Warning")
+    name = HumanName(person['name'])
+    inst = get_inst_name(person, rc)
+    return [(name.last, name.first, inst)]
+
+
 def format_last_first_instutition_names(rc, ppl_names, excluded_inst_name=None):
     """Get the last name, first name and institution name."""
     ppl = []
@@ -243,8 +261,10 @@ def format_people_name(ppl_names):
 
 
 def format_to_nsf(tups, type_str):
-    """Format the 3 tups to 2 tups. ('type_str', 'last, first', 'inst')."""
-    return [(type_str, '{}, {}'.format(last, first), inst) for last, first, inst in tups]
+    """Format the 3 tups to 2 tups. ('type_str', 'last, first', 'inst', ...)."""
+    return [
+        (type_str, '{}, {}'.format(tup[0], tup[1])) + tup[2:] for tup in tups
+    ]
 
 
 def apply_cell_style(*cells, style):
@@ -299,6 +319,51 @@ def unmerge(ws, cells):
     return
 
 
+def get_person(person_id, rc):
+    """Get the person's name."""
+    person_found = fuzzy_retrieval(
+        all_docs_from_collection(rc.client, "people"),
+        ["name", "aka", "_id"],
+        person_id,
+        case_sensitive=False
+    )
+    if person_found:
+        return person_found
+    person_found = fuzzy_retrieval(
+        all_docs_from_collection(rc.client, "contacts"),
+        ["name", "aka", "_id"],
+        person_id,
+        case_sensitive=False
+    )
+    if person_found:
+        return person_found
+    print("WARNING: {} missing from people and contacts. Check aka.".format(person_id))
+    return None
+
+
+def find_coeditors(person, rc):
+    """Get the coeditors info of the person. Return (last, first, inst, journal)."""
+    emps = person.get('employment')
+    if emps is None:
+        return set()
+
+    def coeditor_id_journals(_emps):
+        for emp in _emps:
+            if emp.get('position') == 'editor':
+                _journal = emp.get('department', '')
+                coeditor_ids = emp.get('coworkers', [])
+                for _coeditor_id in coeditor_ids:
+                    yield _coeditor_id, _journal
+
+    coeditor_inst_journals = set()
+    for coeditor_id, journal in coeditor_id_journals(emps):
+        coeditor = get_person(coeditor_id, rc)
+        coeditor_name = HumanName(coeditor['name'])
+        inst_name = get_inst_name(coeditor, rc)
+        coeditor_inst_journals.add((coeditor_name.last, coeditor_name.first, inst_name, journal))
+    return coeditor_inst_journals
+
+
 class RecentCollaboratorsBuilder(BuilderBase):
     """Build recent collaborators from database entries"""
     btype = "recent-collabs"
@@ -344,55 +409,64 @@ def query_ppl(self, target, **filters):
                                  case_sensitive=False)
         if not person:
             raise RuntimeError("Person {} not found in people.".format(target))
-        person_inst_name = get_inst_name(person, rc)
         pubs = get_person_pubs(gtx["citations"], person)
         if 'since_date' in filters:
             since_date = filters.get('since_date')
             pubs = filter_since_date(pubs, since_date)
-        my_collabs = get_coauthors_from_pubs(pubs)
+        my_collabs = get_coauthors_from_pubs(pubs, person)
         people, institutions = query_people_and_institutions(rc, my_collabs)
         ppl_names = set(zip(people, institutions))
         collab_3tups = set(format_last_first_instutition_names(rc, ppl_names))
         advisors_3tups = set(get_advisors_name_inst(person, rc))
         advisees_3tups = set(get_advisees_name_inst(gtx["people"], person, rc))
         ppl_3tups = sorted(list(collab_3tups | advisors_3tups | advisees_3tups))
+        person_3tups = make_person_3tups(person, rc)
+        coeditors_info = find_coeditors(person, rc)
+        ppl_tab1 = format_to_nsf(person_3tups, '')
         ppl_tab3 = format_to_nsf(advisors_3tups, 'G:') + format_to_nsf(advisees_3tups, 'T:')
         ppl_tab4 = format_to_nsf(collab_3tups, 'A:')
+        ppl_tab5 = format_to_nsf(coeditors_info, 'E:')
         results = {
             'person_info': person,
-            'person_institution_name': person_inst_name,
+            'ppl_tab1': ppl_tab1,
             'ppl_tab3': ppl_tab3,
             'ppl_tab4': ppl_tab4,
-            'ppl_3tups': ppl_3tups,
+            'ppl_tab5': ppl_tab5,
+            'ppl_3tups': ppl_3tups
         }
         return results
 
     @staticmethod
-    def add_ppl_2tups(ws, ppl_2tups, start_row, template_cell_style=None, cols='ABCDE'):
+    def fill_in_tab(ws, ppl_tups, start_row, template_cell_style=None, cols='ABCDE'):
         """Add the information in person, institution pairs into the table 4 in nsf table."""
-        more_rows = len(ppl_2tups) - 1
+        more_rows = len(ppl_tups) - 1
         if more_rows > 0:
             ws.insert_rows(start_row, amount=more_rows)
-        for row, tup in enumerate(ppl_2tups, start=start_row):
+        for row, tup in enumerate(ppl_tups, start=start_row):
             cells = [ws['{}{}'.format(col, row)] for col in cols]
             if template_cell_style is not None:
                 apply_cell_style(*cells, style=template_cell_style)
-            cells[0].value = tup[0]
-            cells[1].value = tup[1]
-            cells[2].value = tup[2]
+            for ind, value in enumerate(tup):
+                cells[ind].value = value
         return
 
-    def render_template1(self, person_info, ppl_tab3, ppl_tab4, **kwargs):
+    def render_template1(self, person_info, ppl_tab1, ppl_tab3, ppl_tab4, ppl_tab5, **kwargs):
         """Render the nsf template."""
         template = self.template
         wb = openpyxl.load_workbook(template)
         ws = wb.worksheets[0]
         style = copy_cell_style(ws['A17'])
-        self.add_ppl_2tups(
+        self.fill_in_tab(
+            ws, ppl_tab5, start_row=44, template_cell_style=style
+        )
+        self.fill_in_tab(
+            ws, ppl_tab4, start_row=37, template_cell_style=style
+        )
+        self.fill_in_tab(
             ws, ppl_tab3, start_row=30, template_cell_style=style
         )
-        self.add_ppl_2tups(
-            ws, ppl_tab4, start_row=37 + len(ppl_tab3) - 1, template_cell_style=style
+        self.fill_in_tab(
+            ws, ppl_tab1, start_row=17, template_cell_style=style
         )
         wb.save(os.path.join(self.bldir, "{}_nsf.xlsx".format(person_info["_id"])))
         return locals()

diff --git a/regolith/tools.py b/regolith/tools.py
@@ -6,14 +6,13 @@
 import re
 import sys
 import time
-from copy import deepcopy
 from calendar import monthrange
-
+from copy import deepcopy
 from datetime import datetime, date, timedelta
 
+from regolith.chained_db import ChainDB
 from regolith.dates import month_to_int, date_to_float, beg_end_dates
 from regolith.sorters import doc_date_key, id_key, ene_date_key
-from regolith.chained_db import ChainDB
 
 try:
     from bibtexparser.bwriter import BibTexWriter
@@ -786,7 +785,7 @@ def is_fully_loaded(appts):
     if max(loading) > 1.0:
         status = False
         print("max {} at {}".format(max(loading),
-                datearray[list(loading).index(max(loading))]))
+                                    datearray[list(loading).index(max(loading))]))
     elif min(loading) < 1.0:
         status = False
         print("min {} at {}".format(min(loading),

diff --git a/tests/outputs/recent-collabs/scopatz_doe.xlsx b/tests/outputs/recent-collabs/scopatz_doe.xlsx
diff --git a/tests/outputs/recent-collabs/scopatz_nsf.xlsx b/tests/outputs/recent-collabs/scopatz_nsf.xlsx