Skip to content

Commit

Permalink
Added utils to fetch data from different sources
Browse files Browse the repository at this point in the history
  • Loading branch information
cszsol committed Nov 4, 2024
1 parent f601946 commit 9690c32
Show file tree
Hide file tree
Showing 11 changed files with 2,378 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/citations/data_sources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tools for processing information from different data sources."""
46 changes: 46 additions & 0 deletions src/citations/data_sources/bbp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Tools for extracting info from BBP publication csv files."""

import logging
from typing import List, Optional

import pandas as pd

from citations.utils import normalize_title

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


def get_bbp_author_names(
    bbp_publications: pd.DataFrame, title: str, is_bbp: bool
) -> Optional[List[str]]:
    """
    Return the BBP author names recorded for a given publication title.

    The title is normalized with ``normalize_title`` and compared against
    the ``"normalized_title"`` column; the first matching row is used and
    its ``"Author"`` value is split on ``";"``.

    Parameters
    ----------
    bbp_publications : pd.DataFrame
        Table of BBP publications. The ``"normalized_title"`` and
        ``"Author"`` columns are read.
    title : str
        Title of the publication to look up.
    is_bbp : bool
        Whether the publication is a BBP publication. When falsy, no
        lookup is performed.

    Returns
    -------
    Optional[List[str]]
        Author names with surrounding whitespace stripped when ``is_bbp``
        is truthy, otherwise ``None``.

    Raises
    ------
    Exception
        Any exception raised during the lookup or the splitting of the
        author string is logged as an error and then re-raised.
    """
    try:
        # Non-BBP publications have no author list to look up.
        if not is_bbp:
            return None

        # Read the column before normalizing, then select matching rows.
        title_column = bbp_publications["normalized_title"]
        wanted_title = normalize_title(title)
        matching_rows = bbp_publications[title_column == wanted_title]

        # Only the first matching row is considered.
        first_match = matching_rows.iloc[0]
        raw_authors = first_match["Author"]
        return [piece.strip() for piece in raw_authors.split(";")]
    except Exception as err:
        logger.error(f"Could not find bbp publication with title: {title}")
        raise err
Loading

0 comments on commit 9690c32

Please sign in to comment.