Skip to content

Commit

Permalink
importer: Update tests for normalized title matching
Browse files Browse the repository at this point in the history
  • Loading branch information
sakshamarora1 authored and kpsherva committed Jan 23, 2024
1 parent 0de5125 commit 1608e0f
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 4 deletions.
4 changes: 3 additions & 1 deletion cds_ils/importer/documents/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def search_document_by_title_authors(title, authors, subtitle=None):
"""Find document by title and authors."""
document_search = current_app_ils.document_search_cls()

title = " ".join(title.lower().split()) # Normalized title search for documents
title = " ".join(
title.lower().split()
).strip() # Normalized title search for documents
if subtitle:
search = (
document_search.filter("term", title__normalized_keyword=title)
Expand Down
16 changes: 13 additions & 3 deletions cds_ils/importer/documents/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,12 @@ def _update_field_alternative_identifiers(self, matched_document):

return existing_identifiers + new_identifiers

@staticmethod
def _normalize_title(title):
    """Return a normalized form of ``title`` suitable for comparison.

    The title is lower-cased and every run of whitespace (spaces, tabs,
    newlines) is collapsed into a single space.

    :param title: the title string to normalize.
    :returns: the lower-cased, whitespace-collapsed title.
    """
    # ``str.split()`` without a separator discards leading and trailing
    # whitespace, so the joined result never needs an extra ``strip()``.
    return " ".join(title.lower().split())

def update_document(self, matched_document):
"""Update document record."""
for field in self.update_document_fields:
Expand Down Expand Up @@ -263,11 +269,13 @@ def _validate_volumes(self, existing_document):

for import_serial in import_doc_serials:
import_volume = import_serial.get("volume")
import_serial_title = import_serial["title"].lower()
import_serial_title = self._normalize_title(import_serial["title"])

for serial in existing_doc_serials:
existing_volume = serial.get("volume")
existing_title = serial["record_metadata"]["title"].lower()
existing_title = self._normalize_title(
serial["record_metadata"]["title"]
)

same_serial = existing_title == import_serial_title
both_have_volumes = import_volume and existing_volume
Expand Down Expand Up @@ -322,7 +330,9 @@ def validate_found_matches(self, not_validated_matches):
)

pub_year_not_equal = doc_pub_year != import_doc_publication_year
titles_not_equal = document_title.lower() != import_doc_title.lower()
titles_not_equal = self._normalize_title(
document_title
) != self._normalize_title(import_doc_title)

provider_identifiers_not_equal = (
not self._validate_provider_identifiers(document)
Expand Down
1 change: 1 addition & 0 deletions tests/importer/test_document_matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def test_document_search_matching(importer_test_data):
validated_matches, partial = document_importer.validate_found_matches(matches)

assert validated_matches == "docid-4"
assert partial == ["docid-41"]


def test_fuzzy_matching(importer_test_data):
Expand Down

0 comments on commit 1608e0f

Please sign in to comment.