From 79b38009e3e33f81b380ffb11ed1a74c60d9c10f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Mon, 5 Aug 2024 11:55:09 +0200 Subject: [PATCH] Change urn-Link mapping to catch non nbn resolver-links #2034 I try to catch all URN links except those repository links that have a urn/urn pattern, since they are not resolver links: http://digital.ub.uni-duesseldorf.de/urn/urn:nbn:de:hbz:061:1-249692 or https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092 --- src/main/resources/alma/fix/identifiers.fix | 9 ++++++--- .../alma/fix/relatedRessourcesAndLinks.fix | 15 ++++++++++++--- .../resources/alma-fix/990197067610206441.json | 4 ++-- .../resources/alma-fix/990198125850206441.json | 6 +++--- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/main/resources/alma/fix/identifiers.fix b/src/main/resources/alma/fix/identifiers.fix index 53e557be9..45a10a549 100644 --- a/src/main/resources/alma/fix/identifiers.fix +++ b/src/main/resources/alma/fix/identifiers.fix @@ -17,15 +17,18 @@ end # Sometimes urn are not set in 024 then we could pick up the missing from 856. # 856 - Electronic Location and Access (R) - Subfield: $u (R) $3 (NR) # 1. 
Indicator: 4 = HTTP +set_array("@urnLinks") + do list(path:"8564?", "var":"$i") - if all_match("$i.u", "^http.*[/=]urn:nbn.*") + if all_match("$i.u", "^http.*(urn=|(org|de)/)(urn:.+$)") # Matches resolver links only (urn=urn:... or a host ending in org/de directly followed by /urn:...); (org|de) must be a group, not a character class. This should ignore repository links like: https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092 copy_field("$i.u", "urn[].$append") - replace_all("urn[].$last", "(^http.+)(urn:.+$)", "$2") + copy_field("$i.u", "@urnLinks.$append") + replace_all("urn[].$last", "^http.*[/=](urn:.+$)", "$1") end end replace_all("urn[].*","^(nbn:de:.*\\d)$","urn:$1") - +uniq("@urnLinks") uniq("urn[]") # 035 - System Control Number (R) - Subfield: $a (NR) diff --git a/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix b/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix index c39a56015..83bfc8815 100644 --- a/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix +++ b/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix @@ -271,7 +271,7 @@ set_array("fulltextOnline[]") do list(path: "8564?", "var":"$i") if exists("$i.u") - unless any_match("$i.u",".*(doi.org|nbn-resolving).*") + unless any_match("$i.u",".*(doi.org|urn=urn:|(org|de)/urn:).*") # This should not skip repository links like: https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092 if all_equal("$i.z", "kostenfrei") # kostenfrei, added Digitalisierung not only Verlag or Agentur as filter if all_match("$i.x", "Verlag|Agentur|Digitalisierung") copy_field("$i.x", "fulltextOnline[].$append.label") @@ -301,13 +301,22 @@ end # urn for fullTextOnline and sameAs -do list(path:"urn[]","var":"$i") +do list(path:"@urnLinks","var":"$i") copy_field("$i", "fulltextOnline[].$append.id") - prepend("fulltextOnline[].$last.id","https://nbn-resolving.org/") copy_field("fulltextOnline[].$last.id", "sameAs[].$append.id") add_field("fulltextOnline[].$last.label", "URN-Link") end +if is_empty("@urnLinks") + do list(path:"urn[]","var":"$i") + copy_field("$i", "fulltextOnline[].$append.id") + 
prepend("fulltextOnline[].$last.id","https://nbn-resolving.org/") + copy_field("fulltextOnline[].$last.id", "sameAs[].$append.id") + add_field("fulltextOnline[].$last.label", "URN-Link") + end +end + + # TODO: hasVersion is outcommented since it needs some remodelling # See https://github.com/hbz/lobid-resources/issues/1242 #