From 79b38009e3e33f81b380ffb11ed1a74c60d9c10f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20B=C3=BClte?= <tobias.buelte@hbz-nrw.de>
Date: Mon, 5 Aug 2024 11:55:09 +0200
Subject: [PATCH] Change urn-Link mapping to catch non nbn resolver-links #2034

I try to catch all URN links except those repository links that
have a urn/urn pattern, since they are not resolver links:
http://digital.ub.uni-duesseldorf.de/urn/urn:nbn:de:hbz:061:1-249692
or
https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092
---
 src/main/resources/alma/fix/identifiers.fix       |  9 ++++++---
 .../alma/fix/relatedRessourcesAndLinks.fix        | 15 ++++++++++++---
 .../resources/alma-fix/990197067610206441.json    |  4 ++--
 .../resources/alma-fix/990198125850206441.json    |  6 +++---
 4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/main/resources/alma/fix/identifiers.fix b/src/main/resources/alma/fix/identifiers.fix
index 53e557be9..45a10a549 100644
--- a/src/main/resources/alma/fix/identifiers.fix
+++ b/src/main/resources/alma/fix/identifiers.fix
@@ -17,15 +17,18 @@ end
 # Sometimes urn are not set in 024 then we could pick up the missing from 856.
 # 856 - Electronic Location and Access (R) - Subfield: $u (R) $3 (NR)
 # 1. Indicator: 4 = HTTP
+set_array("@urnLinks")
+
 do list(path:"8564?", "var":"$i")
-  if all_match("$i.u", "^http.*[/=]urn:nbn.*")
+  if all_match("$i.u", "^http.*(urn=|(org|de)/)(urn:.+$)") # Only resolver links ("...urn=urn:..." or a host ending in org/ or de/ directly followed by "urn:"); this ignores repository links like: https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092
     copy_field("$i.u", "urn[].$append")
-    replace_all("urn[].$last", "(^http.+)(urn:.+$)", "$2")
+    copy_field("$i.u", "@urnLinks.$append")
+    replace_all("urn[].$last", "^http.*[/=](urn:.+$)", "$1")
   end
 end
 
 replace_all("urn[].*","^(nbn:de:.*\\d)$","urn:$1")
-
+uniq("@urnLinks")
 uniq("urn[]")
 
 # 035 - System Control Number (R) - Subfield: $a (NR)
diff --git a/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix b/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix
index c39a56015..83bfc8815 100644
--- a/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix
+++ b/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix
@@ -271,7 +271,7 @@ set_array("fulltextOnline[]")
 
 do list(path: "8564?", "var":"$i")
   if exists("$i.u")
-    unless any_match("$i.u",".*(doi.org|nbn-resolving).*")
+    unless any_match("$i.u",".*(doi.org|urn=urn:|(org|de)/urn:).*") # This should not skip repository links like: https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092
       if all_equal("$i.z", "kostenfrei") # kostenfrei, added Digitalisierung not only Verlag or Agentur as filter
           if all_match("$i.x", "Verlag|Agentur|Digitalisierung")
             copy_field("$i.x", "fulltextOnline[].$append.label")
@@ -301,13 +301,22 @@ end
 
 # urn for fullTextOnline and sameAs
 
-do list(path:"urn[]","var":"$i")
+do list(path:"@urnLinks","var":"$i") # Each entry of "@urnLinks" is copied unchanged as id into fulltextOnline[] and sameAs[].
   copy_field("$i", "fulltextOnline[].$append.id")
-  prepend("fulltextOnline[].$last.id","https://nbn-resolving.org/")
   copy_field("fulltextOnline[].$last.id", "sameAs[].$append.id")
   add_field("fulltextOnline[].$last.label", "URN-Link")
 end
 
+if is_empty("@urnLinks") # Fallback: when "@urnLinks" is empty, build the links from "urn[]" by prepending https://nbn-resolving.org/.
+  do list(path:"urn[]","var":"$i")
+    copy_field("$i", "fulltextOnline[].$append.id")
+    prepend("fulltextOnline[].$last.id","https://nbn-resolving.org/")
+    copy_field("fulltextOnline[].$last.id", "sameAs[].$append.id")
+    add_field("fulltextOnline[].$last.label", "URN-Link")
+  end
+end
+
+
 # TODO: hasVersion is outcommented since it needs some remodelling
 # See https://github.com/hbz/lobid-resources/issues/1242
 #  <!--
diff --git a/src/test/resources/alma-fix/990197067610206441.json b/src/test/resources/alma-fix/990197067610206441.json
index 36cd512dd..d2218a212 100644
--- a/src/test/resources/alma-fix/990197067610206441.json
+++ b/src/test/resources/alma-fix/990197067610206441.json
@@ -68,7 +68,7 @@
     "id" : "https://hub.culturegraph.org/resource/(DE-605)990197067610206441",
     "label" : "Culturegraph Ressource"
   }, {
-    "id" : "https://nbn-resolving.org/urn:nbn:de:hbz:061:1-249692",
+    "id" : "http://nbn-resolving.de/urn:nbn:de:hbz:061:1-249692",
     "label" : "urn:nbn:de:hbz:061:1-249692"
   } ],
   "primaryForm" : [ {
@@ -77,7 +77,7 @@
     "note" : [ "Elektronische Reproduktion von" ]
   } ],
   "fulltextOnline" : [ {
-    "id" : "https://nbn-resolving.org/urn:nbn:de:hbz:061:1-249692",
+    "id" : "http://nbn-resolving.de/urn:nbn:de:hbz:061:1-249692",
     "label" : "URN-Link"
   } ],
   "inCollection" : [ {
diff --git a/src/test/resources/alma-fix/990198125850206441.json b/src/test/resources/alma-fix/990198125850206441.json
index 33f76379b..37ffd144b 100644
--- a/src/test/resources/alma-fix/990198125850206441.json
+++ b/src/test/resources/alma-fix/990198125850206441.json
@@ -73,11 +73,11 @@
     "id" : "http://worldcat.org/oclc/162593822",
     "label" : "OCLC Ressource"
   }, {
-    "id" : "https://nbn-resolving.org/urn:nbn:de:bvb:12-bsb10020200-9",
-    "label" : "urn:nbn:de:bvb:12-bsb10020200-9"
+    "id" : "http://www.mdz-nbn-resolving.de/urn/resolver.pl?urn=urn:nbn:de:bvb:12-bsb10020200-9",
+    "label" : "resolver.pl?urn=urn:nbn:de:bvb:12-bsb10020200-9"
   } ],
   "fulltextOnline" : [ {
-    "id" : "https://nbn-resolving.org/urn:nbn:de:bvb:12-bsb10020200-9",
+    "id" : "http://www.mdz-nbn-resolving.de/urn/resolver.pl?urn=urn:nbn:de:bvb:12-bsb10020200-9",
     "label" : "URN-Link"
   } ],
   "inCollection" : [ {