From 7d85320cf4b0ecb0ab387f32bce2687ed75d9c68 Mon Sep 17 00:00:00 2001 From: TobiasNx Date: Wed, 11 Sep 2024 11:07:29 +0200 Subject: [PATCH] Make mapping for 856 less strict #2070 Some links were not created because indicator 1 = 4 was missing. Since an 856 field with an empty first indicator (which means no information is provided) can also link to an HTTP page, I changed the path 8564? -> 856?? --- src/main/resources/alma/fix/identifiers.fix | 6 +++--- .../alma/fix/relatedRessourcesAndLinks.fix | 16 ++++++++-------- .../resources/alma-fix/990177418660206441.json | 16 ++++++++++++++++ .../resources/alma-fix/990184766040206441.json | 8 ++++++++ 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/main/resources/alma/fix/identifiers.fix b/src/main/resources/alma/fix/identifiers.fix index 343b6de30..7695a24c2 100644 --- a/src/main/resources/alma/fix/identifiers.fix +++ b/src/main/resources/alma/fix/identifiers.fix @@ -19,7 +19,7 @@ end # 1. Indicator: 4 = HTTP set_array("@urnLinks") -do list(path:"8564?", "var":"$i") +do list(path:"856??", "var":"$i") if all_match("$i.u", "^http.*(urn=|\\.(org|de)/)urn:.+$") # This should ignore repository links like: https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092 copy_field("$i.u", "urn[].$append") copy_field("$i.u", "@urnLinks.$append") @@ -100,12 +100,12 @@ end # Sometimes dois are not set in 024 then we could pick up the missing from 856. # 856 - Electronic Location and Access (R) - Subfield: $u (R) $3 (NR) # 1. 
Indicator: 4 = HTTP -do list(path:"8564?", "var":"$i") +do list(path:"856??", "var":"$i") if all_match("$i.u", ".*doi.org.*(10\\.(\\d)+/(\\S)+).*") # Volltext copy_field("$i.u", "doi[].$append") - replace_all("doi[].$last", ".*doi.org.*(10\\.(\\d)+/(\\S)+).*", "$1") end end +replace_all("doi[].*", ".*doi.org.*(10\\.(\\d)+/(\\S)+).*", "$1") uniq("doi[]") # 035 - System Control Number (R) - Subfield: $a (NR) diff --git a/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix b/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix index 189a6f519..92d423ca3 100644 --- a/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix +++ b/src/main/resources/alma/fix/relatedRessourcesAndLinks.fix @@ -35,7 +35,7 @@ do list(path: "rpbId", "var": "$i") end # DBIS -do list(path: "8564?", "var":"$i") +do list(path: "856??", "var":"$i") if exists("$i.u") if all_match("$i.x", ".*DBIS.*") copy_field("$i.u", "sameAs[].$append.id") @@ -254,7 +254,7 @@ replace_all("secondaryForm[].*.id", "^\\(DE-600\\)(.*)$", "http://lobid.org/reso set_array("tableOfContents[]") -do list(path: "8564?", "var":"$i") +do list(path: "856??", "var":"$i") if all_match("$i.3", "^[Ii][Nn][Hh][aA][lL][tT][sS][vV].*") # Inhaltsverzeichnis copy_field("$i.3", "tableOfContents[].$append.label") copy_field("$i.u", "tableOfContents[].$last.id") @@ -263,7 +263,7 @@ end set_array("description[]") -do list(path: "8564?", "var":"$i") +do list(path: "856??", "var":"$i") if all_match("$i.3", "^[Ii][Nn][Hh][aA][lL][tT][sS][tT].*") # Inhaltstext copy_field("$i.3", "description[].$append.label") copy_field("$i.u", "description[].$last.id") @@ -272,7 +272,7 @@ end set_array("seeAlso[]") -do list(path: "8564?", "var":"$i") +do list(path: "856??", "var":"$i") if all_match("$i.3", "^[zZ][uU][sS].*") # Zusätzliche Angaben copy_field("$i.3", "seeAlso[].$append.label") copy_field("$i.u", "seeAlso[].$last.id") @@ -281,7 +281,7 @@ end set_array("fulltextOnline[]") -do list(path: "8564?", "var":"$i") +do list(path: "856??", 
"var":"$i") if exists("$i.u") unless any_match("$i.u",".*(doi.org|urn=urn:|\\.(org|de)/urn:).*") # This should not skip repository links like: https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092 if all_equal("$i.z", "kostenfrei") # kostenfrei, added Digitalisierung not only Verlag or Agentur as filter @@ -439,7 +439,7 @@ end # 856 - Electronic Location and Access (R) - Subfield: $x - Nonpublic note (R) # TODO: Check if src/test/resources/alma-fix/(CKB)5280000000199164.xml is also an EZB titel even when it has no 865. -do list(path:"8564?", "var":"$i") +do list(path:"856??", "var":"$i") if any_equal("$i.x","EZB") # can test x and x.* add_field("inCollection[].$append.id", "http://lobid.org/resources/HT016356466#!") add_field("inCollection[].$last.label", "Elektronische Zeitschriftenbibliothek (EZB)") @@ -452,7 +452,7 @@ end # edoweb -do list(path:"8564?", "var":"$i") +do list(path:"856??", "var":"$i") if any_match("$i.u","^.*edoweb.*") # can test x and x.* add_field("inCollection[].$append.id", "http://lobid.org/resources/HT016925914#!") add_field("inCollection[].$last.label", "Edoweb Rheinland-Pfalz") @@ -463,7 +463,7 @@ end # TODO: AlephMorph checked for ellinet in "078r1.a" but publisso is also stated in the Link URI is that enough? 
-do list(path:"8564?", "var":"$i") +do list(path:"856??", "var":"$i") if any_match("$i.u","^.*publisso.*") # can test x and x.* add_field("inCollection[].$append.id", "http://repository.publisso.de") add_field("inCollection[].$last.label", "Fachrepositorium Lebenswissenschaften") diff --git a/src/test/resources/alma-fix/990177418660206441.json b/src/test/resources/alma-fix/990177418660206441.json index f237c4e4d..cc15aff5d 100644 --- a/src/test/resources/alma-fix/990177418660206441.json +++ b/src/test/resources/alma-fix/990177418660206441.json @@ -80,6 +80,22 @@ "id" : "http://worldcat.org/oclc/838434577", "label" : "OCLC Ressource" } ], + "fulltextOnline" : [ { + "label" : "Volltext", + "id" : "http://www.gbv.de/dms/belser/aszese/74817-1.pdf" + }, { + "label" : "Volltext", + "id" : "http://www.gbv.de/dms/belser/aszese/74817-2.pdf" + }, { + "label" : "Volltext", + "id" : "http://www.gbv.de/dms/belser/aszese/74817-3.pdf" + }, { + "label" : "Volltext", + "id" : "http://www.gbv.de/dms/belser/aszese/74817-4.pdf" + }, { + "label" : "Volltext", + "id" : "http://www.gbv.de/dms/belser/aszese/74817-5.pdf" + } ], "inCollection" : [ { "id" : "https://nrw.digibib.net/search/hbzvk/", "label" : "DigiBib hbz Verbundkatalog", diff --git a/src/test/resources/alma-fix/990184766040206441.json b/src/test/resources/alma-fix/990184766040206441.json index 7e8d45867..ab4aeb660 100644 --- a/src/test/resources/alma-fix/990184766040206441.json +++ b/src/test/resources/alma-fix/990184766040206441.json @@ -13,6 +13,7 @@ "almaMmsId" : "990184766040206441", "hbzId" : "HT016770284", "deprecatedUri" : "http://lobid.org/resources/HT016770284#!", + "doi" : [ "10.48644/mpirg_sisis_125001" ], "oclcNumber" : [ "179716832" ], "publication" : [ { "startDate" : "1883", @@ -73,12 +74,19 @@ }, { "id" : "http://worldcat.org/oclc/179716832", "label" : "OCLC Ressource" + }, { + "id" : "https://doi.org/10.48644/mpirg_sisis_125001", + "label" : "mpirg_sisis_125001" } ], "primaryForm" : [ { "id" : 
"http://lobid.org/resources/HT000522767#!", "label" : "Elektronische Reproduktion von HT000522767", "note" : [ "Elektronische Reproduktion von" ] } ], + "fulltextOnline" : [ { + "id" : "https://doi.org/10.48644/mpirg_sisis_125001", + "label" : "DOI-Link" + } ], "inCollection" : [ { "id" : "https://nrw.digibib.net/search/hbzvk/", "label" : "DigiBib hbz Verbundkatalog",