Skip to content

Commit

Permalink
Merge #2024 from branch '1058-enrichWithRvk' of github.com:hbz/lobid-…
Browse files Browse the repository at this point in the history
…resources
  • Loading branch information
dr0i committed Jun 17, 2024
2 parents f50dfb3 + 8cca4e8 commit 57f318e
Show file tree
Hide file tree
Showing 14 changed files with 60 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ public void run() {
fixVariables.put("classification.tsv", "./maps/classification.tsv");
fixVariables.put("formangabe.tsv", "./maps/formangabe.tsv");
fixVariables.put("almaMmsId2rpbId", "../../../../../../lookup-tables/data/almaMmsId2rpbId.tsv");
fixVariables.put("rvk.tsv", "../../../../../../lookup-tables/data/rvk.tsv");
fixVariables.put("lobidOrganisationsMapping.tsv", "./maps/lobidOrganisationsMapping.tsv");
fixVariables.put("hbzowner2sigel.tsv", "./maps/hbzowner2sigel.tsv");
fixVariables.put("rpb2.ttl", "../../../../../../vocabs/rpb/rpb2.ttl");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* @author Pascal Christoph (dr0i)
* @author Tobias Bülte (TobiasNx)
**/
public final class CulturegraphXmlFilterHbzRvkToCsv {
private static String OUTPUT_FILE="cg-concordance.csv";
public final class CulturegraphXmlFilterHbzRvkToTsv {
private static String OUTPUT_FILE="rvk.tsv";

public static void main(String... args) {
String XML_INPUT_FILE = new File(args[0]).getAbsolutePath();
Expand All @@ -32,12 +32,16 @@ public static void main(String... args) {
final FileOpener opener = new FileOpener();
JsonDecoder jsonDecoder = new JsonDecoder();
jsonDecoder.setRecordPath("records");
CsvEncoder csvEncoder = new CsvEncoder();
csvEncoder.setSeparator("\t");
csvEncoder.setNoQuotes(true);

try {
opener.setReceiver(new XmlDecoder()).setReceiver(new MarcXmlHandler())
.setReceiver(new Metafix("src/main/resources/rvk/cg-to-rvk-csv.fix"))
.setReceiver(new Metafix("src/main/resources/rvk/cg-to-rvk-tsv.fix"))
.setReceiver(new JsonEncoder())
.setReceiver(jsonDecoder)
.setReceiver(new CsvEncoder())
.setReceiver(csvEncoder)
.setReceiver(new ObjectWriter<>(OUTPUT_FILE));
} catch (IOException e) {
e.printStackTrace();
Expand Down
3 changes: 3 additions & 0 deletions src/main/resources/alma/fix/maps.fix
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ put_filemap("$[dnbSachgruppen]","dnbSachgruppen",sep_char:"\t")
put_filemap("$[classification.tsv]","classificationCode2Label", sep_char:"\t",key_column:"0",value_column:"1",expected_columns:"-1")
put_filemap("$[classification.tsv]","classificationCode2Uri", sep_char:"\t",key_column:"0",value_column:"2",expected_columns:"-1")

# RVK via Culturegraph
put_filemap("$[rvk.tsv]","rvk", sep_char:"\t")

# RPB SKOS Maps
put_rdfmap("$[rpb2.ttl]", "rpb2", target: "skos:prefLabel", select_language: "de") # LBZ-Notationen aka rpb2 fka RPB-Sachgruppen und Zusätze
put_rdfmap("$[rpb-spatial.ttl]", "rpbr", target: "skos:prefLabel", select_language: "de") # RPB-Raumsystematik aka rpbr
Expand Down
12 changes: 12 additions & 0 deletions src/main/resources/alma/fix/subjects.fix
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,18 @@ do list(path:"084??", "var":"$i")
end
end

# RVK via Culturegraph
# Adds one subject entry per RVK notation found for this record's Alma MMS ID
# in the "rvk" map.

# Seed the temporary field with the record's almaMmsId; the lookup below maps it to the notations.
copy_field("almaMmsId","@rvkNotations")
# NOTE(review): delete:"true" presumably removes @rvkNotations when the ID has no entry in the map - confirm against the Metafix lookup docs.
lookup("@rvkNotations","rvk",delete:"true")
# The mapped value is a comma-separated list of notations, e.g. "NQ 2360,NY 4760,BD 7100".
split_field("@rvkNotations", ",")
do list(path: "@rvkNotations","var":"$i")
# Append a new subject for this notation, then fill in its type and source on that same (last) entry.
copy_field("$i","subject[].$append.notation")
set_array("subject[].$last.type[]","Concept")
add_field("subject[].$last.source.label","RVK (Regensburger Verbundklassifikation)")
add_field("subject[].$last.source.id","https://d-nb.info/gnd/4449787-8")
end

# 689 RSWK Schlagwortfolgen fka: Schlagwortketten 1 - 10 - no info on repeatability

call_macro("schlagwortfolge", field: "6890?")
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion src/test/java/UnitTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
@Suite.SuiteClasses({
TestGenerateContext.class,
org.lobid.resources.AlmaMarc21XmlToLobidJsonMetafixTest.class,
org.lobid.resources.CulturegraphXmlFilterHbzRvkToCsvTest.class,
org.lobid.resources.CulturegraphXmlFilterHbzRvkToTsvTest.class,
org.lobid.resources.CulturegraphXmlFilterHbzToJsonTest.class})

public final class UnitTests {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public void setup() {
fixVariables.put("picaCreatorId2Isil.tsv", "src/main/resources/alma/maps/picaCreatorId2Isil.tsv");
fixVariables.put("nwbibWikidataLabelTypeCoords.tsv", "src/main/resources/alma/maps/nwbibWikidataLabelTypeCoords.tsv");
fixVariables.put("almaMmsId2rpbId", "src/test/resources/alma/maps/almaMmsId2rpbId.tsv");
fixVariables.put("rvk.tsv", "src/test/resources/cg/rvk.tsv");
fixVariables.put("lobidOrganisationsMapping.tsv", "src/test/resources/alma/maps/lobidOrganisationsMapping.tsv");
fixVariables.put("hbzowner2sigel.tsv", "src/main/resources/alma/maps/hbzowner2sigel.tsv");
fixVariables.put("rpb2.ttl", "src/test/resources/alma/maps/rpb2.ttl");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,29 @@
import org.slf4j.LoggerFactory;

import org.junit.Test;
import org.lobid.resources.run.CulturegraphXmlFilterHbzRvkToCsv;
import org.lobid.resources.run.CulturegraphXmlFilterHbzRvkToTsv;

/**
* Test of filtering resources with hbz holdings from culturegraph MARCXML,
* transforming into a TSV file.
*
* @author Pascal Christoph(dr0i)
**/
public final class CulturegraphXmlFilterHbzRvkToCsvTest {
public final class CulturegraphXmlFilterHbzRvkToTsvTest {

private static final Logger LOG =
LoggerFactory.getLogger(CulturegraphXmlFilterHbzRvkToCsvTest.class);
LoggerFactory.getLogger(CulturegraphXmlFilterHbzRvkToTsvTest.class);

private static final String PATH_TO_TEST = "src/test/resources/";
public static final String OUTPUT_FILE =
PATH_TO_TEST + "cg/output.csv";
PATH_TO_TEST + "cg/rvk.tsv";

private static final String XML_INPUT_FILE = "cg/aggregate_20240507_example.marcxml";

@SuppressWarnings("static-method")
@Test
public void testExtractLookupTableFromCgAsHbzRvk() {
CulturegraphXmlFilterHbzRvkToCsv.main(PATH_TO_TEST + XML_INPUT_FILE,
CulturegraphXmlFilterHbzRvkToTsv.main(PATH_TO_TEST + XML_INPUT_FILE,
OUTPUT_FILE);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public final class CulturegraphXmlFilterHbzToJsonTest {

private static final String PATH_TO_TEST = "src/test/resources/";
public static final String JSON_OUTPUT_FILE =
PATH_TO_TEST + "cg/output-es-bulk.ndjson";
PATH_TO_TEST + "cg/rvk-es-bulk.ndjson";

private static final String XML_INPUT_FILE = "cg/aggregate_20240507_example.marcxml";
private static PluginConfigurableNode node;
Expand Down
21 changes: 21 additions & 0 deletions src/test/resources/alma-fix/990367731740206441.json
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,27 @@
"label" : "RVK (Regensburger Verbundklassifikation)",
"id" : "https://d-nb.info/gnd/4449787-8"
}
}, {
"notation" : "NQ 2360",
"type" : [ "Concept" ],
"source" : {
"label" : "RVK (Regensburger Verbundklassifikation)",
"id" : "https://d-nb.info/gnd/4449787-8"
}
}, {
"notation" : "NY 4760",
"type" : [ "Concept" ],
"source" : {
"label" : "RVK (Regensburger Verbundklassifikation)",
"id" : "https://d-nb.info/gnd/4449787-8"
}
}, {
"notation" : "BD 7100",
"type" : [ "Concept" ],
"source" : {
"label" : "RVK (Regensburger Verbundklassifikation)",
"id" : "https://d-nb.info/gnd/4449787-8"
}
}, {
"type" : [ "ComplexSubject" ],
"label" : "Pandemie | Gesellschaft | Geschichte",
Expand Down
2 changes: 1 addition & 1 deletion src/test/resources/cg/aggregate_20240507_example.marcxml
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,7 @@
<marc:subfield code="8">9\p</marc:subfield>
</marc:datafield>
<marc:datafield ind2=" " ind1=" " tag="035">
<marc:subfield code="a">(DE-605)990063057720206441</marc:subfield>
<marc:subfield code="a">(DE-605)990367731740206441</marc:subfield>
<marc:subfield code="8">6\p</marc:subfield>
</marc:datafield>
<marc:datafield ind2=" " ind1=" " tag="035">
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{"index":{"_index":"cg","_type":"rvk"}}
{"rvk":["MG 11380","MF 3390","MC 7200"],"id":"990067531130206441"}
{"index":{"_index":"cg","_type":"rvk"}}
{"rvk":["NQ 2360","NY 4760","BD 7100"],"id":"990019247190206441, 990063057720206441, 990063668050206441"}
{"rvk":["NQ 2360","NY 4760","BD 7100"],"id":"990019247190206441, 990367731740206441, 990063668050206441"}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"990067531130206441","MG 11380,MF 3390,MC 7200"
"990019247190206441","NQ 2360,NY 4760,BD 7100"
"990063057720206441","NQ 2360,NY 4760,BD 7100"
"990367731740206441","NQ 2360,NY 4760,BD 7100"
"990063668050206441","NQ 2360,NY 4760,BD 7100"
4 changes: 4 additions & 0 deletions src/test/resources/cg/rvk.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
990067531130206441 MG 11380,MF 3390,MC 7200
990019247190206441 NQ 2360,NY 4760,BD 7100
990367731740206441 NQ 2360,NY 4760,BD 7100
990063668050206441 NQ 2360,NY 4760,BD 7100

0 comments on commit 57f318e

Please sign in to comment.