diff --git a/README.md b/README.md
index 5f37ae064..21058556b 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
![Metafacture](https://raw.github.com/wiki/metafacture/metafacture-core/img/metafacture.png)
-[![Build](https://github.com/metafacture/metafacture-core/workflows/Build/badge.svg?branch=master)](https://github.com/metafacture/metafacture-core/actions?query=workflow%3ABuild)
+[![Build](https://github.com/metafacture/metafacture-core/actions/workflows/build.yml/badge.svg?branch=master)](https://github.com/metafacture/metafacture-core/actions?query=workflow%3ABuild)
Metafacture is a toolkit for processing semi-structured data with a focus on library metadata. It provides a versatile set of tools for reading, writing and transforming data. Metafacture can be used as a stand-alone application or as a Java library in other applications. The name Metafacture is a portmanteau of the words *meta*data and manu*facture*.
diff --git a/build.gradle b/build.gradle
index 424b6af76..37dcbe95e 100644
--- a/build.gradle
+++ b/build.gradle
@@ -28,7 +28,7 @@ subprojects {
versions = [
'assertj_core': '3.11.1',
'commons_compress': '1.21',
- 'guava': '29.0-jre',
+ 'guava': '32.0.1-jre',
'jackson_databind': '2.15.1',
'jdk': '11',
'junit': '4.12',
diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar
index e6441136f..a4b76b953 100644
Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index 8a1f6b97f..82dd18b20 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -1,7 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
-distributionSha256Sum=a4b4158601f8636cdeeab09bd76afb640030bb5b144aafe261a5e8af027dc612
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip
+distributionSha256Sum=57dafb5c2622c6cc08b993c85b7c06956a2f53536432a30ead46166dbca0f1e9
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.11-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
diff --git a/gradlew b/gradlew
index 1aa94a426..f5feea6d6 100755
--- a/gradlew
+++ b/gradlew
@@ -15,6 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# SPDX-License-Identifier: Apache-2.0
+#
##############################################################################
#
@@ -55,7 +57,7 @@
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
-# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
@@ -84,7 +86,8 @@ done
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
-APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
+APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
+' "$PWD" ) || exit
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
diff --git a/gradlew.bat b/gradlew.bat
index 25da30dbd..9d21a2183 100644
--- a/gradlew.bat
+++ b/gradlew.bat
@@ -13,6 +13,8 @@
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
+@rem SPDX-License-Identifier: Apache-2.0
+@rem
@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Encoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Encoder.java
index 3cd536fe7..fb8ad50b2 100644
--- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Encoder.java
+++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Encoder.java
@@ -81,6 +81,7 @@ public final class Marc21Encoder extends
private State state = State.IN_STREAM;
private boolean generateIdField;
+ private boolean validateLeader = true;
/**
* Initializes the encoder with MARC 21 constants and charset.
@@ -108,6 +109,18 @@ public void setGenerateIdField(final boolean generateIdField) {
this.generateIdField = generateIdField;
}
+ /**
+ * Controls whether the leader should be validated.
+ *
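+ * The default value of {@code validateLeader} is {@code true}.
+ * If set to {@code false}, codes are accepted without being checked against the allowed codes.
+ *
+ * @param validateLeader if {@code false}, the leader is not validated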
+ */
+ public void setValidateLeader(final boolean validateLeader) {
+ this.validateLeader = validateLeader;
+ }
+
/**
* Gets the flag to decide whether the ID field is generated.
*
@@ -259,12 +272,14 @@ private void processLeaderAsSubfields(final String name, final char code) {
}
private void requireValidCode(final char code, final char[] validCodes) {
- for (final char validCode: validCodes) {
- if (validCode == code) {
- return;
+ if (validateLeader) {
+ for (final char validCode : validCodes) {
+ if (validCode == code) {
+ return;
+ }
}
+ throw new FormatException("invalid code '" + code + "'; allowed codes are: " + Arrays.toString(validCodes));
}
- throw new FormatException("invalid code '" + code + "'; allowed codes are: " + Arrays.toString(validCodes));
}
private void processTopLevelLiteral(final String name, final String value) {
diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java
index 9dba83d51..9fb12b016 100644
--- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java
+++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java
@@ -49,9 +49,6 @@ public final class MarcXmlEncoder extends DefaultStreamPipe";
- private static final String ROOT_CLOSE = "";
-
private enum Tag {
collection(" xmlns%s=\"" + NAMESPACE + "\"%s"),
@@ -106,7 +103,6 @@ public String close(final Object[] args) {
private static final int TAG_END = 3;
private final Encoder encoder = new Encoder();
- private final Marc21Decoder decoder = new Marc21Decoder();
private final Marc21Encoder wrapper = new Marc21Encoder();
private DefaultStreamPipe<ObjectReceiver<String>> pipe;
@@ -115,6 +111,7 @@ public String close(final Object[] args) {
* Creates an instance of {@link MarcXmlEncoder}.
*/
public MarcXmlEncoder() {
+ final Marc21Decoder decoder = new Marc21Decoder();
decoder.setEmitLeaderAsWhole(true);
wrapper
@@ -136,7 +133,6 @@ public void setEmitNamespace(final boolean emitNamespace) {
/**
* Sets the flag to decide whether to omit the XML declaration.
- *
* Default value: {@value #OMIT_XML_DECLARATION}
*
* @param currentOmitXmlDeclaration true if the XML declaration is omitted, otherwise
@@ -148,7 +144,6 @@ public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) {
/**
* Sets the XML version.
- *
* Default value: {@value #XML_VERSION}
*
* @param xmlVersion the XML version
@@ -159,7 +154,6 @@ public void setXmlVersion(final String xmlVersion) {
/**
* Sets the XML encoding.
- *
* Default value: {@value #XML_ENCODING}
*
* @param xmlEncoding the XML encoding
@@ -173,7 +167,6 @@ public void setXmlEncoding(final String xmlEncoding) {
* If true, the input data is validated to ensure correct MARC21. Also the leader may be generated.
* It acts as a wrapper: the input is piped to {@link org.metafacture.biblio.marc21.Marc21Encoder}, whose output is piped to {@link org.metafacture.biblio.marc21.Marc21Decoder}, whose output is piped to {@link org.metafacture.biblio.marc21.MarcXmlEncoder}.
* This validation and treatment of the leader is more safe but comes with a performance impact.
- *
* Default value: {@value #ENSURE_CORRECT_MARC21_XML}
*
* @param ensureCorrectMarc21Xml if true the input data is validated to ensure correct MARC21. Also the leader may be generated.
@@ -184,7 +177,6 @@ public void setEnsureCorrectMarc21Xml(final boolean ensureCorrectMarc21Xml) {
/**
* Formats the resulting xml by indentation. Aka "pretty printing".
- *
* Default value: {@value #PRETTY_PRINTED}
*
* @param formatted true if formatting is activated, otherwise false
@@ -220,7 +212,7 @@ public void literal(final String name, final String value) {
@Override
protected void onResetStream() {
- pipe.resetStream();
+ encoder.onResetStream();
}
@Override
@@ -247,11 +239,12 @@ private static class Encoder extends DefaultStreamPipe> {
private String currentEntity = "";
private boolean emitNamespace = true;
- private Object[] namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY};
+ private Object[] namespacePrefix = new Object[]{NAMESPACE_PREFIX};
private int indentationLevel;
private boolean formatted = PRETTY_PRINTED;
private int recordAttributeOffset;
+ private int recordLeaderOffset;
private Encoder() {
}
@@ -294,7 +287,7 @@ public void startRecord(final String identifier) {
writeTag(Tag.record::open);
recordAttributeOffset = builder.length() - 1;
prettyPrintNewLine();
-
+ recordLeaderOffset = builder.length();
incrementIndentationLevel();
}
@@ -345,6 +338,7 @@ public void literal(final String name, final String value) {
if (name.equals(Marc21EventNames.MARCXML_TYPE_LITERAL)) {
if (value != null) {
builder.insert(recordAttributeOffset, String.format(ATTRIBUTE_TEMPLATE, name, value));
+ recordLeaderOffset = builder.length();
}
}
else if (!appendLeader(name, value)) {
@@ -353,7 +347,7 @@ else if (!appendLeader(name, value)) {
if (value != null) {
writeEscaped(value.trim());
}
- writeTag(Tag.controlfield::close);
+ writeTag(Tag.controlfield::close, false);
prettyPrintNewLine();
}
}
@@ -378,7 +372,9 @@ protected void onResetStream() {
@Override
protected void onCloseStream() {
- writeFooter();
+ if (!atStreamStart) {
+ writeFooter();
+ }
sendAndClearData();
}
@@ -408,9 +404,20 @@ private void writeFooter() {
* @param str the unescaped sequence to be written
*/
private void writeRaw(final String str) {
+
builder.append(str);
}
+ /**
+ * Writes the unescaped sequence to the leader position.
+ *
+ * @param str the unescaped sequence to be written to the leader position
+ */
+ private void writeRawLeader(final String str) {
+ builder.insert(recordLeaderOffset, str);
+ recordLeaderOffset = recordLeaderOffset + str.length();
+ }
+
private boolean appendLeader(final String name, final String value) {
if (name.equals(Marc21EventNames.LEADER_ENTITY)) {
leaderBuilder.append(value);
@@ -432,12 +439,18 @@ private void writeEscaped(final String str) {
private void writeLeader() {
final String leader = leaderBuilder.toString();
- if (!leader.isEmpty()) {
- prettyPrintIndentation();
- writeTag(Tag.leader::open);
- writeRaw("0000" + leader.substring(0, 4) + "2200000" + leader.substring(5, 7) + "4500"); // creates a valid leader without counted elements
- writeTag(Tag.leader::close);
- prettyPrintNewLine();
+ if (leaderBuilder.length() > 0) {
+ if (formatted) {
+ writeRawLeader(getIndentationPrefix());
+ }
+
+ writeTagLeader(Tag.leader::open);
+ writeRawLeader("0000" + leader.substring(0, 4) + "2200000" + leader.substring(5, 7) + "4500"); // creates a valid leader without counted elements
+ writeTagLeader(Tag.leader::close);
+
+ if (formatted) {
+ writeRawLeader(NEW_LINE);
+ }
}
}
@@ -447,10 +460,17 @@ private void writeTag(final Function