Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emit marc:leader as first element #549

Merged
merged 2 commits into from
Jul 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -49,9 +49,6 @@ public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<Strin
public static final boolean OMIT_XML_DECLARATION = false;
public static final boolean ENSURE_CORRECT_MARC21_XML = false;

private static final String ROOT_OPEN = "<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">";
private static final String ROOT_CLOSE = "</marc:collection>";

private enum Tag {

collection(" xmlns%s=\"" + NAMESPACE + "\"%s"),
@@ -106,7 +103,6 @@ public String close(final Object[] args) {
private static final int TAG_END = 3;

private final Encoder encoder = new Encoder();
private final Marc21Decoder decoder = new Marc21Decoder();
private final Marc21Encoder wrapper = new Marc21Encoder();

private DefaultStreamPipe<ObjectReceiver<String>> pipe;
@@ -115,6 +111,7 @@ public String close(final Object[] args) {
* Creates an instance of {@link MarcXmlEncoder}.
*/
public MarcXmlEncoder() {
final Marc21Decoder decoder = new Marc21Decoder();
decoder.setEmitLeaderAsWhole(true);

wrapper
@@ -136,7 +133,6 @@ public void setEmitNamespace(final boolean emitNamespace) {

/**
* Sets the flag to decide whether to omit the XML declaration.
*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous formatting looked much clearer to me...

* <strong>Default value: {@value #OMIT_XML_DECLARATION}</strong>
*
* @param currentOmitXmlDeclaration true if the XML declaration is omitted, otherwise
@@ -148,7 +144,6 @@ public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) {

/**
* Sets the XML version.
*
* <strong>Default value: {@value #XML_VERSION}</strong>
*
* @param xmlVersion the XML version
@@ -159,7 +154,6 @@ public void setXmlVersion(final String xmlVersion) {

/**
* Sets the XML encoding.
*
* <strong>Default value: {@value #XML_ENCODING}</strong>
*
* @param xmlEncoding the XML encoding
@@ -173,7 +167,6 @@ public void setXmlEncoding(final String xmlEncoding) {
* If true, the input data is validated to ensure correct MARC21. Also the leader may be generated.
* It acts as a wrapper: the input is piped to {@link org.metafacture.biblio.marc21.Marc21Encoder}, whose output is piped to {@link org.metafacture.biblio.marc21.Marc21Decoder}, whose output is piped to {@link org.metafacture.biblio.marc21.MarcXmlEncoder}.
* This validation and treatment of the leader is more safe but comes with a performance impact.
*
* <strong>Default value: {@value #ENSURE_CORRECT_MARC21_XML}</strong>
*
* @param ensureCorrectMarc21Xml if true the input data is validated to ensure correct MARC21. Also the leader may be generated.
@@ -184,7 +177,6 @@ public void setEnsureCorrectMarc21Xml(final boolean ensureCorrectMarc21Xml) {

/**
* Formats the resulting xml by indentation. Aka "pretty printing".
*
* <strong>Default value: {@value #PRETTY_PRINTED}</strong>
*
* @param formatted true if formatting is activated, otherwise false
@@ -247,11 +239,12 @@ private static class Encoder extends DefaultStreamPipe<ObjectReceiver<String>> {
private String currentEntity = "";

private boolean emitNamespace = true;
private Object[] namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY};
private Object[] namespacePrefix = new Object[]{NAMESPACE_PREFIX};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you change this? Now one has to remember to adjust this line as well should the default for emitNamespace be switched one day.


private int indentationLevel;
private boolean formatted = PRETTY_PRINTED;
private int recordAttributeOffset;
private int recordLeaderOffset;

private Encoder() {
}
@@ -294,7 +287,7 @@ public void startRecord(final String identifier) {
writeTag(Tag.record::open);
recordAttributeOffset = builder.length() - 1;
prettyPrintNewLine();

recordLeaderOffset = builder.length();
incrementIndentationLevel();
}

@@ -353,7 +346,7 @@ else if (!appendLeader(name, value)) {
if (value != null) {
writeEscaped(value.trim());
}
writeTag(Tag.controlfield::close);
writeTag(Tag.controlfield::close, false);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this change do? It seems to be ineffective. And also counterintuitive: Why would a close tag have arguments/attributes?

prettyPrintNewLine();
}
}
@@ -408,9 +401,20 @@ private void writeFooter() {
* @param str the unescaped sequence to be written
*/
private void writeRaw(final String str) {

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this empty line?

builder.append(str);
}

/**
* Writes the unescaped sequence to the leader position.
*
* @param str the unescaped sequence to be written to the leader position
*/
private void writeRawLeader(final String str) {
builder.insert(recordLeaderOffset, str);
recordLeaderOffset = recordLeaderOffset + str.length();
}

private boolean appendLeader(final String name, final String value) {
if (name.equals(Marc21EventNames.LEADER_ENTITY)) {
leaderBuilder.append(value);
@@ -432,11 +436,11 @@ private void writeEscaped(final String str) {

private void writeLeader() {
final String leader = leaderBuilder.toString();
if (!leader.isEmpty()) {
if (leaderBuilder.length() > 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change?

prettyPrintIndentation();
writeTag(Tag.leader::open);
writeRaw(leader);
writeTag(Tag.leader::close);
writeTagLeader(Tag.leader::open);
writeRawLeader(leader);
writeTagLeader(Tag.leader::close);
prettyPrintNewLine();
}
}
@@ -447,6 +451,10 @@ private void writeTag(final Function<Object[], String> function, final Object...
writeRaw(function.apply(allArgs));
}

private void writeTagLeader(final Function<Object[], String> function) {
writeRawLeader(function.apply(namespacePrefix));
}

private void prettyPrintIndentation() {
if (formatted) {
final String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT));
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;

/**
@@ -249,12 +250,26 @@ private void issue336_createRecordWithTopLevelLeader(final MarcXmlEncoder encode
encoder.endRecord();
encoder.closeStream();
String expected = XML_DECLARATION + XML_ROOT_OPEN
+ "<marc:record><marc:controlfield tag=\"001\">8u3287432</marc:controlfield>" +
"<marc:leader>" + expectedLeader + "</marc:leader></marc:record>" + XML_MARC_COLLECTION_END_TAG;
+ "<marc:record><marc:leader>" + expectedLeader + "</marc:leader>" +
"<marc:controlfield tag=\"001\">8u3287432</marc:controlfield></marc:record>" + XML_MARC_COLLECTION_END_TAG;
String actual = resultCollector.toString();
assertEquals(expected, actual);
}

@Test
public void issue548_failWhenLeaderIsNotFirst() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the purpose of this test? It's already covered by the previous one.

encoder.startRecord("1");
encoder.literal("001", "8u3287432");
encoder.literal(Marc21EventNames.LEADER_ENTITY, "00000naa a2200000uc 4500");
encoder.endRecord();
encoder.closeStream();
String expected = XML_DECLARATION + XML_ROOT_OPEN
+ "<marc:record><marc:controlfield tag=\"001\">8u3287432</marc:controlfield>" +
"<marc:leader>00000naa a2200000uc 4500</marc:leader></marc:record>" + XML_MARC_COLLECTION_END_TAG;
String actual = resultCollector.toString();
assertNotEquals(expected, actual);
}

@Test
public void issue527_shouldEmitLeaderAlwaysAsWholeString() {
createRecordWithLeader("1", "a", "o", "a", " ", "a", "z", "u", " ");