diff --git a/metafacture-csv/build.gradle b/metafacture-csv/build.gradle index b63fe775b..ee029ff14 100644 --- a/metafacture-csv/build.gradle +++ b/metafacture-csv/build.gradle @@ -19,7 +19,7 @@ description = 'Modules for processing comma-separated values' dependencies { api project(':metafacture-framework') - implementation 'com.opencsv:opencsv:3.10' + implementation 'com.opencsv:opencsv:5.9' testImplementation "junit:junit:${versions.junit}" testImplementation "org.mockito:mockito-core:${versions.mockito}" } diff --git a/metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java b/metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java index 06bd6a690..3f51b64b4 100644 --- a/metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java +++ b/metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java @@ -1,5 +1,5 @@ /* - * Copyright 2013, 2014 Deutsche Nationalbibliothek + * Copyright 2013-2024 Deutsche Nationalbibliothek and hbz * * Licensed under the Apache License, Version 2.0 the "License"; * you may not use this file except in compliance with the License. 
@@ -16,6 +16,11 @@ package org.metafacture.csv; +import com.opencsv.CSVReader; +import com.opencsv.CSVReaderBuilder; +import com.opencsv.RFC4180Parser; +import com.opencsv.RFC4180ParserBuilder; +import com.opencsv.exceptions.CsvException; import org.metafacture.framework.FluxCommand; import org.metafacture.framework.StreamReceiver; import org.metafacture.framework.annotations.Description; @@ -23,8 +28,6 @@ import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; -import com.opencsv.CSVReader; - import java.io.IOException; import java.io.StringReader; import java.util.List; @@ -48,6 +51,7 @@ public final class CsvDecoder extends DefaultObjectPipe private String[] header = new String[0]; private int count; private boolean hasHeader; + private RFC4180Parser parser; /** * Creates an instance of {@link CsvDecoder} with a given separator. @@ -56,6 +60,7 @@ public final class CsvDecoder extends DefaultObjectPipe */ public CsvDecoder(final String separator) { this.separator = separator.charAt(0); + initializeCsvParser(); } /** @@ -65,6 +70,7 @@ public CsvDecoder(final String separator) { */ public CsvDecoder(final char separator) { this.separator = separator; + initializeCsvParser(); } /** @@ -72,6 +78,13 @@ public CsvDecoder(final char separator) { * {@value #DEFAULT_SEP}. 
*/ public CsvDecoder() { + initializeCsvParser(); + } + + private void initializeCsvParser() { + this.parser = new RFC4180ParserBuilder() + .withSeparator(separator) + .build(); } @Override @@ -105,18 +118,18 @@ else if (parts.length == header.length) { } } - private String[] parseCsv(final String string) { + private String[] parseCsv(final String csv) { String[] parts = new String[0]; try { - final CSVReader reader = new CSVReader(new StringReader(string), - separator); + final CSVReader reader = new CSVReaderBuilder(new StringReader(csv)) + .withCSVParser(parser) + .build(); final List lines = reader.readAll(); if (lines.size() > 0) { parts = lines.get(0); } reader.close(); - } - catch (final IOException e) { + } catch (final IOException | CsvException e) { e.printStackTrace(); } return parts; @@ -139,5 +152,6 @@ public void setHasHeader(final boolean hasHeader) { */ public void setSeparator(final String separator) { this.separator = separator.charAt(0); + initializeCsvParser(); } } diff --git a/metafacture-csv/src/test/java/org/metafacture/csv/CsvDecoderTest.java b/metafacture-csv/src/test/java/org/metafacture/csv/CsvDecoderTest.java index ed095383c..dba5cd1f5 100644 --- a/metafacture-csv/src/test/java/org/metafacture/csv/CsvDecoderTest.java +++ b/metafacture-csv/src/test/java/org/metafacture/csv/CsvDecoderTest.java @@ -89,4 +89,22 @@ public void testTabSeparated() { ordered.verify(receiver).endRecord(); } + /** + In (raw characters, [TAB] is a tab character): "a","b[TAB]","c\t","\","\cd\" + Out (backslashes are kept literally, not treated as escapes): a, b[TAB], c\t, \, \cd\ + */ + @Test + public void issue496_escaping() { + decoder.setHasHeader(false); + decoder.process("\"a\",\"b\t\",\"c\\t\",\"\\\",\"\\cd\\\""); + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startRecord("1"); + ordered.verify(receiver).literal("0", "a"); + ordered.verify(receiver).literal("1", "b\t"); + ordered.verify(receiver).literal("2", "c\\t"); + ordered.verify(receiver).literal("3", "\\"); + ordered.verify(receiver).literal("4", "\\cd\\"); +
ordered.verify(receiver).endRecord(); + } + }