From 69b7fa97c63f708f13b70134b6932a99dfe29751 Mon Sep 17 00:00:00 2001 From: Yang Xia <55853655+xiazcy@users.noreply.github.com> Date: Mon, 21 Nov 2022 18:16:27 -0800 Subject: [PATCH] Removed unescaping of double quotes in CSV export, added tests in printer to ensure proper escape of adjacent double quotes & newlines --- .../propertygraph/schema/DataType.java | 3 +- .../io/CsvPropertyGraphPrinterTest.java | 115 ++++++++++++++++++ .../propertygraph/schema/DataTypeTest.java | 10 +- 3 files changed, 124 insertions(+), 4 deletions(-) diff --git a/neptune-export/src/main/java/com/amazonaws/services/neptune/propertygraph/schema/DataType.java b/neptune-export/src/main/java/com/amazonaws/services/neptune/propertygraph/schema/DataType.java index 2ab4ae78..607a00c7 100644 --- a/neptune-export/src/main/java/com/amazonaws/services/neptune/propertygraph/schema/DataType.java +++ b/neptune-export/src/main/java/com/amazonaws/services/neptune/propertygraph/schema/DataType.java @@ -378,8 +378,7 @@ public static String escapeSeparators(Object value, String separator) { } public static String escapeDoubleQuotes(Object value) { - String temp = value.toString().replace("\"\"", "\""); - return temp.replace("\"", "\"\""); + return value.toString().replace("\"", "\"\""); } public String typeDescription() { diff --git a/neptune-export/src/test/java/com/amazonaws/services/neptune/propertygraph/io/CsvPropertyGraphPrinterTest.java b/neptune-export/src/test/java/com/amazonaws/services/neptune/propertygraph/io/CsvPropertyGraphPrinterTest.java index cf699da9..1b9c3280 100644 --- a/neptune-export/src/test/java/com/amazonaws/services/neptune/propertygraph/io/CsvPropertyGraphPrinterTest.java +++ b/neptune-export/src/test/java/com/amazonaws/services/neptune/propertygraph/io/CsvPropertyGraphPrinterTest.java @@ -17,12 +17,19 @@ import com.amazonaws.services.neptune.propertygraph.schema.DataType; import com.amazonaws.services.neptune.propertygraph.schema.LabelSchema; import 
com.amazonaws.services.neptune.propertygraph.schema.PropertySchema; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVRecord; import org.junit.Test; +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; import java.io.StringWriter; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; @@ -112,4 +119,112 @@ public void shouldUseEmptySeparatorToSeparateMultipleValues() throws Exception { stringWriter.toString()); } + @Test + public void shouldEscapeTwoDoubleQuoteAfterPrintPropertiesToCSVAndRewrite() throws Exception { + testEscapeCharacterAfterPrintPropertiesAndRewrite("{\"hobby\" : \"watching \"Flash\"\"}", + "\"{\"\"hobby\"\" : \"\"watching \"\"Flash\"\"\"\"}\"", + new PrinterOptions(CsvPrinterOptions.builder().build())); + } + + @Test + public void shouldEscapeThreeDoubleQuoteAfterPrintPropertiesToCSVAndRewrite() throws Exception { + testEscapeCharacterAfterPrintPropertiesAndRewrite("{\"hobby\" : \"watching \"The \"Flash\"\"\"}", + "\"{\"\"hobby\"\" : \"\"watching \"\"The \"\"Flash\"\"\"\"\"\"}\"", + new PrinterOptions(CsvPrinterOptions.builder().build())); + } + + @Test + public void shouldPrintCommaInStringWhenPrintPropertiesToCSVAndRewrite() throws Exception { + testEscapeCharacterAfterPrintPropertiesAndRewrite("{\"hobby\", \"watching \"The \"Flash\"\"}", + "\"{\"\"hobby\"\", \"\"watching \"\"The \"\"Flash\"\"\"\"}\"", + new PrinterOptions(CsvPrinterOptions.builder().build())); + } + + @Test + public void shouldNotEscapeNewlineCharAfterPrintPropertiesToCSVAndRewrite() throws Exception { + testEscapeCharacterAfterPrintPropertiesAndRewrite("A\nB", "\"A\nB\"", + new PrinterOptions(CsvPrinterOptions.builder().build())); + } + + @Test + public void shouldNotEscapeNewlineAfterPrintPropertiesToCSVAndRewrite() throws Exception { + testEscapeCharacterAfterPrintPropertiesAndRewrite("A" 
+ System.lineSeparator() + "B", "\"A\nB\"", + new PrinterOptions(CsvPrinterOptions.builder().build())); + } + + @Test + public void shouldEscapeNewlineCharSetTrueAfterPrintPropertiesToCSVAndRewrite() throws Exception { + testEscapeCharacterAfterPrintPropertiesAndRewrite("A\nB", + "\"A\\nB\"", + new PrinterOptions(CsvPrinterOptions.builder().setEscapeNewline(true).build())); + } + + @Test + public void shouldEscapeNewlineSetTrueAfterPrintPropertiesToCSVAndRewrite() throws Exception { + testEscapeCharacterAfterPrintPropertiesAndRewrite("A" + System.lineSeparator() + "B", + "\"A\\nB\"", + new PrinterOptions(CsvPrinterOptions.builder().setEscapeNewline(true).build())); + } + + @Test + public void shouldNotEscapeNewlineCharsAfterPrintPropertiesToCSVAndRewrite() throws Exception { + testEscapeCharacterAfterPrintPropertiesAndRewrite("A\n\nB", "\"A\n\nB\"", + new PrinterOptions(CsvPrinterOptions.builder().build())); + } + + @Test + public void shouldEscapeNewlineCharsSetTrueAfterPrintPropertiesToCSVAndRewrite() throws Exception { + testEscapeCharacterAfterPrintPropertiesAndRewrite("A\n\nB", + "\"A\\n\\nB\"", + new PrinterOptions(CsvPrinterOptions.builder().setEscapeNewline(true).build())); + } + + // A set of tests to ensure that String escaping is done properly when CSVPropertyGraphPrinter prints to + // a buffer, so when the buffer is read in by CSVFormat, the original property string is received + private void testEscapeCharacterAfterPrintPropertiesAndRewrite(String originalValue, String expectedValue, PrinterOptions printerOptions) throws IOException { + StringWriter stringWriter = new StringWriter(); + + PropertySchema propertySchema1 = new PropertySchema("property1", false, DataType.String, false); + + LabelSchema labelSchema = new LabelSchema(new Label("Entity")); + labelSchema.put("property1", propertySchema1); + + HashMap<String, List<String>> props = new HashMap<String, List<String>>() {{ + put("property1", Collections.singletonList(originalValue)); + }}; + + CsvPropertyGraphPrinter printer = new 
CsvPropertyGraphPrinter( + new PrintOutputWriter("outputId", stringWriter), + labelSchema, + printerOptions); + + printer.printProperties(props); + + // all double quotes should be escaped when printer prints + assertEquals(expectedValue, stringWriter.toString()); + + // using CSVFormat to read in printed items (same library used by RewriteCSV) + String[] filePropertyHeaders = labelSchema.propertySchemas().stream() + .map(p -> p.property().toString()) + .collect(Collectors.toList()) + .toArray(new String[]{}); + CSVFormat format = CSVFormat.RFC4180.withHeader(filePropertyHeaders); + Reader in = new StringReader(stringWriter.toString()); + Iterable<CSVRecord> records = format.parse(in); + + for (CSVRecord record : records) { + // what CSVFormat read in from printed CSV should be the original value + if (printerOptions.csv().escapeNewline()){ + // parsed record will contain escaped newline, to compare to original we have to unescape it + assertEquals(originalValue, record.get("property1").replace("\\n", "\n")); + } else { + assertEquals(originalValue, record.get("property1")); + } + + // double quotes should all be properly escaped again when we format for rewrite + assertEquals(expectedValue, DataType.String.format(record.get("property1"), printerOptions.csv().escapeNewline())); + } + } + + } \ No newline at end of file diff --git a/neptune-export/src/test/java/com/amazonaws/services/neptune/propertygraph/schema/DataTypeTest.java b/neptune-export/src/test/java/com/amazonaws/services/neptune/propertygraph/schema/DataTypeTest.java index 416b80ba..d445bfff 100644 --- a/neptune-export/src/test/java/com/amazonaws/services/neptune/propertygraph/schema/DataTypeTest.java +++ b/neptune-export/src/test/java/com/amazonaws/services/neptune/propertygraph/schema/DataTypeTest.java @@ -39,9 +39,15 @@ public void shouldEscapeDoubleQuotes() { } @Test - public void shouldNotDoubleEscapeDoubleQuotesThatHaveAlreadyBeenEscaped() { + public void shouldEscapeTwoDoubleQuotes() { String result = 
DataType.String.format("One \"\"two\"\" three"); - assertEquals("\"One \"\"two\"\" three\"", result); + assertEquals("\"One \"\"\"\"two\"\"\"\" three\"", result); + } + + @Test + public void shouldEscapeThreeDoubleQuotes() { + String result = DataType.String.format("One \"\"\"two\"\"\" three"); + assertEquals("\"One \"\"\"\"\"\"two\"\"\"\"\"\" three\"", result); } @Test