From 9dc602275b7e8424f16e0ac3a076b1ce2c69cde8 Mon Sep 17 00:00:00 2001 From: David Mesquita-Morris Date: Mon, 22 Nov 2021 06:32:58 +0000 Subject: [PATCH] Simplify the split on newline regex --- lib/node/index.js | 2 +- lib/node/test/index.d.ts | 1 + lib/node/test/index.js | 24 ++++++++++++++++++++++++ package.json | 2 +- src/index.ts | 3 +-- src/test/index.ts | 5 +++++ 6 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 lib/node/test/index.d.ts create mode 100644 lib/node/test/index.js create mode 100644 src/test/index.ts diff --git a/lib/node/index.js b/lib/node/index.js index d0c808f..1ce3ba7 100644 --- a/lib/node/index.js +++ b/lib/node/index.js @@ -9,7 +9,7 @@ exports.parse = void 0; function parse(text) { // convert the csv formatted test into a table of tokens const tokens = text.replace(/^\uFEFF|\r\n$|\n$|\r$/g, '') // trim byte order mark from beginning and trailing EOL if needed - .split(/\r\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\r(?=(?:(?:[^"]*"){2})*[^"]*$)/).map(row => // split text into rows at EOL + .split(/\r?\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\r(?=(?:(?:[^"]*"){2})*[^"]*$)/).map(row => // split text into rows at EOL row.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/) // split row into tokens based on comma delimiter (unless in quotes); see answer here: https://stackoverflow.com/questions/23582276/split-string-by-comma-but-ignore-commas-inside-quotes/23582323#23582323 .map(token => token.replace(/(^"|"$)/g, '') // dequote tokens if needed .replace(/\"\"/g, '"'))); // replace double double quotes with double quotes diff --git a/lib/node/test/index.d.ts b/lib/node/test/index.d.ts new file mode 100644 index 0000000..cb0ff5c --- /dev/null +++ b/lib/node/test/index.d.ts @@ -0,0 +1 @@ +export {}; diff --git a/lib/node/test/index.js b/lib/node/test/index.js new file mode 100644 index 0000000..8c44e6f --- /dev/null +++ b/lib/node/test/index.js @@ -0,0 +1,24 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +const csv = __importStar(require("..")); +const data = '\uFEFFa,b,c\r"1",2,3\n4,"5",6\r\n7,"The number eight: ""8""",9\r"a\r\na",b,"c, d"\n12'; +console.log(csv.parse(data)); diff --git a/package.json b/package.json index a44d6ff..0f81d76 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@steelbreeze/csv", - "version": "1.0.0-alpha.1", + "version": "1.0.0-alpha.2", "description": "Tools for reading and writnig files formatted as CSV", "main": "lib/node/index.js", "module": "lib/node/index.js", diff --git a/src/index.ts b/src/index.ts index bf65478..07c60ab 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,3 @@ - /** * Parses a string encoded as a comma seperated values * @param text The source csv text. @@ -7,7 +6,7 @@ export function parse(text: string): Array { // convert the csv formatted test into a table of tokens const tokens = text.replace(/^\uFEFF|\r\n$|\n$|\r$/g, '') // trim byte order mark from beginning and trailing EOL if needed - .split(/\r\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\r(?=(?:(?:[^"]*"){2})*[^"]*$)/).map(row => // split text into rows at EOL + .split(/\r?\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\r(?=(?:(?:[^"]*"){2})*[^"]*$)/).map(row => // split text into rows at EOL row.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/) // split row into tokens based on comma delimiter (unless in quotes); see answer here: https://stackoverflow.com/questions/23582276/split-string-by-comma-but-ignore-commas-inside-quotes/23582323#23582323 .map(token => token.replace(/(^"|"$)/g, '') // dequote tokens if needed .replace(/\"\"/g, '"'))); // replace double double quotes with double quotes diff --git a/src/test/index.ts b/src/test/index.ts new file mode 100644 index 0000000..28c9e7c --- /dev/null +++ b/src/test/index.ts @@ -0,0 +1,5 @@ +import * as csv from '..'; + +const data = '\uFEFFa,b,c\r"1",2,3\n4,"5",6\r\n7,"The number eight: ""8""",9\r"a\r\na",b,"c, d"\n12'; + +console.log(csv.parse(data));