From 3b260e551121541f45b22032b73a10540ccccd5a Mon Sep 17 00:00:00 2001 From: Matt Patterson <8550+fidothe@users.noreply.github.com> Date: Wed, 26 Jul 2023 12:13:34 +0200 Subject: [PATCH 1/7] Fix errors in examples These are simple errors that happened when the functions were changed around. --- .../xpath-functions-40/src/function-catalog.xml | 16 ++++++++-------- .../xpath-functions-40/src/xpath-functions.xml | 3 +-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/specifications/xpath-functions-40/src/function-catalog.xml b/specifications/xpath-functions-40/src/function-catalog.xml index 9f8a4f07e..6889a0880 100644 --- a/specifications/xpath-functions-40/src/function-catalog.xml +++ b/specifications/xpath-functions-40/src/function-catalog.xml @@ -21876,7 +21876,7 @@ return $M(collation-key("a", $C)) parse-csv(`name,city{$crlf}Bob,Berlin{$crlf}Alice,Aachen{$crlf}`) ( - ["name", "city"] + ["name", "city"], ["Bob", "Berlin"], ["Alice", "Aachen"] ) @@ -21884,7 +21884,7 @@ return $M(collation-key("a", $C)) parse-csv(`name,city{$cr}Bob,Berlin{$cr}Alice,Aachen{$cr}`) ( - ["name", "city"] + ["name", "city"], ["Bob", "Berlin"], ["Alice", "Aachen"] ) @@ -21892,7 +21892,7 @@ return $M(collation-key("a", $C)) parse-csv(`name,city{$lf}Bob,Berlin{$lf}Alice,Aachen{$lf}`) ( - ["name", "city"] + ["name", "city"], ["Bob", "Berlin"], ["Alice", "Aachen"] ) @@ -21903,7 +21903,7 @@ return $M(collation-key("a", $C)) parse-csv(`"name","city"${crlf}"Bob","Berlin"${crlf}"Alice","Aachen"${crlf}`) ( - ["name", "city"] + ["name", "city"], ["Bob", "Berlin"], ["Alice", "Aachen"] ) @@ -21911,7 +21911,7 @@ return $M(collation-key("a", $C)) parse-csv(`"name","city"${crlf}"Bob ""The Exemplar"" Mustermann","Berlin"${crlf}`) ( - ["name", "city"] + ["name", "city"], ['Bob "The Exemplar" Mustermann', "Berlin"], ["Alice", "Aachen"] ) @@ -21922,7 +21922,7 @@ return $M(collation-key("a", $C)) parse-csv("name;city§Bob;Berlin§Alice;Aachen", map{"record-separator": "§", "field-separator": ";"}) 
( - ["name", "city"] + ["name", "city"], ["Bob", "Berlin"], ["Alice", "Aachen"] ) @@ -21933,7 +21933,7 @@ return $M(collation-key("a", $C)) parse-csv(`|name|,|city|${crlf}|Bob|,|Berlin|${crlf}`, map{"quote-character": "|"}) ( - ["name", "city"] + ["name", "city"], ["Bob", "Berlin"], ["Alice", "Aachen"] ) @@ -21944,7 +21944,7 @@ return $M(collation-key("a", $C)) parse-csv(`name ,city ${crlf}Bob ,Berlin${crlf}Alice ,Aachen${crlf}`, map{"trim-whitespace: true()}) ( - ["name", "city"] + ["name", "city"], ["Bob", "Berlin"], ["Alice", "Aachen"] ) diff --git a/specifications/xpath-functions-40/src/xpath-functions.xml b/specifications/xpath-functions-40/src/xpath-functions.xml index 705d4668a..f176290fd 100644 --- a/specifications/xpath-functions-40/src/xpath-functions.xml +++ b/specifications/xpath-functions-40/src/xpath-functions.xml @@ -7194,8 +7194,7 @@ Bob,2023-07-14,2.34

Using XQuery:

{ for $column in $csv?columns?fields From cb10e8047e097929faae4dbf92b6dc4515472f7b Mon Sep 17 00:00:00 2001 From: Matt Patterson <8550+fidothe@users.noreply.github.com> Date: Mon, 31 Jul 2023 11:48:37 +0200 Subject: [PATCH 2/7] Fix errors in parse-csv test cases There were some things like bad syntax and using ${var} instead of {$var} in string value templates that I didn't spot until running them against an implementation. --- .../src/function-catalog.xml | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/specifications/xpath-functions-40/src/function-catalog.xml b/specifications/xpath-functions-40/src/function-catalog.xml index 6889a0880..1da98c8a9 100644 --- a/specifications/xpath-functions-40/src/function-catalog.xml +++ b/specifications/xpath-functions-40/src/function-catalog.xml @@ -21868,9 +21868,9 @@ return $M(collation-key("a", $C))

For more discussion of the returned data, see .

- - - + + +

Handling any of the default record separators:

@@ -21901,7 +21901,7 @@ return $M(collation-key("a", $C))

Quote handling:

- parse-csv(`"name","city"${crlf}"Bob","Berlin"${crlf}"Alice","Aachen"${crlf}`) + parse-csv(`"name","city"{$crlf}"Bob","Berlin"{$crlf}"Alice","Aachen"{$crlf}`) ( ["name", "city"], ["Bob", "Berlin"], @@ -21909,18 +21909,17 @@ return $M(collation-key("a", $C)) ) - parse-csv(`"name","city"${crlf}"Bob ""The Exemplar"" Mustermann","Berlin"${crlf}`) + parse-csv(`"name","city"{$crlf}"Bob ""The Exemplar"" Mustermann","Berlin"{$crlf}`) ( ["name", "city"], - ['Bob "The Exemplar" Mustermann', "Berlin"], - ["Alice", "Aachen"] + ['Bob "The Exemplar" Mustermann', "Berlin"] )

Non-default record- and field-separators:

- parse-csv("name;city§Bob;Berlin§Alice;Aachen", map{"record-separator": "§", "field-separator": ";"}) + parse-csv("name;city§Bob;Berlin§Alice;Aachen", map{"row-delimiter": "§", "field-delimiter": ";"}) ( ["name", "city"], ["Bob", "Berlin"], @@ -21931,18 +21930,17 @@ return $M(collation-key("a", $C))

Non-default quote character:

- parse-csv(`|name|,|city|${crlf}|Bob|,|Berlin|${crlf}`, map{"quote-character": "|"}) + parse-csv(`|name|,|city|{$crlf}|Bob|,|Berlin|{$crlf}`, map{"quote-character": "|"}) ( ["name", "city"], - ["Bob", "Berlin"], - ["Alice", "Aachen"] + ["Bob", "Berlin"] )

Trimming whitespace in fields:

- parse-csv(`name ,city ${crlf}Bob ,Berlin${crlf}Alice ,Aachen${crlf}`, map{"trim-whitespace: true()}) + parse-csv(`name ,city {$crlf}Bob ,Berlin{$crlf}Alice ,Aachen{$crlf}`, map{"trim-whitespace": true()}) ( ["name", "city"], ["Bob", "Berlin"], @@ -22177,7 +22175,7 @@ return $M(collation-key("a", $C))

For more discussion of the returned data, see .

- + `name,city{$crlf}Bob,Berlin{$crlf}Alice,Aachen{$crlf}` map { "record-separator": "§", "field-separator": ";", "quote-character": "|" } `|name|;|city|§|Bob|;|Berlin|§|Alice|;|Aachen|` @@ -22475,7 +22473,7 @@ return $M(collation-key("a", $C)) quote-character are equal.

- + `name,city{$crlf}Bob,Berlin{$crlf}Alice,Aachen{$crlf}`

An empty CSV with default column extraction (false):

From c9ec9237709550b5c9f0cac731b80f8a2c60ab63 Mon Sep 17 00:00:00 2001 From: Matt Patterson <8550+fidothe@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:59:00 +0200 Subject: [PATCH 3/7] Fix more typos and spec errors Particularly: * bring csv-to-xdm and csv-to-xml options into line with each other. * ensure things are called -delimiter instead of sometimes -separator. * Fix examples which had `column-names` as a map(xs:string, xs:integer) rather than map(xs:integer, xs:string)... * Add missing error codes Added several examples to csv-to-xdm. (Most of these really need pulling into qt4tests.) --- .../src/function-catalog.xml | 334 ++++++++++++++---- .../src/xpath-functions.xml | 16 + 2 files changed, 280 insertions(+), 70 deletions(-) diff --git a/specifications/xpath-functions-40/src/function-catalog.xml b/specifications/xpath-functions-40/src/function-catalog.xml index 1da98c8a9..735c0673c 100644 --- a/specifications/xpath-functions-40/src/function-catalog.xml +++ b/specifications/xpath-functions-40/src/function-catalog.xml @@ -16723,7 +16723,7 @@ else $c[1] + sum(subsequence($c, 2)) - item-separator + item-delimiter xs:string? @@ -21817,11 +21817,10 @@ return $M(collation-key("a", $C)) xs:string "," - - The characters used to delimit records within the CSV string, if the - default use of line separator as record separator is to be overridden. - xs:string - () + + The sequence of strings used to delimit rows within the CSV string. Defaults to CRLF/LF/CR. + xs:string+ + ("&#13;&#10;", "&#10;", "&#13;") The character used to quote fields within the CSV string. An instance of @@ -21856,10 +21855,10 @@ return $M(collation-key("a", $C)) $csv does not conform to the grammar for quoted fields.

A dynamic error occurs if one or more of the values - for field-separator, record-separator, - quote-character are specified and are not a single character.

+ for field-delimiter or quote-character are specified and are + not a single character.

A dynamic error occurs if any of the values for - field-separator, record-separator, + field-delimiter, row-delimiter, quote-character are equal.

@@ -21917,7 +21916,7 @@ return $M(collation-key("a", $C))
-

Non-default record- and field-separators:

+

Non-default record- and field-delimiters:

parse-csv("name;city§Bob;Berlin§Alice;Aachen", map{"row-delimiter": "§", "field-delimiter": ";"}) ( @@ -22007,11 +22006,10 @@ return $M(collation-key("a", $C)) xs:string "," - - The characters used to delimit records within the CSV string, if the - default use of line separator as record separator is to be overridden. - xs:string - () + + The sequence of strings used to delimit rows within the CSV string. Defaults to CRLF/LF/CR. + xs:string+ + ("&#13;&#10;", "&#10;", "&#13;") The character used to quote fields within the CSV string. An instance of @@ -22037,8 +22035,10 @@ return $M(collation-key("a", $C)) Determines whether the first row of the CSV should be treated as a list of column names and returned as a csv-columns-record in the - columns entry of the returned map. - union(xs:boolean, map(xs:string, xs:integer)) + columns entry of the returned map. Permitted values are a map of type + map(xs:integer, xs:string) or an xs:boolean. + + item() false A csv-columns-record is constructed using the @@ -22067,8 +22067,21 @@ return $M(collation-key("a", $C)) + + A sequence indicating which fields to return and in which order. If this + option is missing or the empty sequence, all fields are returned in their natural + order. Items in the sequence are treated as the index of the column to return. In + the returned data, only fields from the specified columns are returned, and in + the order specified. This option is mutually exclusive with the + number-of-columns option. Specifying both options will cause an error. + xs:integer* + () + - Specifies how many columns to return. + Specifies how many columns to return. This option is mutually exclusive with the + filter-columns option. Specifying both options will cause an error. union(enum("all", "first-row"), xs:integer) "all" @@ -22102,7 +22115,7 @@ return $M(collation-key("a", $C))

The entry with key "columns" holds a csv-columns-record record. If column names have been extracted, or supplied, then the record will have a names entry whose value is a map of column-name to - column-number, map(xs:integer, xs:string). The record’s + column-number, map(xs:string, xs:integer). The record’s fields entry will contains the column names as a sequence of strings, xs:string*, replicating the row they were taken from.

@@ -22163,11 +22176,18 @@ return $M(collation-key("a", $C)) $csv does not conform to the grammar for quoted fields.

A dynamic error occurs if one or more of the values - for field-separator, record-separator, - quote-character are specified and are not a single character.

+ for field-delimiter or quote-character are specified and are + not a single character.

A dynamic error occurs if any of the values for - field-separator, record-separator, + field-delimiter, row-delimiter, quote-character are equal.

+

A dynamic error occurs if any column-index integers, + such as the values in a map supplied to column-names, or as the value of + number-of-columns or filter-columns, are negative or + zero.

+

A dynamic error occurs if both the + number-of-columns and filter-columns options are set in a + call to fn:csv-to-xdm.

All fields are returned as xs:string values.

@@ -22177,20 +22197,20 @@ return $M(collation-key("a", $C)) `name,city{$crlf}Bob,Berlin{$crlf}Alice,Aachen{$crlf}` - map { "record-separator": "§", "field-separator": ";", "quote-character": "|" } + map { "row-delimiter": "§", "field-delimiter": ";", "quote-character": "|" } `|name|;|city|§|Bob|;|Berlin|§|Alice|;|Aachen|` map { "trim-whitespace": true() }

With defaults for delimiters and quotes, and default column extraction (false):

map:keys(csv-to-xdm($csv-string)) - ("header", "rows") + ("columns", "rows") csv-to-xdm($csv-string)?columns map { "names": map {}, - "fields": (), + "fields": () } @@ -22199,7 +22219,7 @@ return $M(collation-key("a", $C)) csv-to-xdm($csv-string)?rows[1]?field("name") - + csv-to-xdm($csv-string)?rows[1]?field(2) @@ -22207,68 +22227,112 @@ return $M(collation-key("a", $C))

With defaults for delimiters and quotes, and columns: true() set:

- csv-to-xdm($csv-string, map {"columns": true()})?columns + csv-to-xdm($csv-string, map {"column-names": true()})?columns map { "names": map { "name": 1, "city": 2 }, - "fields": ("name", "city"), + "fields": ("name", "city") } - count(csv-to-xdm($csv-string, map {"columns": true()})?rows) + count(csv-to-xdm($csv-string, map {"column-names": true()})?rows) 2 - csv-to-xdm($csv-string), map {"columns": true()}?rows[1]?fields + csv-to-xdm($csv-string, map {"column-names": true()})?rows[1]?fields ("Bob", "Berlin") - csv-to-xdm($csv-string, map {"columns": true()})?rows[1]?field("name") + csv-to-xdm($csv-string, map {"column-names": true()})?rows[1]?field("name") "Bob" - csv-to-xdm($csv-string, map {"columns": true()})?rows[1]?field(2) + csv-to-xdm($csv-string, map {"column-names": true()})?rows[1]?field(2) "Berlin"

Non-default record- and field-delimiters, non-default quotes:

- map:keys(csv-to-xdm($non-std-csv, $options)) - ("header", "rows") + csv-to-xdm($non-std-csv, $options)?rows[3]?field(1) + "Alice" - - csv-to-xdm($non-std-csv, $options)?columns +
+ `Alice,Aachen{$crlf}Bob,Berlin{$crlf}` + map { "column-names": map { 1: "Person", 2: "Location" } } + +

Specifying column names explicitly:

+ + map:keys(csv-to-xdm($csv-string, $options)) + ("columns", "rows") + + + csv-to-xdm($csv-string, $options)?columns map { - "names": map {}, - "fields": (), + "names": map { 1: "Person", 2: "Location" }, + "fields": ("Person", "Location") } - - count(csv-to-xdm($non-std-csv, $options)?rows) - 3 + + count(csv-to-xdm($csv-string, $options)?rows) + 2 - - csv-to-xdm($non-std-csv, $options)?rows[3]?field(1) + + csv-to-xdm($csv-string, $options)?rows[1]?field(1) "Alice" + + csv-to-xdm($csv-string, $options)?rows[2]?field("Location") + "Berlin" +

Trimming whitespace in fields:

- csv-to-xdm(`name ,city ${crlf}Bob ,Berlin${crlf}Alice ,Aachen${crlf}`, $trim-opts)?rows?fields + csv-to-xdm(`name ,city {$crlf}Bob ,Berlin{$crlf}Alice ,Aachen{$crlf}`, $trim-opts)?rows?fields ("name", "city", "Bob", "Berlin", "Alice", "Aachen")
`date,name,city,amount,currency,original amount,note{$crlf}2023-07-19,Bob,Berlin,10.00,USD,13.99{$crlf}2023-07-20,Alice,Aachen,15.00{$crlf}2023-07-20,Charlie,Celle,15.00,GBP,11.99,cake,not a lie{$crlf}` -

Filtering columns

+

Filtering columns, with column-names: true()

- csv-to-xdm($csv-uneven-cols, map { "columns": true(), "filter-columns": (2,1,4) })?columns?fields + csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "filter-columns": (2,1,4) })?columns?fields ("name","date","amount") - for $r in csv-to-xdm($csv-uneven-cols, map { "columns": true(), "filter-columns": (2,1,4) })?rows return array { $r?fields } + for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "filter-columns": (2,1,4) })?rows return array { $r?fields } + ( + ["Bob","2023-07-19","10.00"], + ["Alice","2023-07-20","15.00"], + ["Charlie","2023-07-20","15.00"] +) + +
+ +

Filtering columns, with column-names: false()

+ + for $r in csv-to-xdm($csv-uneven-cols, map { "filter-columns": (2,1,4) })?rows return array { $r?fields } + ( + ["name","date","amount"], + ["Bob","2023-07-19","10.00"], + ["Alice","2023-07-20","15.00"], + ["Charlie","2023-07-20","15.00"] +) + +
+ +

Filtering columns, with column-names: map { ... }

+ + csv-to-xdm($csv-uneven-cols, map { "column-names": map { 1: "Person", 3: "Amount" }, "filter-columns": (2,1,4) })?columns + map { + "names": map { "Person": 1, "Amount": 3 }, + "fields": ("Person", "", "Amount") +} + + + for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": map { 1: "Person", 3: "Amount" }, "filter-columns": (2,1,4) })?rows return array { $r?fields } ( + ["name","date","amount"], ["Bob","2023-07-19","10.00"], ["Alice","2023-07-20","15.00"], ["Charlie","2023-07-20","15.00"] @@ -22278,26 +22342,64 @@ return $M(collation-key("a", $C))

Specifying the number of columns, using "all" (the default)

- csv-to-xdm($csv-uneven-cols, map { "columns": true(), "number-of-columns": "all" })?columns?fields + csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "all" })?columns?fields ("date","name","city","amount","currency","original amount","note") - for $r in csv-to-xdm($csv-uneven-cols, map { "columns": true(), "number-of-columns": "all" })?rows return array { $r?fields } + for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "all" })?rows return array { $r?fields } ( ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], ["2023-07-20","Alice","Aachen","15.00"], ["2023-07-20","Charlie","Celle","15.00","GBP","11.99","cake","not a lie"] +) + + + for $r in csv-to-xdm($csv-uneven-cols, map { "number-of-columns": "all" })?rows return array { $r?fields } + ( + ["date","name","city","amount","currency","original amount","note"], + ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], + ["2023-07-20","Alice","Aachen","15.00"], + ["2023-07-20","Charlie","Celle","15.00","GBP","11.99","cake","not a lie"] )
-

Specifying the number of columns using "first-row"

+

Specifying the number of columns, using "all" and column-names: map { ... }

- csv-to-xdm($csv-uneven-cols, map { "columns": true(), "number-of-columns": "first-row" })?columns?fields + csv-to-xdm($csv-uneven-cols, map { "column-names": map { 1: "Date", 6: "Amount", 5: "Currency" }, "number-of-columns": "all" })?columns + map { + "names": map { "Date": 1, "Amount": 6, "Currency": 5 }, + "fields": ("Date", "", "", "", "Currency", "Amount") +} + + + for $r in csv-to-xdm($csv-uneven-cols, map { + "column-names": map { 1: "Date", 6: "Amount", 5: "Currency" }, + "number-of-columns": "all" +})?rows return $r?field("Amount") + ("original amount", "13.99", "", "11.99") + +
+ +

Specifying the number of columns using "first-row" and column-names: false()

+ + for $r in csv-to-xdm($csv-uneven-cols, map { "number-of-columns": "first-row" })?rows return array { $r?fields } + ( + ["date","name","city","amount","currency","original amount","note"], + ["2023-07-19","Bob","Berlin","10.00","USD","13.99",""], + ["2023-07-20","Alice","Aachen","15.00","","",""], + ["2023-07-20","Charlie","Celle","15.00","GBP","11.99","cake"] +) + +
+ +

Specifying the number of columns using "first-row" and column-names: true()

+ + csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "first-row" })?columns?fields ("date","name","city","amount","currency","original amount","note") - for $r in csv-to-xdm($csv-uneven-cols, map { "columns": true(), "number-of-columns": "first-row" })?rows return array { $r?fields } + for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "first-row" })?rows return array { $r?fields } ( ["2023-07-19","Bob","Berlin","10.00","USD","13.99",""], ["2023-07-20","Alice","Aachen","15.00","","",""], @@ -22306,20 +22408,110 @@ return $M(collation-key("a", $C))
-

Specifying the number of columns with a number

+

Specifying the number of columns, using "first-row" and column-names: map { ... }

+ + csv-to-xdm($csv-uneven-cols, map { "column-names": map { 1: "Date", 4: "Amount" }, "number-of-columns": "first-row" })?columns + map { + "names": map { "Date": 1, "Amount": 4 }, + "fields": ("Date", "", "", "Amount") +} + + + for $r in csv-to-xdm($csv-uneven-cols, map { + "column-names": map { 1: "Date", 4: "Amount" }, + "number-of-columns": "first-row" +})?rows return array { $r?fields } + ( + ["date","name","city","amount"], + ["2023-07-19","Bob","Berlin","10.00"], + ["2023-07-20","Alice","Aachen","15.00"], + ["2023-07-20","Charlie","Celle","15.00"] +) + +
+ +

Specifying the number of columns with a number and column-names: true()

- csv-to-xdm($csv-uneven-cols, map { "columns": true(), "number-of-columns": 6 })?columns?fields + csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 })?columns?fields ("date","name","city","amount","currency","original amount") - for $r in csv-to-xdm($csv-uneven-cols, map { "columns": true(), "number-of-columns": 6 })?rows return array { $r?fields } + for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 })?rows return array { $r?fields } + ( + ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], + ["2023-07-20","Alice","Aachen","15.00","",""], + ["2023-07-20","Charlie","Celle","15.00","GBP","11.99"] +) + +
+ +

Specifying the number of columns with a number and column-names: false()

+ + for $r in csv-to-xdm($csv-uneven-cols, map { "number-of-columns": 6 })?rows return array { $r?fields } + ( + ["date","name","city","amount","currency","original amount"], + ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], + ["2023-07-20","Alice","Aachen","15.00","",""], + ["2023-07-20","Charlie","Celle","15.00","GBP","11.99"] +) + +
+ +

Specifying the number of columns with a number and column-names: map { ... }

+ + csv-to-xdm($csv-uneven-cols, map { "column-names": map { 1: "Date", 4: "Amount" }, "number-of-columns": 6 })?columns + map { + "names": map { "Date": 1, "Amount": 4 }, + "fields": ("Date", "", "", "Amount") +} + + + for $r in csv-to-xdm($csv-uneven-cols, map { + "column-names": map { 1: "Date", 4: "Amount" }, + "number-of-columns": 6 +})?rows return array { $r?fields } ( + ["date","name","city","amount","currency","original amount"], ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], ["2023-07-20","Alice","Aachen","15.00","",""], ["2023-07-20","Charlie","Celle","15.00","GBP","11.99"] )
+ +

Specifying both number-of-columns and filter-columns is an error condition

+ + csv-to-xdm($csv-string, map { "filter-columns": (1,3), "number-of-columns": "first-row" }) + + +
+ +

Specifying negative integers, or zero, in options taking xs:integer values is an error condition

+ + csv-to-xdm($csv-string, map { "filter-columns": -1 }) + + + + csv-to-xdm($csv-string, map { "filter-columns": 0 }) + + + + csv-to-xdm($csv-string, map { "number-of-columns": -1 }) + + + + csv-to-xdm($csv-string, map { "number-of-columns": 0 }) + + + + csv-to-xdm($csv-string, map { "column-names": map { 0: "Name" } }) + + + + csv-to-xdm($csv-string, map { "column-names": map { -1: "Name" } }) + + +
@@ -22379,8 +22571,8 @@ return $M(collation-key("a", $C)) xs:string ","
- - The characters used to delimit records within the CSV string, if the + + The characters used to delimit rows within the CSV string, if the default use of line separator as record separator is to be overridden. xs:string () @@ -22408,9 +22600,11 @@ return $M(collation-key("a", $C)) Determines whether the first row of the CSV should be treated as a list - of column headers and returned as a csv-columns-record in the - header entry of the returned map. - union(xs:boolean, map(xs:integer, xs:string)) + of column headers and returned as ]]> elements in + the ]]> element. Permitted values are a map of type + map(xs:integer, xs:string) or an xs:boolean. + + item() false The ]]> element is populated @@ -22466,10 +22660,10 @@ return $M(collation-key("a", $C)) $csv does not conform to the grammar for quoted fields.

A dynamic error occurs if one or more of the values - for field-separator, record-separator, + for field-delimiter, row-delimiter, quote-character are specified and are not a single character.

A dynamic error occurs if any of the values for - field-separator, record-separator, + field-delimiter, row-delimiter, quote-character are equal.

@@ -22489,7 +22683,7 @@ return $M(collation-key("a", $C))

An empty CSV with column extraction:

- csv-to-xml("", map { "columns": true() }) + csv-to-xml("", map { "column-names": true() }) @@ -22501,7 +22695,7 @@ return $M(collation-key("a", $C))

An empty CSV with explicit column names:

- csv-to-xml("", map { "columns": map { "name": 1, "city": 3 }) + csv-to-xml("", map { "column-names": map { 1: "name", 3: "city" } }) @@ -22517,7 +22711,7 @@ return $M(collation-key("a", $C))

With defaults for delimiters and quotes, and column extraction:

- csv-to-xml($csv-string, map { "columns": true() }) + csv-to-xml($csv-string, map { "column-names": true() }) @@ -22541,7 +22735,7 @@ return $M(collation-key("a", $C))

With defaults for delimiters and quotes, and column extraction:

- csv-to-xml($csv-string, map { "columns": true() }) + csv-to-xml($csv-string, map { "column-names": true() }) @@ -22566,7 +22760,7 @@ return $M(collation-key("a", $C))

Filtering columns

- csv-to-xml($csv-string, map { "columns": true(), "filter-columns": (2,1,4) }) + csv-to-xml($csv-string, map { "column-names": true(), "filter-columns": (2,1,4) }) @@ -22598,7 +22792,7 @@ return $M(collation-key("a", $C))

Specifying the number of columns, using "all" (the default)

- csv-to-xml($csv-uneven-cols, map { "columns": true(), "number-of-columns": "all" }) + csv-to-xml($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "all" }) @@ -22643,7 +22837,7 @@ return $M(collation-key("a", $C))

Specifying the number of columns using "first-row"

- csv-to-xml($csv-uneven-cols, map { "columns": true(), "number-of-columns": "first-row" }) + csv-to-xml($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "first-row" }) @@ -22691,7 +22885,7 @@ return $M(collation-key("a", $C))

Specifying the number of columns with a number

- csv-to-xml($csv-uneven-cols, map { "columns": true(), "number-of-columns": 6 }) + csv-to-xml($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 }) diff --git a/specifications/xpath-functions-40/src/xpath-functions.xml b/specifications/xpath-functions-40/src/xpath-functions.xml index f176290fd..0f2a7643d 100644 --- a/specifications/xpath-functions-40/src/xpath-functions.xml +++ b/specifications/xpath-functions-40/src/xpath-functions.xml @@ -10995,6 +10995,22 @@ ISBN 0 521 77752 6. $key argument is an xs:string and is not one of the known column names.

+ +

Raised by fn:csv-to-xdm, fn:csv-to-xml, + fn:csv-fetch-field-by-column, and the function from the + field entry of csv-columns-record, if an argument + referring to a column index is zero or negative. (This applies to the + number-of-columns and filter-columns options, to the keys of a map passed + to column-names, and to the argument of the field function + or of fn:csv-fetch-field-by-column.)

+
+ +

Raised by fn:csv-to-xdm and fn:csv-to-xml, if both the + number-of-columns and filter-columns options are set: + they are mutually exclusive.

+

Raised by fn:id, fn:idref, and fn:element-with-id if the node that identifies the tree to be searched is a node in a tree whose root is not From 2126a1432fe596a3123ad65685de4c2ac850e5c6 Mon Sep 17 00:00:00 2001 From: Matt Patterson <8550+fidothe@users.noreply.github.com> Date: Thu, 21 Sep 2023 21:53:21 +0200 Subject: [PATCH 4/7] Fixed more errors in csv-to-xdm Some old key names were still there, and there were some formatting issues. Moved a number of examples to the qt4test test suite --- .../src/function-catalog.xml | 226 +++--------------- 1 file changed, 37 insertions(+), 189 deletions(-) diff --git a/specifications/xpath-functions-40/src/function-catalog.xml b/specifications/xpath-functions-40/src/function-catalog.xml index 735c0673c..64274ad81 100644 --- a/specifications/xpath-functions-40/src/function-catalog.xml +++ b/specifications/xpath-functions-40/src/function-catalog.xml @@ -21994,9 +21994,10 @@ return $M(collation-key("a", $C)) fn:parse-csv, and the options controlling their use are defined there.

-

If the headers option is true, implementations must - exclude the first record from the returned map’s body key, and return it as - the value of the returned map’s headers-record key.

+

If the column-names option is true, implementations must + exclude the first record from the returned map’s rows key, and use it to + construct a csv-columns-record that is returned as the value of the + returned map’s columns key.

The entries that may appear in the $options map are as follows:

@@ -22102,12 +22103,6 @@ return $M(collation-key("a", $C))
-

If column names were extracted from the first row of the CSV, when there are duplicate - column names, implementations must include only the first occurrence - in the names entry of the csv-columns-record, ignoring - subsequent entries. Any fields in the first record whose value is the empty string - must also be omitted.

-

The result of the function is a parsed-csv-structure-record, a map with string keys containing two entries, columns, and rows.

@@ -22170,6 +22165,25 @@ return $M(collation-key("a", $C)) fn:csv-fetch-field-by-column for more details

+ +

If column names were extracted from the first row of the CSV, when there are duplicate + column names, implementations must include only the first occurrence + in the names entry of the csv-columns-record, ignoring + subsequent entries. Any fields in the first record whose value is the empty string + must also be omitted.

+ +

If the number-of-columns option is set to "first-row" or an + integer, or the filter-columns option is set, and the + column-names option is set to true(), the filtering of + columns is performed before the extraction of the first row and creation of the + csv-columns-record.

+ +

If the number-of-columns option is set to "first-row" or an + integer, or the filter-columns option is set, and the + column-names option is set to a map(xs:integer, xs:string), + then filtering of columns does not affect the creation of the + csv-columns-record, and it is possible that the number of fields in the + rows is smaller than the number of fields in the csv-columns-record.

A dynamic error occurs if the value of @@ -22199,7 +22213,6 @@ return $M(collation-key("a", $C)) `name,city{$crlf}Bob,Berlin{$crlf}Alice,Aachen{$crlf}` map { "row-delimiter": "§", "field-delimiter": ";", "quote-character": "|" } `|name|;|city|§|Bob|;|Berlin|§|Alice|;|Aachen|` - map { "trim-whitespace": true() }

With defaults for delimiters and quotes, and default column extraction (false):

@@ -22268,9 +22281,9 @@ return $M(collation-key("a", $C)) csv-to-xdm($csv-string, $options)?columns map { - "names": map { 1: "Person", 2: "Location" }, - "fields": ("Person", "Location") - } + "names": map { "Person": 1, "Location": 2 }, + "fields": ("Person", "Location") +}
count(csv-to-xdm($csv-string, $options)?rows) @@ -22285,14 +22298,10 @@ return $M(collation-key("a", $C)) "Berlin"
- -

Trimming whitespace in fields:

- - csv-to-xdm(`name ,city {$crlf}Bob ,Berlin{$crlf}Alice ,Aachen{$crlf}`, $trim-opts)?rows?fields - ("name", "city", "Bob", "Berlin", "Alice", "Aachen") - -
- `date,name,city,amount,currency,original amount,note{$crlf}2023-07-19,Bob,Berlin,10.00,USD,13.99{$crlf}2023-07-20,Alice,Aachen,15.00{$crlf}2023-07-20,Charlie,Celle,15.00,GBP,11.99,cake,not a lie{$crlf}` + concat(`date,name,city,amount,currency,original amount,note{$crlf}`, +`2023-07-19,Bob,Berlin,10.00,USD,13.99{$crlf}`, +`2023-07-20,Alice,Aachen,15.00{$crlf}`, +`2023-07-20,Charlie,Celle,15.00,GBP,11.99,cake,not a lie{$crlf}`)

Filtering columns, with column-names: true()

@@ -22305,18 +22314,6 @@ return $M(collation-key("a", $C)) ["Bob","2023-07-19","10.00"], ["Alice","2023-07-20","15.00"], ["Charlie","2023-07-20","15.00"] -)
-
-
- -

Filtering columns, with column-names: false()

- - for $r in csv-to-xdm($csv-uneven-cols, map { "filter-columns": (2,1,4) })?rows return array { $r?fields } - ( - ["name","date","amount"], - ["Bob","2023-07-19","10.00"], - ["Alice","2023-07-20","15.00"], - ["Charlie","2023-07-20","15.00"] )
@@ -22329,56 +22326,6 @@ return $M(collation-key("a", $C)) "fields": ("Person", "", "Amount") }
- - for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": map { 1: "Person", 3: "Amount" }, "filter-columns": (2,1,4) })?rows return array { $r?fields } - ( - ["name","date","amount"], - ["Bob","2023-07-19","10.00"], - ["Alice","2023-07-20","15.00"], - ["Charlie","2023-07-20","15.00"] -) - -
- -

Specifying the number of columns, using "all" (the default)

- - csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "all" })?columns?fields - ("date","name","city","amount","currency","original amount","note") - - - for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "all" })?rows return array { $r?fields } - ( - ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], - ["2023-07-20","Alice","Aachen","15.00"], - ["2023-07-20","Charlie","Celle","15.00","GBP","11.99","cake","not a lie"] -) - - - for $r in csv-to-xdm($csv-uneven-cols, map { "number-of-columns": "all" })?rows return array { $r?fields } - ( - ["date","name","city","amount","currency","original amount","note"], - ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], - ["2023-07-20","Alice","Aachen","15.00"], - ["2023-07-20","Charlie","Celle","15.00","GBP","11.99","cake","not a lie"] -) - -
- -

Specifying the number of columns, using "all" and column-names: map { ... }

- - csv-to-xdm($csv-uneven-cols, map { "column-names": map { 1: "Date", 6: "Amount", 5: "Currency" }, "number-of-columns": "all" })?columns - map { - "names": map { "Date": 1, "Amount": 6, "Currency": 5 }, - "fields": ("Date", "", "", "", "Currency", "Amount") -} - - - for $r in csv-to-xdm($csv-uneven-cols, map { - "column-names": map { 1: "Date", 6: "Amount", 5: "Currency" }, - "number-of-columns": "all" -})?rows return $r?field("Amount") - ("original amount", "13.99", "", "11.99") -

Specifying the number of columns using "first-row" and column-names: false()

@@ -22389,43 +22336,6 @@ return $M(collation-key("a", $C)) ["2023-07-19","Bob","Berlin","10.00","USD","13.99",""], ["2023-07-20","Alice","Aachen","15.00","","",""], ["2023-07-20","Charlie","Celle","15.00","GBP","11.99","cake"] -) -
- - -

Specifying the number of columns using "first-row" and column-names: true()

- - csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "first-row" })?columns?fields - ("date","name","city","amount","currency","original amount","note") - - - for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "first-row" })?rows return array { $r?fields } - ( - ["2023-07-19","Bob","Berlin","10.00","USD","13.99",""], - ["2023-07-20","Alice","Aachen","15.00","","",""], - ["2023-07-20","Charlie","Celle","15.00","GBP","11.99","cake"] -) - -
- -

Specifying the number of columns, using "first-row" and column-names: map { ... }

- - csv-to-xdm($csv-uneven-cols, map { "column-names": map { 1: "Date", 4: "Amount" }, "number-of-columns": "first-row" })?columns - map { - "names": map { "Date": 1, "Amount": 4 }, - "fields": ("Date", "", "", "Amount") -} - - - for $r in csv-to-xdm($csv-uneven-cols, map { - "column-names": map { 1: "Date", 4: "Amount" }, - "number-of-columns": "first-row" -})?rows return array { $r?fields } - ( - ["date","name","city","amount"], - ["2023-07-19","Bob","Berlin","10.00"], - ["2023-07-20","Alice","Aachen","15.00"], - ["2023-07-20","Charlie","Celle","15.00"] )
@@ -22433,85 +22343,20 @@ return $M(collation-key("a", $C))

Specifying the number of columns with a number and column-names: true()

csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 })?columns?fields - ("date","name","city","amount","currency","original amount") - - - for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 })?rows return array { $r?fields } - ( - ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], - ["2023-07-20","Alice","Aachen","15.00","",""], - ["2023-07-20","Charlie","Celle","15.00","GBP","11.99"] -) - - - -

Specifying the number of columns with a number and column-names: false()

- - for $r in csv-to-xdm($csv-uneven-cols, map { "number-of-columns": 6 })?rows return array { $r?fields } - ( - ["date","name","city","amount","currency","original amount"], - ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], - ["2023-07-20","Alice","Aachen","15.00","",""], - ["2023-07-20","Charlie","Celle","15.00","GBP","11.99"] -) - -
- -

Specifying the number of columns with a number and column-names: map { ... }

- - csv-to-xdm($csv-uneven-cols, map { "column-names": map { 1: "Date", 4: "Amount" }, "number-of-columns": "first-row" })?columns map { - "names": map { "Date": 1, "Amount": 4 }, - "fields": ("Date", "", "", "Amount") + "names": map { "date": 1, "name": 2, "city": 3, "amount": 4, "currency": 5, "original amount": 6 }, + "fields": ("date","name","city","amount","currency","original amount") } - for $r in csv-to-xdm($csv-uneven-cols, map { - "column-names": map { 1: "Date", 4: "Amount" }, - "number-of-columns": 6 -})?rows return array { $r?fields } + for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 })?rows return array { $r?fields } ( - ["date","name","city","amount","currency","original amount"], ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], ["2023-07-20","Alice","Aachen","15.00","",""], ["2023-07-20","Charlie","Celle","15.00","GBP","11.99"] )
- -

Specifying both number-of-columns and filter-columns is an error condition

- - csv-to-xdm($csv-string, map { "filter-columns": (1,3), "number-of-columns": "first-row" }) - - -
- -

Specifying negative integers, or zero, in options taking xs:integer values is an error condition

- - csv-to-xdm($csv-string, map { "filter-columns": -1 }) - - - - csv-to-xdm($csv-string, map { "filter-columns": 0 }) - - - - csv-to-xdm($csv-string, map { "number-of-columns": -1 }) - - - - csv-to-xdm($csv-string, map { "number-of-columns": 0 }) - - - - csv-to-xdm($csv-string, map { "column-names": map { 0: "Name" } }) - - - - csv-to-xdm($csv-string, map { "column-names": map { -1: "Name" } }) - - -
@@ -22756,7 +22601,10 @@ return $M(collation-key("a", $C)) ]]>
- `date,name,city,amount,currency,original amount,note{$crlf}2023-07-19,Bob,Berlin,10.00,USD,13.99{$crlf}2023-07-20,Alice,Aachen,15.00{$crlf}2023-07-20,Charlie,Celle,15.00,GBP,11.99,cake,not a lie{$crlf}` + concat(`date,name,city,amount,currency,original amount,note{$crlf}`, +`2023-07-19,Bob,Berlin,10.00,USD,13.99{$crlf}`, +`2023-07-20,Alice,Aachen,15.00{$crlf}`, +`2023-07-20,Charlie,Celle,15.00,GBP,11.99,cake,not a lie{$crlf}`)

Filtering columns

From e5b3dd4c8762d1be852ef744dad013bd3dcd442a Mon Sep 17 00:00:00 2001 From: Matt Patterson <8550+fidothe@users.noreply.github.com> Date: Thu, 21 Sep 2023 22:02:33 +0200 Subject: [PATCH 5/7] Change type of column-names option Change type of column-names map option to map(xs:string, xs:integer) from map(xs:integer, xs:string). It turns out I had used this format for all the examples, and the more I thought about it, the more it seemed unhelpful to have a map like `map { 1: "a", 2: "b" }` produce a `csv-columns-record` whose `names` entry was `map { "a": 1, "b": 2 }`. Not least, that prevented the `names` entry from a `csv-columns-record` being used as the `column-names` option in another invocation, which might be desirable if a user has one CSV with headers and subsequent CSVs of the same schema but without the header line (transaction logs split for size, perhaps). --- .../xpath-functions-40/src/function-catalog.xml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/specifications/xpath-functions-40/src/function-catalog.xml b/specifications/xpath-functions-40/src/function-catalog.xml index 64274ad81..e1e8cbba3 100644 --- a/specifications/xpath-functions-40/src/function-catalog.xml +++ b/specifications/xpath-functions-40/src/function-catalog.xml @@ -22037,7 +22037,7 @@ return $M(collation-key("a", $C)) Determines whether the first row of the CSV should be treated as a list of column names and returned as a csv-columns-record in the columns entry of the returned map. Permitted values are a map of type - map(xs:integer, xs:string) or an xs:boolean. + map(xs:string, xs:integer) or an xs:boolean. item() false @@ -22057,7 +22057,7 @@ return $M(collation-key("a", $C)) the returned parsed-csv-structure-record. Implementations must not exclude the first row from the rows entry of the parsed-csv-structure-record. 
- A csv-columns-record is + A csv-columns-record is constructed using the supplied map and returned as the header entry of the parsed-csv-structure-record. The supplied map is used as the names entry, and a sequence of strings for the @@ -22271,7 +22271,7 @@ return $M(collation-key("a", $C))
`Alice,Aachen{$crlf}Bob,Berlin{$crlf}` - map { "column-names": map { 1: "Person", 2: "Location" } } + map { "column-names": map { "Person": 1, "Location": 2 } }

Specifying column names explicitly:

@@ -22320,7 +22320,7 @@ return $M(collation-key("a", $C))

Filtering columns, with column-names: map { ... }

- csv-to-xdm($csv-uneven-cols, map { "column-names": map { 1: "Person", 3: "Amount" }, "filter-columns": (2,1,4) })?columns + csv-to-xdm($csv-uneven-cols, map { "column-names": map { "Person": 1, "Amount": 3 }, "filter-columns": (2,1,4) })?columns map { "names": map { "Person": 1, "Amount": 3 }, "fields": ("Person", "", "Amount") @@ -22447,7 +22447,7 @@ return $M(collation-key("a", $C)) Determines whether the first row of the CSV should be treated as a list of column headers and returned as ]]> elements in the ]]> element. Permitted values are a map of type - map(xs:integer, xs:string) or an xs:boolean. + map(xs:string, xs:integer) or an xs:boolean. item() false @@ -22459,7 +22459,7 @@ return $M(collation-key("a", $C)) element. Implementations must not include a ]]> element in the output. - The supplied map is used to + The supplied map is used to construct a sequence of ]]> elements to populate the ]]> element. The xs:integer denotes the column number, and the xs:string the column name. Gaps @@ -22540,7 +22540,7 @@ return $M(collation-key("a", $C))

An empty CSV with explicit column names:

- csv-to-xml("", map { "column-names": map { 1: "name", 3: "city"}) + csv-to-xml("", map { "column-names": map { "name": 1, "city": 3 }) From fbbfa48398e1ff1f14cbe64e0bb41b0f2d52554d Mon Sep 17 00:00:00 2001 From: Matt Patterson <8550+fidothe@users.noreply.github.com> Date: Fri, 20 Oct 2023 14:36:37 +0200 Subject: [PATCH 6/7] Update csv-to-xml examples so generated tests work --- .../src/function-catalog.xml | 372 +++++++++--------- 1 file changed, 186 insertions(+), 186 deletions(-) diff --git a/specifications/xpath-functions-40/src/function-catalog.xml b/specifications/xpath-functions-40/src/function-catalog.xml index e1e8cbba3..815e4ecbe 100644 --- a/specifications/xpath-functions-40/src/function-catalog.xml +++ b/specifications/xpath-functions-40/src/function-catalog.xml @@ -22519,9 +22519,9 @@ return $M(collation-key("a", $C)) csv-to-xml("") - - + + + ]]>
@@ -22530,26 +22530,26 @@ return $M(collation-key("a", $C)) csv-to-xml("", map { "column-names": true() }) - - - + + + + ]]>

An empty CSV with explicit column names:

- csv-to-xml("", map { "column-names": map { "name": 1, "city": 3 }) + csv-to-xml("", map { "column-names": map { "name": 1, "city": 3 } }) - - name - - city - - - + + + name + + city + + + ]]>
@@ -22558,22 +22558,22 @@ return $M(collation-key("a", $C)) csv-to-xml($csv-string, map { "column-names": true() }) - - name - city - - - - Bob - Berlin - - - Alice - Aachen - - - + + + name + city + + + + Bob + Berlin + + + Alice + Aachen + + + ]]>
@@ -22582,22 +22582,22 @@ return $M(collation-key("a", $C)) csv-to-xml($csv-string, map { "column-names": true() }) - - name - city - - - - Bob - Berlin - - - Alice - Aachen - - - + + + name + city + + + + Bob + Berlin + + + Alice + Aachen + + + ]]> @@ -22608,32 +22608,32 @@ return $M(collation-key("a", $C))

Filtering columns

- csv-to-xml($csv-string, map { "column-names": true(), "filter-columns": (2,1,4) }) + csv-to-xml($csv-uneven-cols, map { "column-names": true(), "filter-columns": (2,1,4) }) - - name - date - amount - - - - Bob - 2023-07-19 - 10.00 - - - Alice - 2023-07-20 - 15.00 - - - Charlie - 2023-07-20 - 15.00 - - - + + + name + date + amount + + + + Bob + 2023-07-19 + 10.00 + + + Alice + 2023-07-20 + 15.00 + + + Charlie + 2023-07-20 + 15.00 + + + ]]>
@@ -22642,43 +22642,43 @@ return $M(collation-key("a", $C)) csv-to-xml($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "all" }) - - date - name - city - amount - currency - original amount - note - - - - 2023-07-19 - Bob - Berlin - 10.00 - USD - 13.99 - - - 2023-07-20 - Alice - Aachen - 15.00 - - - 2023-07-20 - Charlie - Celle - 15.00 - GBP - 11.99 - cake - not a lie - - - + + + date + name + city + amount + currency + original amount + note + + + + 2023-07-19 + Bob + Berlin + 10.00 + USD + 13.99 + + + 2023-07-20 + Alice + Aachen + 15.00 + + + 2023-07-20 + Charlie + Celle + 15.00 + GBP + 11.99 + cake + not a lie + + + ]]> @@ -22687,46 +22687,46 @@ return $M(collation-key("a", $C)) csv-to-xml($csv-uneven-cols, map { "column-names": true(), "number-of-columns": "first-row" }) - - date - name - city - amount - currency - original amount - note - - - - 2023-07-19 - Bob - Berlin - 10.00 - USD - 13.99 - - - - 2023-07-20 - Alice - Aachen - 15.00 - - - - - - 2023-07-20 - Charlie - Celle - 15.00 - GBP - 11.99 - cake - - - + + + date + name + city + amount + currency + original amount + note + + + + 2023-07-19 + Bob + Berlin + 10.00 + USD + 13.99 + + + + 2023-07-20 + Alice + Aachen + 15.00 + + + + + + 2023-07-20 + Charlie + Celle + 15.00 + GBP + 11.99 + cake + + + ]]> @@ -22735,42 +22735,42 @@ return $M(collation-key("a", $C)) csv-to-xml($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 }) - - date - name - city - amount - currency - original amount - - - - 2023-07-19 - Bob - Berlin - 10.00 - USD - 13.99 - - - 2023-07-20 - Alice - Aachen - 15.00 - - - - - 2023-07-20 - Charlie - Celle - 15.00 - GBP - 11.99 - - - + + + date + name + city + amount + currency + original amount + + + + 2023-07-19 + Bob + Berlin + 10.00 + USD + 13.99 + + + 2023-07-20 + Alice + Aachen + 15.00 + + + + + 2023-07-20 + Charlie + Celle + 15.00 + GBP + 11.99 + + + ]]> From 11a2c27fa08ee8cf232c308e197dd91a0fc8d420 Mon Sep 17 00:00:00 2001 From: Matt Patterson 
<8550+fidothe@users.noreply.github.com> Date: Wed, 8 Nov 2023 15:07:22 +0100 Subject: [PATCH 7/7] Rename functions per QTCG meeting Rename parse-csv to csv-to-simple-rows Rename csv-to-xdm to parse-csv Remove csv-fetch-field-by-column --- .../src/function-catalog.xml | 537 ++++++++---------- .../src/xpath-functions.xml | 86 +-- 2 files changed, 264 insertions(+), 359 deletions(-) diff --git a/specifications/xpath-functions-40/src/function-catalog.xml b/specifications/xpath-functions-40/src/function-catalog.xml index 815e4ecbe..df9601954 100644 --- a/specifications/xpath-functions-40/src/function-catalog.xml +++ b/specifications/xpath-functions-40/src/function-catalog.xml @@ -59,6 +59,7 @@ + @@ -67,7 +68,7 @@ - + @@ -21766,193 +21767,7 @@ return $M(collation-key("a", $C)) - - - - - - - deterministic - context-independent - focus-independent - - -

Parses CSV data supplied as a string, returning the results in the form of a sequence of arrays of strings.

-
- -

The effect of the one-argument form of this function is the same as calling the - two-argument form with an empty map as the value of the $options - argument.

- -

The first argument is CSV data, as defined in , in the form of a - sequence of xs:string values. The function parses this sequence to return - an XDM value.

- -

If $csv is the empty sequence, implementations must - return the empty sequence as the value of the body field of the returned - map.

- -

The $options argument can be used to control the way in which the parsing - takes place. The option parameter conventions apply.

- -

Implementations must treat any of CRLF, CR, or LF as a single line - separator, as with fn:unparsed-text-lines.

- -

Fields are regarded as simple xs:string values. Implementations - must leave whitespace within a field untouched, without - normalizing or otherwise altering it, unless whitespace trimming is explicitly requested - by the user using the trim-whitespace option.

- -

When whitespace trimming is requested, implementations must only - strip leading and trailing whitespace, this is not equivalent to calling - fn:normalize-space().

- -

The entries that may appear in the $options map are as follows:

- - - - The character used to delimit fields within a record. An instance of - xs:string whose length is exactly one. - xs:string - "," - - - The sequence of strings used to delimit rows within the CSV string. Defaults to CRLF/LF/CR. - xs:string+ - ("&#13;&#10;", "&#10;", "&#13;") - - - The character used to quote fields within the CSV string. An instance of - xs:string whose length is exactly one. - xs:string - '"' - - - Determines whether fields should have leading and trailing whitespace - removed before being returned. - xs:boolean - false - - Fields will be returned with any leading or trailing - whitespace intact. Implementations must preserve whitespace - as it occurred in the CSV string. - - Fields will be returned with leading or trailing - whitespace removed, and all non-leading or -trailing whitespace preserved. - - - - - -

The result of the function is a sequence of arrays-of-strings - array(xs:string)*.

-

A blank row is represented as an empty array.

-

An empty field is represented by the empty string.

-
- -

A dynamic error occurs if the value of - $csv does not conform to the grammar for quoted - fields.

-

A dynamic error occurs if one or more of the values - for field-delimiter or quote-character are specified and are - not a single character.

-

A dynamic error occurs if any of the values for - field-delimiter, row-delimiter, - quote-character are equal.

-
- -

All fields are returned as xs:string values.

-

Quoted fields in the input are returned without the quotes.

-

For more discussion of the returned data, see .

-
- - - - - -

Handling any of the default record separators:

- - parse-csv(`name,city{$crlf}Bob,Berlin{$crlf}Alice,Aachen{$crlf}`) - ( - ["name", "city"], - ["Bob", "Berlin"], - ["Alice", "Aachen"] -) - - - parse-csv(`name,city{$cr}Bob,Berlin{$cr}Alice,Aachen{$cr}`) - ( - ["name", "city"], - ["Bob", "Berlin"], - ["Alice", "Aachen"] -) - - - parse-csv(`name,city{$lf}Bob,Berlin{$lf}Alice,Aachen{$lf}`) - ( - ["name", "city"], - ["Bob", "Berlin"], - ["Alice", "Aachen"] -) - -
- -

Quote handling:

- - parse-csv(`"name","city"{$crlf}"Bob","Berlin"{$crlf}"Alice","Aachen"{$crlf}`) - ( - ["name", "city"], - ["Bob", "Berlin"], - ["Alice", "Aachen"] -) - - - parse-csv(`"name","city"{$crlf}"Bob ""The Exemplar"" Mustermann","Berlin"{$crlf}`) - ( - ["name", "city"], - ['Bob "The Exemplar" Mustermann', "Berlin"] -) - -
- -

Non-default record- and field-delimiters:

- - parse-csv("name;city§Bob;Berlin§Alice;Aachen", map{"row-delimiter": "§", "field-delimiter": ";"}) - ( - ["name", "city"], - ["Bob", "Berlin"], - ["Alice", "Aachen"] -) - -
- -

Non-default quote character:

- - parse-csv(`|name|,|city|{$crlf}|Bob|,|Berlin|{$crlf}`, map{"quote-character": "|"}) - ( - ["name", "city"], - ["Bob", "Berlin"] -) - -
- -

Trimming whitespace in fields:

- - parse-csv(`name ,city {$crlf}Bob ,Berlin{$crlf}Alice ,Aachen{$crlf}`, map{"trim-whitespace": true()}) - ( - ["name", "city"], - ["Bob", "Berlin"], - ["Alice", "Aachen"] -) - -
-
-
- - - - + @@ -21975,7 +21790,7 @@ return $M(collation-key("a", $C))

The first argument is CSV data, as defined in , in the form of a sequence of xs:string values. The function parses this sequence using - fn:parse-csv, and then processes its result to return an XDM value.

+ fn:csv-to-simple-rows, and then processes its result to return an XDM value.

If $csv is the empty sequence, implementations must return a parsed-csv-structure-record whose rows entry is the empty sequence.

@@ -21991,7 +21806,7 @@ return $M(collation-key("a", $C)) def="option-parameter-conventions">option parameter conventions apply.

Handling of delimiters, and whitespace trimming, are handled using - fn:parse-csv, and the options controlling their use are defined + fn:csv-to-simple-rows, and the options controlling their use are defined there.

If the column-names option is true, implementations must @@ -22155,14 +21970,26 @@ return $M(collation-key("a", $C)) supplied $key is a string and does not occur in the map of column names.

+ +

The function returns the field in the fields entry of this + csv-row-record at the position in + the sequence either explicitly provided (when the $key argument is an + xs:integer), or looked up from the map of name to position in the + names entry of the csv-columns-record of the + parsed-csv-structure-record this csv-row-record + was returned as part of.

+ +

When the argument is a string, if the string is missing from the keys + of the names map, then implementations must + raise an .

+ +

When the argument is an integer, if the integer position is outside the + bounds of the sequence contained in the fields entry of this + csv-row-record (i.e. is greater than the size of the + sequence), then implementations must return the empty + string.

+
- -

This function behaves identically to fn:csv-fetch-field-by-column - would had the header entry of the containing - parsed-csv-structure-record and the fields entry of - this csv-row-record been supplied as its first two arguments, and - $key as its last. See the definition of - fn:csv-fetch-field-by-column for more details

@@ -22201,7 +22028,7 @@ return $M(collation-key("a", $C)) zero.

A dynamic error occurs if both the number-of-columns and filter-columns options are set in a - call to fn:csv-to-xdm.

+ call to fn:parse-csv.

All fields are returned as xs:string values.

@@ -22216,57 +22043,57 @@ return $M(collation-key("a", $C))

With defaults for delimiters and quotes, and default column extraction (false):

- map:keys(csv-to-xdm($csv-string)) + map:keys(parse-csv($csv-string)) ("columns", "rows") - csv-to-xdm($csv-string)?columns + parse-csv($csv-string)?columns map { "names": map {}, "fields": () } - count(csv-to-xdm($csv-string)?rows) + count(parse-csv($csv-string)?rows) 3 - csv-to-xdm($csv-string)?rows[1]?field("name") + parse-csv($csv-string)?rows[1]?field("name") - csv-to-xdm($csv-string)?rows[1]?field(2) + parse-csv($csv-string)?rows[1]?field(2) "city"

With defaults for delimiters and quotes, and columns: true() set:

- csv-to-xdm($csv-string, map {"column-names": true()})?columns + parse-csv($csv-string, map {"column-names": true()})?columns map { "names": map { "name": 1, "city": 2 }, "fields": ("name", "city") } - count(csv-to-xdm($csv-string, map {"column-names": true()})?rows) + count(parse-csv($csv-string, map {"column-names": true()})?rows) 2 - csv-to-xdm($csv-string, map {"column-names": true()})?rows[1]?fields + parse-csv($csv-string, map {"column-names": true()})?rows[1]?fields ("Bob", "Berlin") - csv-to-xdm($csv-string, map {"column-names": true()})?rows[1]?field("name") + parse-csv($csv-string, map {"column-names": true()})?rows[1]?field("name") "Bob" - csv-to-xdm($csv-string, map {"column-names": true()})?rows[1]?field(2) + parse-csv($csv-string, map {"column-names": true()})?rows[1]?field(2) "Berlin"

Non-default record- and field-delimiters, non-default quotes:

- csv-to-xdm($non-std-csv, $options)?rows[3]?field(1) + parse-csv($non-std-csv, $options)?rows[3]?field(1) "Alice"
@@ -22275,26 +22102,26 @@ return $M(collation-key("a", $C))

Specifying column names explicitly:

- map:keys(csv-to-xdm($csv-string, $options)) + map:keys(parse-csv($csv-string, $options)) ("columns", "rows") - csv-to-xdm($csv-string, $options)?columns + parse-csv($csv-string, $options)?columns map { "names": map { "Person": 1, "Location": 2 }, "fields": ("Person", "Location") } - count(csv-to-xdm($csv-string, $options)?rows) + count(parse-csv($csv-string, $options)?rows) 2 - csv-to-xdm($csv-string, $options)?rows[1]?field(1) + parse-csv($csv-string, $options)?rows[1]?field(1) "Alice" - csv-to-xdm($csv-string, $options)?rows[2]?field("Location") + parse-csv($csv-string, $options)?rows[2]?field("Location") "Berlin"
@@ -22305,11 +22132,11 @@ return $M(collation-key("a", $C))

Filtering columns, with column-names: true()

- csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "filter-columns": (2,1,4) })?columns?fields + parse-csv($csv-uneven-cols, map { "column-names": true(), "filter-columns": (2,1,4) })?columns?fields ("name","date","amount") - for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "filter-columns": (2,1,4) })?rows return array { $r?fields } + for $r in parse-csv($csv-uneven-cols, map { "column-names": true(), "filter-columns": (2,1,4) })?rows return array { $r?fields } ( ["Bob","2023-07-19","10.00"], ["Alice","2023-07-20","15.00"], @@ -22320,7 +22147,7 @@ return $M(collation-key("a", $C))

Filtering columns, with column-names: map { ... }

- csv-to-xdm($csv-uneven-cols, map { "column-names": map { "Person": 1, "Amount": 3 }, "filter-columns": (2,1,4) })?columns + parse-csv($csv-uneven-cols, map { "column-names": map { "Person": 1, "Amount": 3 }, "filter-columns": (2,1,4) })?columns map { "names": map { "Person": 1, "Amount": 3 }, "fields": ("Person", "", "Amount") @@ -22330,7 +22157,7 @@ return $M(collation-key("a", $C))

Specifying the number of columns using "first-row" and column-names: false()

- for $r in csv-to-xdm($csv-uneven-cols, map { "number-of-columns": "first-row" })?rows return array { $r?fields } + for $r in parse-csv($csv-uneven-cols, map { "number-of-columns": "first-row" })?rows return array { $r?fields } ( ["date","name","city","amount","currency","original amount","note"], ["2023-07-19","Bob","Berlin","10.00","USD","13.99",""], @@ -22342,14 +22169,14 @@ return $M(collation-key("a", $C))

Specifying the number of columns with a number and column-names: true()

- csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 })?columns?fields + parse-csv($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 })?columns map { "names": map { "date": 1, "name": 2, "city": 3, "amount": 4, "currency": 5, "original amount": 6 }, "fields": ("date","name","city","amount","currency","original amount") } - for $r in csv-to-xdm($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 })?rows return array { $r?fields } + for $r in parse-csv($csv-uneven-cols, map { "column-names": true(), "number-of-columns": 6 })?rows return array { $r?fields } ( ["2023-07-19","Bob","Berlin","10.00","USD","13.99"], ["2023-07-20","Alice","Aachen","15.00","",""], ["2023-07-20","Charlie","Celle","15.00","GBP","11.99"] @@ -22360,6 +22187,192 @@ return $M(collation-key("a", $C))
+ + + + + + + + + deterministic + context-independent + focus-independent + + +

Parses CSV data supplied as a string, returning the results in the form of a sequence of arrays of strings.

+
+ +

The effect of the one-argument form of this function is the same as calling the + two-argument form with an empty map as the value of the $options + argument.

+ +

The first argument is CSV data, as defined in , in the form of a + sequence of xs:string values. The function parses this sequence to return + an XDM value.

+ +

If $csv is the empty sequence, implementations must + return the empty sequence as the value of the body field of the returned + map.

+ +

The $options argument can be used to control the way in which the parsing + takes place. The option parameter conventions apply.

+ +

Implementations must treat any of CRLF, CR, or LF as a single line + separator, as with fn:unparsed-text-lines.

+ +

Fields are regarded as simple xs:string values. Implementations + must leave whitespace within a field untouched, without + normalizing or otherwise altering it, unless whitespace trimming is explicitly requested + by the user using the trim-whitespace option.

+ +

When whitespace trimming is requested, implementations must strip only + leading and trailing whitespace; this is not equivalent to calling + fn:normalize-space().

+ +

The entries that may appear in the $options map are as follows:

+ + + + The character used to delimit fields within a record. An instance of + xs:string whose length is exactly one. + xs:string + "," + + + The sequence of strings used to delimit rows within the CSV string. Defaults to CRLF/LF/CR. + xs:string+ + ("&#13;&#10;", "&#10;", "&#13;") + + + The character used to quote fields within the CSV string. An instance of + xs:string whose length is exactly one. + xs:string + '"' + + + Determines whether fields should have leading and trailing whitespace + removed before being returned. + xs:boolean + false + + Fields will be returned with any leading or trailing + whitespace intact. Implementations must preserve whitespace + as it occurred in the CSV string. + + Fields will be returned with leading or trailing + whitespace removed, and all non-leading or -trailing whitespace preserved. + + + + + +

The result of the function is a sequence of arrays-of-strings + array(xs:string)*.

+

A blank row is represented as an empty array.

+

An empty field is represented by the empty string.

+
+ +

A dynamic error occurs if the value of + $csv does not conform to the grammar for quoted + fields.

+

A dynamic error occurs if one or more of the values + for field-delimiter or quote-character are specified and are + not a single character.

+

A dynamic error occurs if any of the values for + field-delimiter, row-delimiter, or + quote-character are equal.

+
+ +

All fields are returned as xs:string values.

+

Quoted fields in the input are returned without the quotes.

+

For more discussion of the returned data, see .

+
+ + + + + +

Handling any of the default record separators:

+ + csv-to-simple-rows(`name,city{$crlf}Bob,Berlin{$crlf}Alice,Aachen{$crlf}`) + ( + ["name", "city"], + ["Bob", "Berlin"], + ["Alice", "Aachen"] +) + + + csv-to-simple-rows(`name,city{$cr}Bob,Berlin{$cr}Alice,Aachen{$cr}`) + ( + ["name", "city"], + ["Bob", "Berlin"], + ["Alice", "Aachen"] +) + + + csv-to-simple-rows(`name,city{$lf}Bob,Berlin{$lf}Alice,Aachen{$lf}`) + ( + ["name", "city"], + ["Bob", "Berlin"], + ["Alice", "Aachen"] +) + +
+ +

Quote handling:

+ + csv-to-simple-rows(`"name","city"{$crlf}"Bob","Berlin"{$crlf}"Alice","Aachen"{$crlf}`) + ( + ["name", "city"], + ["Bob", "Berlin"], + ["Alice", "Aachen"] +) + + + csv-to-simple-rows(`"name","city"{$crlf}"Bob ""The Exemplar"" Mustermann","Berlin"{$crlf}`) + ( + ["name", "city"], + ['Bob "The Exemplar" Mustermann', "Berlin"] +) + +
+ +

Non-default record- and field-delimiters:

+ + csv-to-simple-rows("name;city§Bob;Berlin§Alice;Aachen", map{"row-delimiter": "§", "field-delimiter": ";"}) + ( + ["name", "city"], + ["Bob", "Berlin"], + ["Alice", "Aachen"] +) + +
+ +

Non-default quote character:

+ + csv-to-simple-rows(`|name|,|city|{$crlf}|Bob|,|Berlin|{$crlf}`, map{"quote-character": "|"}) + ( + ["name", "city"], + ["Bob", "Berlin"] +) + +
+ +

Trimming whitespace in fields:

+ + csv-to-simple-rows(`name ,city {$crlf}Bob ,Berlin{$crlf}Alice ,Aachen{$crlf}`, map{"trim-whitespace": true()}) + ( + ["name", "city"], + ["Bob", "Berlin"], + ["Alice", "Aachen"] +) + +
+
+
+ @@ -22383,7 +22396,7 @@ return $M(collation-key("a", $C))

The first argument is CSV data, as defined in , in the form of a sequence of xs:string values. The function parses this sequence using - fn:parse-csv, and then processes its result to return an XML document.

+ fn:csv-to-simple-rows, and then processes its result to return an XML document.

If $csv is the empty sequence, implementations must return a ]]> whose ]]> element @@ -22404,7 +22417,7 @@ return $M(collation-key("a", $C)) def="option-parameter-conventions">option parameter conventions apply.

Handling of delimiters, and whitespace trimming, are handled using - fn:parse-csv, and the options controlling their use are defined + fn:csv-to-simple-rows, and the options controlling their use are defined there.

The entries that may appear in the $options map are as follows:

@@ -22777,86 +22790,6 @@ return $M(collation-key("a", $C))
- - - - - - - - - - deterministic - context-independent - focus-independent - - -

Fetches a field from a parsed CSV row by name or position.

-
- -

The first argument is a csv-columns-record, as provided in the - header entry of the parsed-csv-structure-record returned by - fn:csv-to-xdm.

- -

The second argument is the row whose fields are being fetched, represented as a sequence - of strings as would be provided by the fields entry of a - csv-row-record returned by fn:csv-to-xdm.

- -

The final argument is the key to use for the lookup, supplied as either an - xs:string (the column name) or xs:integer (the column - position).

- -

When the argument is a string, if the string is missing from the keys of the map - contained in the names entry of the $columns argument’s - csv-columns-record, then implementations must raise - an .

- -

When the argument is an integer, if the integer position is outside the bounds of the - $fields sequence (i.e. is greater than the size of the sequence), then - implementations must return the empty string.

- -

The function returns the field in the sequence $fields at the position in - the sequence either explicitly provided (when $key is an - xs:integer), or looked up from the map of name to position in the - csv-columns-record provided in $columns.

-
- -

A dynamic error occurs if the value of - $key is an xs:string but is not a member of the keys of the - map contained in the names entry of the csv-columns-record in - $header. fields.

-
- - map { - "names": map { "name": 1, "city": 2 }, - "fields: ("name", "city") -} - ("Bob", "Berlin") - -

With a string key:

- - csv-fetch-field-by-column($columns, $fields, "name") - "Bob" - - - csv-fetch-field-by-column($columns, $fields, "amount") - - -
- -

With an integer key

- - csv-fetch-field-by-column($columns, $fields, 2) - "Berlin" - - - csv-fetch-field-by-column($columns, $fields, 3) - "" - -
-
-
- diff --git a/specifications/xpath-functions-40/src/xpath-functions.xml b/specifications/xpath-functions-40/src/xpath-functions.xml index 0f2a7643d..ca5abe30d 100644 --- a/specifications/xpath-functions-40/src/xpath-functions.xml +++ b/specifications/xpath-functions-40/src/xpath-functions.xml @@ -5762,14 +5762,11 @@ correctly in all browsers, depending on the system configuration.

--> - - - - - + + @@ -6874,19 +6871,19 @@ correctly in all browsers, depending on the system configuration.

--> within a field. (See .)

The functions for processing CSV-formatted data are built on - fn:parse-csv, which provides a simple representation of a parsed CSV + fn:csv-to-simple-rows, which provides a simple representation of a parsed CSV as a sequence of arrays-of-strings, array(xs:string)*, handling row and column delimiters, and quoting.

-

The fn:csv-to-xml and fn:csv-to-xdm functions provide more +

The fn:csv-to-xml and fn:parse-csv functions provide more sophisticated processing.

Common parsing options -

All three functions: fn:parse-csv, fn:csv-to-xml, and - fn:csv-to-xdm, take options to control basic parsing, consisting +

All three functions: fn:csv-to-simple-rows, fn:csv-to-xml, and + fn:parse-csv, take options to control basic parsing, consisting of specifying the various delimiters. These core delimiter options are used by the functions that generate CSV data:

@@ -6895,7 +6892,7 @@ correctly in all browsers, depending on the system configuration.

-->

Additionally, the parsing functions share an additional option to control whether leading and trailing whitespace should be stripped or not.

- +
@@ -6984,11 +6981,11 @@ correctly in all browsers, depending on the system configuration.

--> Basic mapping of CSV to XDM -

The basic output from fn:parse-csv returns a sequence of rows, where +

The basic output from fn:csv-to-simple-rows is a sequence of rows, where each row is simply mapped to an array of xs:string values.

The first row of the CSV is returned as with all the other rows. - fn:parse-csv does not distinguish between a header row and data + fn:csv-to-simple-rows does not distinguish between a header row and data rows, and returns all of them.

@@ -7031,16 +7028,16 @@ Field 2A,Field 2B,Field 2C,Field 2D' However, the reality is that CSVs can, and sometimes do, contain a variable number of fields in a row. As a result, implementations of this function must not truncate or pad the number of fields in each row for any reason. - The fn:csv-to-xml and fn:csv-to-xdm functions provide + The fn:csv-to-xml and fn:parse-csv functions provide facilities to deal with enforcing uniformity and an expected number of columns.

- Mapping CSV data to XDM in fn:csv-to-xdm + Mapping CSV data to XDM in fn:parse-csv -

The fn:csv-to-xdm function returns a +

The fn:parse-csv function returns a parsed-csv-structure-record:

@@ -7090,28 +7087,6 @@ Field 2A,Field 2B,Field 2C,Field 2D' fields sequence by either column position (when passed an xs:integer) or column name (when passed an xs:string).

- -

This function is, effectively, a partial application of - fn:csv-fetch-field-by-column where its $columns - argument is bound to the columns entry of the - parsed-csv-structure-record, and its $row argument - is bound to array{csv-row?fields}. This is described in more - detail below:

- -

Given a string, $csv-string containing CSV data, implementations - must return a function that will return identical results - to fn:csv-fetch-field-by-column called with the same - csv-columns and an array() containing the same - items as the fields sequence:

- - let $csv-record := fn:csv-to-xdm($csv-string), - $csv-columns := $csv-record?columns, - $csv-row := head($csv-record?rows) - return if (empty($csv-row?field(1))) - then empty(fn:csv-fetch-field-by-column($csv-columns, array{$csv-row}, 1)) - else $csv-row?field(1) = fn:csv-fetch-field-by-column($csv-columns, array{$csv-row}, 1) - (: must return true :) -
@@ -7180,21 +7155,21 @@ Bob,2023-07-14,2.34 Illustrative examples of processing CSV data -

The following examples illustrate how an application can build more complex processing of the output of fn:parse-csv.

+

The following examples illustrate how an application can build more complex processing of the output of fn:csv-to-simple-rows.

A variable, $crlf is assumed to be in scope containing the CR and LF characters

let $crlf := fn:char('x0D')||fn:char('x0A') - Converting a CSV into an HTML-style table using fn:csv-to-xdm + Converting a CSV into an HTML-style table using fn:parse-csv

Direct conversion is a matter of iterating across the records and fields to generate <tr> and <td> elements.

Using XQuery:

{ for $column in $csv?columns?fields @@ -7232,7 +7207,7 @@ return - +
@@ -10974,40 +10949,38 @@ ISBN 0 521 77752 6. -

Raised by fn:parse-csv if a syntax error in the quoting of one of the +

Raised by fn:csv-to-simple-rows if a syntax error in the quoting of one of the fields in the input CSV is found.

-

Raised by fn:parse-csv if the field-separator, +

Raised by fn:csv-to-simple-rows if the field-separator, record-separator, or quote-character option is set to an illegal value.

-

Raised by fn:parse-csv if any of the delimiter characters have been +

Raised by fn:csv-to-simple-rows if any of the delimiter characters have been set to the same value.

-

Raised by fn:csv-fetch-field-by-column, and the function from the - field entry of csv-columns-record, if its - $key argument is an xs:string and is not one of the - known column names.

+

Raised by the function from the field entry of + csv-columns-record, if its $key argument is an + xs:string and is not one of the known column names.

-

Raised by fn:csv-to-xdm, fn:csv-to-xml, - fn:csv-fetch-field-by-column, and the function from the - field entry of csv-columns-record, if an argument - referring to a column index is zero or negative. (The options +

Raised by fn:parse-csv, fn:csv-to-xml, and the function + from the field entry of csv-columns-record, if an + argument referring to a column index is zero or negative. (The options number-of-columns, filter-columns, or in a map passed - to column-names, or the argument to the field function, - or fn:csv-fetch-field-by-column.)

+ to column-names, or the argument to the field function.) +

-

Raised by fn:csv-to-xdm and fn:csv-to-xml, if both the +

Raised by fn:parse-csv and fn:csv-to-xml, if both the number-of-columns and filter-columns options are set: they are mutually exclusive.

@@ -11962,9 +11935,8 @@ declare function eg:distinct-nodes-stable ($arg as node()*) as node()* {

map:replace

map:substitute

fn:parse-csv

-

fn:csv-to-xdm

fn:csv-to-xml

-

fn:csv-fetch-field-by-column

+

fn:csv-to-simple-rows

array:replace

array:slice