From 3d98c77af233c90a6af4f6fe1a1a0dac79f93d85 Mon Sep 17 00:00:00 2001 From: Luther Tychonievich Date: Thu, 22 Feb 2024 15:02:30 -0600 Subject: [PATCH] Defer `digit` to core ABNF's `DIGIT` and adjust to that capitalization (#440) * Defer `digit` to core ABNF's `DIGIT` and adjust to that capitalization * Update specification/gedcom-1-hierarchical-container-format.md --------- Co-authored-by: Dave Thaler --- {extracted-files => build}/core.abnf | 2 +- {extracted-files => build}/languagetag.abnf | 8 +- {extracted-files => build}/mediatype.abnf | 6 +- extracted-files/grammar.abnf | 135 ++++++++++++++++-- extracted-files/tags/type-Time | 8 +- .../gedcom-1-hierarchical-container-format.md | 6 +- specification/gedcom-2-data-types.md | 10 +- 7 files changed, 147 insertions(+), 28 deletions(-) rename {extracted-files => build}/core.abnf (89%) rename {extracted-files => build}/languagetag.abnf (93%) rename {extracted-files => build}/mediatype.abnf (92%) diff --git a/extracted-files/core.abnf b/build/core.abnf similarity index 89% rename from extracted-files/core.abnf rename to build/core.abnf index 2de2347b..7fc621a4 100644 --- a/extracted-files/core.abnf +++ b/build/core.abnf @@ -1,6 +1,6 @@ ; Core Rules extracted from RFC 5234 section B.1 ALPHA = %x41-5A / %x61-7A ; A-Z / a-z -;DIGIT = %x30-39 ; 0-9 +DIGIT = %x30-39 ; 0-9 SP = %x20 HTAB = %x09 ; horizontal tab DQUOTE = %x22 ; " (Double Quote) diff --git a/extracted-files/languagetag.abnf b/build/languagetag.abnf similarity index 93% rename from extracted-files/languagetag.abnf rename to build/languagetag.abnf index f9414116..8718777a 100644 --- a/extracted-files/languagetag.abnf +++ b/build/languagetag.abnf @@ -21,16 +21,16 @@ extlang = 3ALPHA ; selected ISO 639 codes script = 4ALPHA ; ISO 15924 code region = 2ALPHA ; ISO 3166-1 code - / 3digit ; UN M.49 code + / 3DIGIT ; UN M.49 code variant = 5*8alphanum ; registered variants - / (digit 3alphanum) + / (DIGIT 3alphanum) extension = singleton 1*("-" (2*8alphanum)) ; Single 
alphanumerics ; "x" reserved for private use -singleton = digit ; 0 - 9 +singleton = DIGIT ; 0 - 9 / %x41-57 ; A - W / %x59-5A ; Y - Z / %x61-77 ; a - w @@ -69,4 +69,4 @@ regular = "art-lojban" ; these tags match the 'langtag' / "zh-min-nan" ; subtag or sequence of subtags / "zh-xiang" -alphanum = (ALPHA / digit) ; letters and numbers +alphanum = (ALPHA / DIGIT) ; letters and numbers diff --git a/extracted-files/mediatype.abnf b/build/mediatype.abnf similarity index 92% rename from extracted-files/mediatype.abnf rename to build/mediatype.abnf index d518e0da..206ebbff 100644 --- a/extracted-files/mediatype.abnf +++ b/build/mediatype.abnf @@ -14,8 +14,8 @@ type-name = restricted-name subtype-name = restricted-name restricted-name = restricted-name-first *126restricted-name-chars -restricted-name-first = ALPHA / digit -restricted-name-chars = ALPHA / digit / "!" / "#" / +restricted-name-first = ALPHA / DIGIT +restricted-name-chars = ALPHA / DIGIT / "!" / "#" / "$" / "&" / "-" / "^" / "_" restricted-name-chars =/ "." ; Characters before first dot always ; specify a facet name @@ -30,7 +30,7 @@ parameter-value = ( token / quoted-string ) token = 1*tchar tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." 
/ "^" / "_" / "`" / "|" / "~" - / digit / ALPHA + / DIGIT / ALPHA ; any VCHAR, except delimiters OWS = *( SP / HTAB ) ; optional whitespace diff --git a/extracted-files/grammar.abnf b/extracted-files/grammar.abnf index f2c4dda0..08c3fbb4 100644 --- a/extracted-files/grammar.abnf +++ b/extracted-files/grammar.abnf @@ -11,7 +11,7 @@ banned = %x00-08 / %x0B-0C / %x0E-1F ; C0 other than LF CR and Tab ; All other rules assume the absence of any banned characters -digit = %x30-39 ; 0 through 9 +; DIGIT = %x30-39 ; 0 through 9 -- part of the ABNF core nonzero = %x31-39 ; 1 through 9 ucletter = %x41-5A ; A through Z underscore = %x5F ; _ @@ -22,7 +22,7 @@ atsign = %x40 ; @ Line = Level D [Xref D] Tag [D LineVal] EOL -Level = "0" / nonzero *digit +Level = "0" / nonzero *DIGIT D = %x20 ; space Xref = atsign 1*tagchar atsign ; but not "@VOID@" Tag = stdTag / extTag @@ -31,7 +31,7 @@ EOL = %x0D [%x0A] / %x0A ; CR-LF, CR, or LF stdTag = ucletter *tagchar extTag = underscore 1*tagchar -tagchar = ucletter / digit / underscore +tagchar = ucletter / DIGIT / underscore pointer = voidPtr / Xref voidPtr = %s"@VOID@" @@ -49,7 +49,7 @@ Text = *anychar ; ------------- Integer ------------- -Integer = 1*digit +Integer = 1*DIGIT ; ------------- Enumeration ------------- @@ -88,10 +88,10 @@ epoch = %s"BCE" / extTag ; constrained by calendar Time = hour ":" minute [":" second ["." 
fraction]] [%s"Z"] -hour = digit / ("0" / "1") digit / "2" ("0" / "1" / "2" / "3") -minute = ("0" / "1" / "2" / "3" / "4" / "5") digit -second = ("0" / "1" / "2" / "3" / "4" / "5") digit -fraction = 1*digit +hour = DIGIT / ("0" / "1") DIGIT / "2" ("0" / "1" / "2" / "3") +minute = ("0" / "1" / "2" / "3" / "4" / "5") DIGIT +second = ("0" / "1" / "2" / "3" / "4" / "5") DIGIT +fraction = 1*DIGIT ; ------------- Age ------------- @@ -141,3 +141,122 @@ MediaType = type "/" subtype parameters Special = Text +; ABNF derived from RFC 5646 section 2.1 +Language-Tag = langtag ; normal language tags + / privateuse ; private use tag + / grandfathered ; grandfathered tags +langtag = language + ["-" script] + ["-" region] + *("-" variant) + *("-" extension) + ["-" privateuse] + +language = 2*3ALPHA ; shortest ISO 639 code + ["-" extlang] ; sometimes followed by + ; extended language subtags + / 4ALPHA ; or reserved for future use + / 5*8ALPHA ; or registered language subtag + +extlang = 3ALPHA ; selected ISO 639 codes + *2("-" 3ALPHA) ; permanently reserved + +script = 4ALPHA ; ISO 15924 code + +region = 2ALPHA ; ISO 3166-1 code + / 3DIGIT ; UN M.49 code + +variant = 5*8alphanum ; registered variants + / (DIGIT 3alphanum) + +extension = singleton 1*("-" (2*8alphanum)) + + ; Single alphanumerics + ; "x" reserved for private use +singleton = DIGIT ; 0 - 9 + / %x41-57 ; A - W + / %x59-5A ; Y - Z + / %x61-77 ; a - w + / %x79-7A ; y - z + +privateuse = "x" 1*("-" (1*8alphanum)) + +grandfathered = irregular ; non-redundant tags registered + / regular ; during the RFC 3066 era + +irregular = "en-GB-oed" ; irregular tags do not match + / "i-ami" ; the 'langtag' production and + / "i-bnn" ; would not otherwise be + / "i-default" ; considered 'well-formed' + / "i-enochian" ; These tags are all valid, + / "i-hak" ; but most are deprecated + / "i-klingon" ; in favor of more modern + / "i-lux" ; subtags or subtag + / "i-mingo" ; combination + / "i-navajo" + / "i-pwn" + / "i-tao" + / "i-tay" + 
/ "i-tsu" + / "sgn-BE-FR" + / "sgn-BE-NL" + / "sgn-CH-DE" + +regular = "art-lojban" ; these tags match the 'langtag' + / "cel-gaulish" ; production, but their subtags + / "no-bok" ; are not extended language + / "no-nyn" ; or variant subtags: their meaning + / "zh-guoyu" ; is defined by their registration + / "zh-hakka" ; and all of these are deprecated + / "zh-min" ; in favor of a more modern + / "zh-min-nan" ; subtag or sequence of subtags + / "zh-xiang" + +alphanum = (ALPHA / DIGIT) ; letters and numbers +; ABNF derived from RFC 2045 section 5.1 +type = discrete-type / composite-type +discrete-type = "text" / "image" / "audio" / "video" / + "application" / extension-token +composite-type = "message" / "multipart" / extension-token +extension-token = ietf-token / x-token +ietf-token = type-name +x-token = "x-" token +subtype = extension-token / iana-token +iana-token = subtype-name + +; ABNF derived from RFC 6838 section 4.2 +type-name = restricted-name +subtype-name = restricted-name + +restricted-name = restricted-name-first *126restricted-name-chars +restricted-name-first = ALPHA / DIGIT +restricted-name-chars = ALPHA / DIGIT / "!" / "#" / + "$" / "&" / "-" / "^" / "_" +restricted-name-chars =/ "." ; Characters before first dot always + ; specify a facet name +restricted-name-chars =/ "+" ; Characters after last plus always + ; specify a structured syntax suffix + +; ABNF derived from RFC 9110 section 5.6 +parameters = *( OWS ";" OWS [ parameter ] ) +parameter = parameter-name "=" parameter-value +parameter-name = token +parameter-value = ( token / quoted-string ) +token = 1*tchar +tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" + / "+" / "-" / "." 
/ "^" / "_" / "`" / "|" / "~" + / DIGIT / ALPHA + ; any VCHAR, except delimiters +OWS = *( SP / HTAB ) + ; optional whitespace +quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE +qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text +obs-text = %x80-FF +quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) +; Core Rules extracted from RFC 5234 section B.1 +ALPHA = %x41-5A / %x61-7A ; A-Z / a-z +DIGIT = %x30-39 ; 0-9 +SP = %x20 +HTAB = %x09 ; horizontal tab +DQUOTE = %x22 ; " (Double Quote) +VCHAR = %x21-7E ; visible (printing) characters diff --git a/extracted-files/tags/type-Time b/extracted-files/tags/type-Time index e21ca3ba..28d1a851 100644 --- a/extracted-files/tags/type-Time +++ b/extracted-files/tags/type-Time @@ -18,10 +18,10 @@ specification: ```abnf Time = hour ":" minute [":" second ["." fraction]] [%s"Z"] - hour = digit / ("0" / "1") digit / "2" ("0" / "1" / "2" / "3") - minute = ("0" / "1" / "2" / "3" / "4" / "5") digit - second = ("0" / "1" / "2" / "3" / "4" / "5") digit - fraction = 1*digit + hour = DIGIT / ("0" / "1") DIGIT / "2" ("0" / "1" / "2" / "3") + minute = ("0" / "1" / "2" / "3" / "4" / "5") DIGIT + second = ("0" / "1" / "2" / "3" / "4" / "5") DIGIT + fraction = 1*DIGIT ```
diff --git a/specification/gedcom-1-hierarchical-container-format.md b/specification/gedcom-1-hierarchical-container-format.md index f7ffceb7..58053a17 100644 --- a/specification/gedcom-1-hierarchical-container-format.md +++ b/specification/gedcom-1-hierarchical-container-format.md @@ -60,7 +60,7 @@ This document additionally makes use of the following named character sets in AB ```abnf -digit = %x30-39 ; 0 through 9 +; DIGIT = %x30-39 ; 0 through 9 -- defined in RFC 5234 section B.1 nonzero = %x31-39 ; 1 through 9 ucletter = %x41-5A ; A through Z underscore = %x5F ; _ @@ -125,7 +125,7 @@ It matches the production `Line`: ```abnf Line = Level D [Xref D] Tag [D LineVal] EOL -Level = "0" / nonzero *digit +Level = "0" / nonzero *DIGIT D = %x20 ; space Xref = atsign 1*tagchar atsign ; but not "@VOID@" Tag = stdTag / extTag @@ -134,7 +134,7 @@ EOL = %x0D [%x0A] / %x0A ; CR-LF, CR, or LF stdTag = ucletter *tagchar extTag = underscore 1*tagchar -tagchar = ucletter / digit / underscore +tagchar = ucletter / DIGIT / underscore pointer = voidPtr / Xref voidPtr = %s"@VOID@" diff --git a/specification/gedcom-2-data-types.md b/specification/gedcom-2-data-types.md index b8ff31c7..be8641a5 100644 --- a/specification/gedcom-2-data-types.md +++ b/specification/gedcom-2-data-types.md @@ -23,7 +23,7 @@ and represents a non-negative integer in base-10. Leading zeros have no semantic meaning and should be omitted. ```abnf -Integer = 1*digit +Integer = 1*DIGIT ``` Negative integers are not supported by this specification. @@ -177,10 +177,10 @@ When a time is used together with a `DateExact`, it is recommended that UTC time ```abnf Time = hour ":" minute [":" second ["." 
fraction]] [%s"Z"] -hour = digit / ("0" / "1") digit / "2" ("0" / "1" / "2" / "3") -minute = ("0" / "1" / "2" / "3" / "4" / "5") digit -second = ("0" / "1" / "2" / "3" / "4" / "5") digit -fraction = 1*digit +hour = DIGIT / ("0" / "1") DIGIT / "2" ("0" / "1" / "2" / "3") +minute = ("0" / "1" / "2" / "3" / "4" / "5") DIGIT +second = ("0" / "1" / "2" / "3" / "4" / "5") DIGIT +fraction = 1*DIGIT ``` :::note