From 7d193e74f088d12292401c674492f431ee91b34f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Tue, 14 Feb 2023 12:03:42 +0100 Subject: [PATCH 01/15] Define the grammar as an ABNF (RFC 5234) --- spec/message.abnf | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 spec/message.abnf diff --git a/spec/message.abnf b/spec/message.abnf new file mode 100644 index 0000000000..92b7905bd2 --- /dev/null +++ b/spec/message.abnf @@ -0,0 +1,55 @@ +message = [s] *(declaration [s]) body [s] + +declaration = "let" s variable [s] "=" [s] "{" [s] expression [s] "}" +body = pattern + / (selectors 1*([s] variant)) + +pattern = "{" *(text / placeholder) "}" +selectors = "match" 1*([s] selector) +selector = "{" [s] expression [s] "}" +variant = "when" 1*(s key) [s] pattern +key = nmtoken / literal / "*" + +placeholder = "{" [s] (expression / markup / markup-end) [s] "}" + +expression = ((literal / variable) [s annotation]) + / annotation +annotation = function *(s option) +option = name [s] "=" [s] (literal / nmtoken / variable) + +markup = markup-start *(s option) + +any-char = %x0-27 ; omit ( and ) + / %x2A-5B ; omit \ + / %x5D-7A ; omit { + / %x7C ; omit } + / %x7E-D7FF + / %xE000-10FFFF + +text = 1*(text-char / text-escape) +text-char = any-char / "(" / ")" + +variable = "$" name +function = ":" name +markup-start = "+" name +markup-end = "-" name +name = name-start *name-char +nmtoken = 1*name-char +name-start = ALPHA / "_" + / %xC0-D6 / %xD8-F6 / %xF8-2FF + / %x370-37D / %x37F-1FFF / %x200C-200D + / %x2070-218F / %x2C00-2FEF / %x3001-D7FF + / %xF900-FDCF / %xFDF0-FFFD / %x10000-EFFFF +name-char = name-start / DIGIT / "-" / "." 
/ %xB7 + / %x0300-036F / %x203F-2040 + +literal = "(" *(literal-char / literal-escape) ")" +literal-char = any-char / "{" / "}" + +text-escape = "\" ("\" / "{" / "}") +literal-escape = "\" ("\" / "(" / ")") + +s = 1*(%x09 / %x0D / %x0A / %x20) + +ALPHA = %x41-5A / %x61-7A +DIGIT = %x30-39 From 8c3a4e52fa0e539900a389abc097535c2c1d6383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Tue, 14 Feb 2023 13:56:39 +0100 Subject: [PATCH 02/15] Remove ALPHA and DIGIT, which are built-in Co-authored-by: Caleb Maclennan --- spec/message.abnf | 3 --- 1 file changed, 3 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 92b7905bd2..202ec96029 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -50,6 +50,3 @@ text-escape = "\" ("\" / "{" / "}") literal-escape = "\" ("\" / "(" / ")") s = 1*(%x09 / %x0D / %x0A / %x20) - -ALPHA = %x41-5A / %x61-7A -DIGIT = %x30-39 From 27edd646ca3d707353302589a3a7268c0c60a8cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Tue, 14 Feb 2023 16:45:37 +0100 Subject: [PATCH 03/15] Refactor text-char and literal-char to use non-ascii-char --- spec/message.abnf | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 202ec96029..2f26d1ebe2 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -19,15 +19,14 @@ option = name [s] "=" [s] (literal / nmtoken / variable) markup = markup-start *(s option) -any-char = %x0-27 ; omit ( and ) - / %x2A-5B ; omit \ - / %x5D-7A ; omit { - / %x7C ; omit } - / %x7E-D7FF - / %xE000-10FFFF +non-ascii-char = %x80-D7FF / %xE000-10FFFF text = 1*(text-char / text-escape) -text-char = any-char / "(" / ")" +text-char = %x0-5B ; omit \ + / %x5D-7A ; omit { + / %x7C ; omit } + / %x7E-7F + / non-ascii-char variable = "$" name function = ":" name @@ -44,7 +43,10 @@ name-char = name-start / DIGIT / "-" / "." 
/ %xB7 / %x0300-036F / %x203F-2040 literal = "(" *(literal-char / literal-escape) ")" -literal-char = any-char / "{" / "}" +literal-char = %x0-27 ; omit ( and ) + / %x2A-5B ; omit \ + / %x5D-7F + / non-ascii-char text-escape = "\" ("\" / "{" / "}") literal-escape = "\" ("\" / "(" / ")") From ec6c07ecd8b2483b72d03f546c5009556958339c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Wed, 15 Feb 2023 22:38:28 +0100 Subject: [PATCH 04/15] Define let, match, when as separate tokens --- spec/message.abnf | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 2f26d1ebe2..70f5cf6caf 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -1,13 +1,13 @@ message = [s] *(declaration [s]) body [s] -declaration = "let" s variable [s] "=" [s] "{" [s] expression [s] "}" +declaration = let s variable [s] "=" [s] "{" [s] expression [s] "}" body = pattern / (selectors 1*([s] variant)) pattern = "{" *(text / placeholder) "}" -selectors = "match" 1*([s] selector) +selectors = match 1*([s] selector) selector = "{" [s] expression [s] "}" -variant = "when" 1*(s key) [s] pattern +variant = when 1*(s key) [s] pattern key = nmtoken / literal / "*" placeholder = "{" [s] (expression / markup / markup-end) [s] "}" @@ -19,6 +19,10 @@ option = name [s] "=" [s] (literal / nmtoken / variable) markup = markup-start *(s option) +let = %x6C.65.74 +match = %x6D.61.74.63.68 +when = %x77.68.65.6E + non-ascii-char = %x80-D7FF / %xE000-10FFFF text = 1*(text-char / text-escape) From 52fe0ca6435396b0bdb030a2988ca14dd3636e3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Tue, 28 Feb 2023 12:21:05 +0100 Subject: [PATCH 05/15] Remove the EBNF file --- spec/message.ebnf | 51 ----------------------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 spec/message.ebnf diff --git a/spec/message.ebnf b/spec/message.ebnf deleted file mode 100644 index 
d9ccf27a2e..0000000000 --- a/spec/message.ebnf +++ /dev/null @@ -1,51 +0,0 @@ -Message ::= (s? Declaration)* s? Body s? - -Declaration ::= 'let' s Variable s? '=' s? '{' s? Expression s? '}' -Body ::= Pattern | Selector (s? Variant)+ - -Pattern ::= '{' (Text | Placeholder)* '}' -Selector ::= 'match' (s? '{' s? Expression s? '}')+ -Variant ::= 'when' (s Key)+ s? Pattern -Key ::= Nmtoken | Literal | '*' - -Placeholder ::= '{' s? (Expression | Markup | MarkupEnd) s? '}' - -Expression ::= (Literal | Variable) (s Annotation)? | Annotation -Annotation ::= Function (s Option)* -Option ::= Name s? '=' s? (Literal | Nmtoken | Variable) - -Markup ::= MarkupStart (s Option)* - - - -/* Text */ -Text ::= (TextChar | TextEscape)+ -TextChar ::= AnyChar - ('{' | '}' | Esc) -AnyChar ::= [#x0-#x10FFFF] - [#xD800-#xDFFF] - -/* Names */ -Variable ::= '$' Name -Function ::= ':' Name -MarkupStart ::= '+' Name -MarkupEnd ::= '-' Name -Name ::= NameStart NameChar* -Nmtoken ::= NameChar+ -NameStart ::= [a-zA-Z] | "_" - | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] - | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] - | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] - | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] -NameChar ::= NameStart | [0-9] | "-" | "." 
| #xB7 - | [#x0300-#x036F] | [#x203F-#x2040] - -/* Literals */ -Literal ::= '(' (LiteralChar | LiteralEscape)* ')' -LiteralChar ::= AnyChar - ('(' | ')' | Esc) - -/* Escape sequences */ -Esc ::= '\' -TextEscape ::= Esc Esc | Esc '{' | Esc '}' -LiteralEscape ::= Esc Esc | Esc '(' | Esc ')' - -/* White space */ -s ::= (#x9 | #xD | #xA | #x20)+ From fe291728229b32c13c70fd08eba876a23b967168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Tue, 28 Feb 2023 12:21:32 +0100 Subject: [PATCH 06/15] Inline the non-ascii-char production --- spec/message.abnf | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 70f5cf6caf..799579091f 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -23,19 +23,18 @@ let = %x6C.65.74 match = %x6D.61.74.63.68 when = %x77.68.65.6E -non-ascii-char = %x80-D7FF / %xE000-10FFFF - text = 1*(text-char / text-escape) text-char = %x0-5B ; omit \ / %x5D-7A ; omit { / %x7C ; omit } - / %x7E-7F - / non-ascii-char + / %x7E-D7FF + / %xE000-10FFFF variable = "$" name function = ":" name markup-start = "+" name markup-end = "-" name + name = name-start *name-char nmtoken = 1*name-char name-start = ALPHA / "_" @@ -49,8 +48,8 @@ name-char = name-start / DIGIT / "-" / "." 
/ %xB7 literal = "(" *(literal-char / literal-escape) ")" literal-char = %x0-27 ; omit ( and ) / %x2A-5B ; omit \ - / %x5D-7F - / non-ascii-char + / %x5D-D7FF + / %xE000-10FFFF text-escape = "\" ("\" / "{" / "}") literal-escape = "\" ("\" / "(" / ")") From 18ec7c1771afe9efa3510d7b6f16debeb793fedb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Tue, 28 Feb 2023 12:36:25 +0100 Subject: [PATCH 07/15] Update the ABNF snippets inside spec/syntax.md --- spec/message.abnf | 20 +++---- spec/syntax.md | 141 ++++++++++++++++++++++++++-------------------- 2 files changed, 89 insertions(+), 72 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 799579091f..bc6fbb4e50 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -24,12 +24,18 @@ match = %x6D.61.74.63.68 when = %x77.68.65.6E text = 1*(text-char / text-escape) -text-char = %x0-5B ; omit \ - / %x5D-7A ; omit { - / %x7C ; omit } - / %x7E-D7FF +text-char = %x0-5B ; omit \ + / %x5D-7A ; omit { + / %x7C ; omit } + / %x7E-D7FF ; omit surrogates / %xE000-10FFFF +literal = "(" *(literal-char / literal-escape) ")" +literal-char = %x0-27 ; omit ( and ) + / %x2A-5B ; omit \ + / %x5D-D7FF ; omit surrogates + / %xE000-10FFFF + variable = "$" name function = ":" name markup-start = "+" name @@ -45,12 +51,6 @@ name-start = ALPHA / "_" name-char = name-start / DIGIT / "-" / "." / %xB7 / %x0300-036F / %x203F-2040 -literal = "(" *(literal-char / literal-escape) ")" -literal-char = %x0-27 ; omit ( and ) - / %x2A-5B ; omit \ - / %x5D-D7FF - / %xE000-10FFFF - text-escape = "\" ("\" / "{" / "}") literal-escape = "\" ("\" / "(" / ")") diff --git a/spec/syntax.md b/spec/syntax.md index 07fe23f1d8..deeb24168f 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -6,8 +6,8 @@ 1. [Design Goals](#design-goals) 1. [Design Restrictions](#design-restrictions) 1. [Overview & Examples](#overview--examples) - 1. [Simple Messages](#simple-messages) - 1. [Simple Placeholders](#simple-placeholders) + 1. 
[Messages](#messages) + 1. [Placeholders](#placeholders) 1. [Formatting Functions](#formatting-functions) 1. [Markup Elements](#markup-elements) 1. [Selection](#selection) @@ -21,14 +21,14 @@ 1. [Patterns](#patterns) 1. [Placeholders](#placeholders) 1. [Expressions](#expressions) - 1. [Markup Elements](#markup-elements) + 1. [Markup](#markup) 1. [Tokens](#tokens) - 1. [Text](#text) + 1. [Keywords](#keywords) + 1. [Text and Literals](#text-and-literals) 1. [Names](#names) - 1. [Quoted Strings](#quoted-strings) 1. [Escape Sequences](#escape-sequences) 1. [Whitespace](#whitespace) -1. [Complete EBNF](#complete-ebnf) +1. [Complete ABNF](#complete-abnf) ### Introduction to This Section @@ -254,8 +254,10 @@ A *message* MUST be delimited with `{` at the start, and `}` at the end. Whitesp appear outside the delimiters; such whitespace is ignored. No other content is permitted outside the delimiters. -```ebnf -Message ::= Declaration* ( Pattern | Selector Variant+ ) +```abnf +message = [s] *(declaration [s]) body [s] +body = pattern + / (selectors 1*([s] variant)) ``` ### Variable Declarations @@ -264,8 +266,8 @@ A ***declaration*** is an expression binding a variable identifier within the scope of the message to the value of an expression. This local variable can then be used in other expressions within the same message. -```ebnf -Declaration ::= 'let' WhiteSpace Variable '=' '{' Expression '}' +```abnf +declaration = let s variable [s] "=" [s] "{" [s] expression [s] "}" ``` ### Selectors @@ -273,8 +275,9 @@ Declaration ::= 'let' WhiteSpace Variable '=' '{' Expression '}' A ***selector*** is a statement containing one or more expressions which will be used to choose one of the *variants* during formatting. -```ebnf -Selector ::= 'match' ( '{' Expression '}' )+ +```abnf +selectors = match 1*([s] selector) +selector = "{" [s] expression [s] "}" ``` Examples: @@ -298,9 +301,9 @@ A ***variant*** is a keyed *pattern*. 
The keys are used to match against the selector expressions defined in the `match` statement. The key `*` is a "catch-all" key, matching all selector values. -```ebnf -Variant ::= 'when' ( WhiteSpace VariantKey )+ Pattern -VariantKey ::= Literal | Nmtoken | '*' +```abnf +variant = when 1*(s key) [s] pattern +key = nmtoken / literal / "*" ``` A _well-formed_ message is considered _valid_ if the following requirements are satisfied: @@ -325,8 +328,8 @@ This serves 3 purposes: - The syntax needs to make it as clear as possible which parts of the message body are translatable and which ones are part of the formatting logic definition. -```ebnf -Pattern ::= '{' (Text | Placeholder)* '}' /* ws: explicit */ +```abnf +pattern = "{" *(text / placeholder) "}" ``` Examples: @@ -341,8 +344,8 @@ Whitespace within a *pattern* is meaningful and MUST be preserved. A ***placeholder*** contains either an expression or a markup element. -```ebnf -Placeholder ::= '{' (Expression | Markup | MarkupEnd) '}' +```abnf +placeholder = "{" [s] (expression / markup / markup-end) [s] "}" ``` ### Expressions @@ -357,13 +360,11 @@ other than the operand in front of them. Standalone function calls don't have any operands in front of them. -```ebnf -Expression ::= Operand Annotation? | Annotation -Operand ::= Literal | Variable -Annotation ::= Function Option* -Option ::= Name '=' (Literal | Nmtoken | Variable) -Variable ::= '$' Name /* ws: explicit */ -Function ::= ':' Name /* ws: explicit */ +```abnf +expression = ((literal / variable) [s annotation]) + / annotation +annotation = function *(s option) +option = name [s] "=" [s] (literal / nmtoken / variable) ``` Examples: @@ -400,10 +401,8 @@ each with its own syntax. They mimic XML elements, but do not require well-formedness. Standalone display elements should be represented as function expressions. 
-```ebnf -Markup ::= MarkupStart Option* -MarkupStart ::= '+' Name /* ws: explicit */ -MarkupEnd ::= '-' Name /* ws: explicit */ +```abnf +markup = markup-start *(s option) ``` Examples: @@ -420,7 +419,17 @@ Examples: The grammar defines the following tokens for the purpose of the lexical analysis. -### Text and literals +### Keywords + +The following three keywords are reserved: `let`, `match`, and `when`. + +```abnf +let = %x6C.65.74 +match = %x6D.61.74.63.68 +when = %x77.68.65.6E +``` + +### Text and Literals _Text_ is the translatable content of a _pattern_, and _Literal_ is used for matching variants and providing input to expressions. @@ -431,19 +440,21 @@ surrogate code points U+D800 through U+DBFF (which cannot be encoded into UTF-8) All code points are preserved. -#### Text - -```ebnf -Text ::= (TextChar | TextEscape)+ /* ws: explicit */ -TextChar ::= AnyChar - ('{' | '}' | Esc) -AnyChar ::= [#x0-#x10FFFF] - [#xD800-#xDBFF] +```abnf +text = 1*(text-char / text-escape) +text-char = %x0-5B ; omit \ + / %x5D-7A ; omit { + / %x7C ; omit } + / %x7E-D7FF ; omit surrogates + / %xE000-10FFFF ``` -#### Literal - -```ebnf -Literal ::= '(' (LiteralChar | LiteralEscape)* ')' /* ws: explicit */ -LiteralChar ::= AnyChar - ('(' | ')' | Esc) +```abnf +literal = "(" *(literal-char / literal-escape) ")" +literal-char = %x0-27 ; omit ( and ) + / %x2A-5B ; omit \ + / %x5D-D7FF ; omit surrogates + / %xE000-10FFFF ``` ### Names @@ -465,16 +476,23 @@ In particular, the grammatical feature data [specified in LDML](https://unicode. and [defined in CLDR](https://unicode-org.github.io/cldr-staging/charts/latest/grammar/index.html) uses Nmtokens. 
-```ebnf -Name ::= NameStart NameChar* /* ws: explicit */ -Nmtoken ::= NameChar+ /* ws: explicit */ -NameStart ::= [a-zA-Z] | "_" - | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] - | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] - | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] - | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] -NameChar ::= NameStart | [0-9] | "-" | "." | #xB7 - | [#x0300-#x036F] | [#x203F-#x2040] +```abnf +variable = "$" name +function = ":" name +markup-start = "+" name +markup-end = "-" name +``` + +```abnf +name = name-start *name-char +nmtoken = 1*name-char +name-start = ALPHA / "_" + / %xC0-D6 / %xD8-F6 / %xF8-2FF + / %x370-37D / %x37F-1FFF / %x200C-200D + / %x2070-218F / %x2C00-2FEF / %x3001-D7FF + / %xF900-FDCF / %xFDF0-FFFD / %x10000-EFFFF +name-char = name-start / DIGIT / "-" / "." / %xB7 + / %x0300-036F / %x203F-2040 ``` ### Escape Sequences @@ -482,10 +500,9 @@ NameChar ::= NameStart | [0-9] | "-" | "." | #xB7 Escape sequences are introduced by the backslash character (`\`). They are allowed in translatable text as well as in literals. -```ebnf -Esc ::= '\' -TextEscape ::= Esc Esc | Esc '{' | Esc '}' -LiteralEscape ::= Esc Esc | Esc '(' | Esc ')' +```abnf +text-escape = "\" ("\" / "{" / "}") +literal-escape = "\" ("\" / "(" / ")") ``` ### Whitespace @@ -496,12 +513,12 @@ Inside _patterns_, whitespace is part of the translatable content and is recorded and stored verbatim. Whitespace is not significant outside translatable text, except where required by the syntax. -```ebnf -WhiteSpace ::= #x9 | #xD | #xA | #x20 /* ws: definition */ +```abnf +s = 1*(%x09 / %x0D / %x0A / %x20) ``` -## Complete EBNF +## Complete ABNF -The complete EBNF is available as [`message.ebnf`](./message.ebnf). -It uses the [W3C flavor](https://www.w3.org/TR/xml/#sec-notation) of the BNF notation. -The grammar is an LL(1) grammar without backtracking. 
+The grammar is formally defined in [`message.abnf`](./message.abnf) +using the ABNF notation, +as specified by [RFC 5234](https://datatracker.ietf.org/doc/html/rfc5234). From 6ffc08d478cb5f050987c9b0a01f3eb466129d96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Tue, 28 Feb 2023 16:26:36 +0100 Subject: [PATCH 08/15] Simplify text-escape and literal-escape Co-authored-by: Eemeli Aro --- spec/message.abnf | 4 ++-- spec/syntax.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index bc6fbb4e50..9c6eae686d 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -51,7 +51,7 @@ name-start = ALPHA / "_" name-char = name-start / DIGIT / "-" / "." / %xB7 / %x0300-036F / %x203F-2040 -text-escape = "\" ("\" / "{" / "}") -literal-escape = "\" ("\" / "(" / ")") +text-escape = "\\" / "\{" / "\}" +literal-escape = "\\" / "\(" / "\)" s = 1*(%x09 / %x0D / %x0A / %x20) diff --git a/spec/syntax.md b/spec/syntax.md index deeb24168f..af3baf382c 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -501,8 +501,8 @@ Escape sequences are introduced by the backslash character (`\`). They are allowed in translatable text as well as in literals. 
```abnf -text-escape = "\" ("\" / "{" / "}") -literal-escape = "\" ("\" / "(" / ")") +text-escape = "\\" / "\{" / "\}" +literal-escape = "\\" / "\(" / "\)" ``` ### Whitespace From 8a003514ae5b75553132859effb90f0a50143a42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Tue, 28 Feb 2023 22:50:59 +0100 Subject: [PATCH 09/15] Introduce the backslash production to make text-escape and literal-escape easier to understand Co-authored-by: Richard Gibson --- spec/message.abnf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 9c6eae686d..1f5871a05c 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -51,7 +51,9 @@ name-start = ALPHA / "_" name-char = name-start / DIGIT / "-" / "." / %xB7 / %x0300-036F / %x203F-2040 -text-escape = "\\" / "\{" / "\}" -literal-escape = "\\" / "\(" / "\)" +text-escape = backslash ( backslash / "{" / "}" ) +literal-escape = backslash ( backslash / "(" / ")" ) + +backslash = %x5C ; U+005C REVERSE SOLIDUS "\" s = 1*(%x09 / %x0D / %x0A / %x20) From 7ef8a11ee4c475d45a741708b1a61fbd785e396d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Tue, 28 Feb 2023 22:52:59 +0100 Subject: [PATCH 10/15] Denormalize the alternatives inside the placeholder production Co-authored-by: Richard Gibson --- spec/message.abnf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/spec/message.abnf b/spec/message.abnf index 1f5871a05c..4647367566 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -10,7 +10,9 @@ selector = "{" [s] expression [s] "}" variant = when 1*(s key) [s] pattern key = nmtoken / literal / "*" -placeholder = "{" [s] (expression / markup / markup-end) [s] "}" +placeholder = "{" [s] expression [s] "}" + / "{" [s] markup-start *(s option) [s] "}" + / "{" [s] markup-end [s] "}" expression = ((literal / variable) [s annotation]) / annotation From f3a73d85f24df7f1e7bbc99103bdc378cf711ee8 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Tue, 28 Feb 2023 22:54:04 +0100 Subject: [PATCH 11/15] Link XML's Name and Nmtoken Co-authored-by: Richard Gibson --- spec/message.abnf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 4647367566..9ffc1581eb 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -43,8 +43,8 @@ function = ":" name markup-start = "+" name markup-end = "-" name -name = name-start *name-char -nmtoken = 1*name-char +name = name-start *name-char ; matches XML https://www.w3.org/TR/xml/#NT-Name +nmtoken = 1*name-char ; matches XML https://www.w3.org/TR/xml/#NT-Nmtoken name-start = ALPHA / "_" / %xC0-D6 / %xD8-F6 / %xF8-2FF / %x370-37D / %x37F-1FFF / %x200C-200D From ad48d07740bd113421cb8d1c586cb0d90f8eda62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Wed, 1 Mar 2023 08:52:29 +0100 Subject: [PATCH 12/15] Use the builtin ABNF whitespace productions Co-authored-by: Richard Gibson --- spec/message.abnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/message.abnf b/spec/message.abnf index 9ffc1581eb..f6a765be48 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -58,4 +58,4 @@ literal-escape = backslash ( backslash / "(" / ")" ) backslash = %x5C ; U+005C REVERSE SOLIDUS "\" -s = 1*(%x09 / %x0D / %x0A / %x20) +s = 1*( SP / HTAB / CR / LF ) From 47877297d1625362f7d662b742de6ae045ba9735 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Wed, 1 Mar 2023 09:11:29 +0100 Subject: [PATCH 13/15] Remove the unused markup production Co-authored-by: Eemeli Aro --- spec/message.abnf | 3 --- 1 file changed, 3 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index f6a765be48..179a1e3e88 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -18,9 +18,6 @@ expression = ((literal / variable) [s annotation]) / annotation annotation = function *(s option) option
= name [s] "=" [s] (literal / nmtoken / variable) - -markup = markup-start *(s option) - let = %x6C.65.74 match = %x6D.61.74.63.68 when = %x77.68.65.6E From 4205cd200eefbfaf7bd4a76c4d777f740aa49dca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Wed, 1 Mar 2023 09:26:13 +0100 Subject: [PATCH 14/15] Add comment about keywords being lowercase --- spec/message.abnf | 8 +++++--- spec/syntax.md | 7 ++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 179a1e3e88..278751b8f5 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -18,9 +18,11 @@ expression = ((literal / variable) [s annotation]) / annotation annotation = function *(s option) option = name [s] "=" [s] (literal / nmtoken / variable) -let = %x6C.65.74 -match = %x6D.61.74.63.68 -when = %x77.68.65.6E + +; reserved keywords are always lowercase +let = %x6C.65.74 ; "let" +match = %x6D.61.74.63.68 ; "match" +when = %x77.68.65.6E ; "when" text = 1*(text-char / text-escape) text-char = %x0-5B ; omit \ diff --git a/spec/syntax.md b/spec/syntax.md index af3baf382c..cc854e634a 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -424,9 +424,10 @@ The grammar defines the following tokens for the purpose of the lexical analysis The following three keywords are reserved: `let`, `match`, and `when`. 
```abnf -let = %x6C.65.74 -match = %x6D.61.74.63.68 -when = %x77.68.65.6E +; reserved keywords are always lowercase +let = %x6C.65.74 ; "let" +match = %x6D.61.74.63.68 ; "match" +when = %x77.68.65.6E ; "when" ``` ### Text and Literals From 517612d288681618a3467c31de302b0e55961f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Ma=C5=82olepszy?= Date: Wed, 1 Mar 2023 09:30:49 +0100 Subject: [PATCH 15/15] Update syntax.md with the recent suggestions to the ABNF --- spec/message.abnf | 3 +-- spec/syntax.md | 19 +++++++++---------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 278751b8f5..feb3b3b3b0 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -54,7 +54,6 @@ name-char = name-start / DIGIT / "-" / "." / %xB7 text-escape = backslash ( backslash / "{" / "}" ) literal-escape = backslash ( backslash / "(" / ")" ) - -backslash = %x5C ; U+005C REVERSE SOLIDUS "\" +backslash = %x5C ; U+005C REVERSE SOLIDUS "\" s = 1*( SP / HTAB / CR / LF ) diff --git a/spec/syntax.md b/spec/syntax.md index cc854e634a..1006648e47 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -345,7 +345,9 @@ Whitespace within a *pattern* is meaningful and MUST be preserved. A ***placeholder*** contains either an expression or a markup element. ```abnf -placeholder = "{" [s] (expression / markup / markup-end) [s] "}" +placeholder = "{" [s] expression [s] "}" + / "{" [s] markup-start *(s option) [s] "}" + / "{" [s] markup-end [s] "}" ``` ### Expressions @@ -401,10 +403,6 @@ each with its own syntax. They mimic XML elements, but do not require well-formedness. Standalone display elements should be represented as function expressions. 
-```abnf -markup = markup-start *(s option) -``` - Examples: ``` @@ -485,8 +483,8 @@ markup-end = "-" name ``` ```abnf -name = name-start *name-char -nmtoken = 1*name-char +name = name-start *name-char ; matches XML https://www.w3.org/TR/xml/#NT-Name +nmtoken = 1*name-char ; matches XML https://www.w3.org/TR/xml/#NT-Nmtoken name-start = ALPHA / "_" / %xC0-D6 / %xD8-F6 / %xF8-2FF / %x370-37D / %x37F-1FFF / %x200C-200D @@ -502,8 +500,9 @@ Escape sequences are introduced by the backslash character (`\`). They are allowed in translatable text as well as in literals. ```abnf -text-escape = "\\" / "\{" / "\}" -literal-escape = "\\" / "\(" / "\)" +text-escape = backslash ( backslash / "{" / "}" ) +literal-escape = backslash ( backslash / "(" / ")" ) +backslash = %x5C ; U+005C REVERSE SOLIDUS "\" ``` ### Whitespace @@ -515,7 +514,7 @@ whitespace is part of the translatable content and is recorded and stored verbat Whitespace is not significant outside translatable text, except where required by the syntax. ```abnf -s = 1*(%x09 / %x0D / %x0A / %x20) +s = 1*( SP / HTAB / CR / LF ) ``` ## Complete ABNF