Skip to content

Commit 41e9cd7

Browse files
pekkaklarck authored and yanne committed
Support non-ASCII spaces with new parser robotframework#3121
They can be used
- as separators everywhere,
- in arguments,
- in setting/header names,
- etc.

For loop support isn't fully ready yet.
1 parent 3397244 commit 41e9cd7

File tree

10 files changed

+77
-107
lines changed

10 files changed

+77
-107
lines changed

atest/robot/parsing/non_ascii_spaces.robot

Lines changed: 18 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -5,58 +5,19 @@ Resource atest_resource.robot
55
*** Test Cases ***
66
In suite settings
77
${tc} = Check Test Case In test and keywords
8-
Check Log Message ${tc.setup.kws[0].msgs[0]} ': :'
9-
Check Log Message ${tc.teardown.kws[0].msgs[0]} ': :'
10-
Normalization deprecated 0 Test\\xa0Setup 2
11-
Normalization deprecated 1 No-break\\xa0space 2
12-
Normalization deprecated 2 :\\xa0: 2
13-
Normalization deprecated 3 Test\\u1680Teardown 3
14-
Normalization deprecated 4 Ogham\\u1680space\\u1680mark 3
15-
Normalization deprecated 5 :\\u1680: 3
16-
17-
In variables
18-
Normalization deprecated 6 \${NO-BREAK\\xa0SPACE}\\xa0= 6
19-
Normalization deprecated 7 :\\xa0: 6
20-
Normalization deprecated 8 \${OGHAM\\u1680SPACE\\u1680MARK}\\u1680= 7
21-
Normalization deprecated 9 :\\u1680: 7
22-
Normalization deprecated 10 \${IDEOGRAPHIC\\u3000SPACE}\\u3000= 8
23-
Normalization deprecated 11 :\\u3000: 8
8+
Check Log Message ${tc.setup.kws[0].msgs[0]} ':\\xa0:'
9+
Check Log Message ${tc.setup.kws[1].msgs[0]} : :
10+
Check Log Message ${tc.teardown.kws[0].msgs[0]} ':\\u1680:'
11+
Check Log Message ${tc.teardown.kws[1].msgs[0]} : :
2412

2513
In test and keywords
2614
${tc} = Check Test Case ${TESTNAME}
27-
Normalization deprecated 12 [\\xa0Tags\\u1680] 14
28-
Normalization deprecated 13 NBSP\\xa0and\\u1680Ogham 14
29-
Normalization deprecated 14 \${x}\\xa0= 15
30-
Normalization deprecated 15 No-break\\xa0space 15
31-
Normalization deprecated 16 :\\xa0: 15
32-
Normalization deprecated 17 \${x}\\u1680= 16
33-
Normalization deprecated 18 Ogham\\u1680space\\u1680mark 16
34-
Normalization deprecated 19 :\\u1680: 16
35-
Normalization deprecated 20 \${x}\\u3000= 17
36-
Normalization deprecated 21 Ideographic\\u3000space 17
37-
Normalization deprecated 22 :\\u3000: 17
38-
Normalization deprecated 23 No-break\\xa0space 21
39-
Normalization deprecated 24 :\\xa0: 21
40-
Normalization deprecated 25 No-break\\xa0space 25
41-
Normalization deprecated 26 :\\xa0: 25
42-
Normalization deprecated 27 No-break\\xa0space 28
43-
Normalization deprecated 28 [\\xa0Arguments\\xa0] 29
44-
Normalization deprecated 29 Should\\xa0be\\xa0equal 31
45-
Normalization deprecated 30 Should\\xa0be\\xa0equal 32
46-
Normalization deprecated 31 Should\\xa0be\\xa0equal 33
47-
Normalization deprecated 32 \${NO-BREAK\\xa0SPACE} 33
48-
Normalization deprecated 33 Ogham\\u1680space\\u1680mark 35
49-
Normalization deprecated 34 [\\u1680Arguments\\u1680] 36
50-
Normalization deprecated 35 Should\\u1680be\\u1680equal 38
51-
Normalization deprecated 36 Should\\u1680be\\u1680equal 39
52-
Normalization deprecated 37 Should\\u1680be\\u1680equal 40
53-
Normalization deprecated 38 \${OGHAM\\u1680SPACE\\u1680MARK} 40
54-
Normalization deprecated 39 Ideographic\\u3000space 42
55-
Normalization deprecated 40 [\\u3000Arguments\\u3000] 43
56-
Normalization deprecated 41 Should\\u3000be\\u3000equal 45
57-
Normalization deprecated 42 Should\\u3000be\\u3000equal 46
58-
Normalization deprecated 43 Should\\u3000be\\u3000equal 47
59-
Normalization deprecated 44 \${IDEOGRAPHIC\\u3000SPACE} 47
15+
Check Log Message ${tc.kws[0].kws[0].msgs[0]} ':\\xa0:'
16+
Check Log Message ${tc.kws[0].kws[1].msgs[0]} : :
17+
Check Log Message ${tc.kws[1].kws[0].msgs[0]} ':\\u1680:'
18+
Check Log Message ${tc.kws[1].kws[1].msgs[0]} : :
19+
Check Log Message ${tc.kws[2].kws[0].msgs[0]} ':\\u3000:'
20+
Check Log Message ${tc.kws[2].kws[1].msgs[0]} : :
6021

6122
As separator
6223
Check Test Case ${TESTNAME}
@@ -66,13 +27,12 @@ With pipes
6627

6728
In header
6829
Check Test Case ${TESTNAME}
69-
Normalization deprecated 45 ***\\xa0Test\\u1680Cases\\u3000*** 49
7030

71-
*** Keywords ***
72-
Normalization deprecated
73-
[Arguments] ${index} ${text} ${line}
74-
${path} = Normalize Path ${DATADIR}/parsing/non_ascii_spaces.robot
75-
${msg} = Catenate
76-
... Converting whitespace characters to ASCII spaces during parsing is deprecated.
77-
... Fix '${text}' in file '${path}' on line ${line}.
78-
Check Log Message ${ERRORS}[${index}] ${msg} WARN
31+
In test casename
32+
Check Test Case ${TESTNAME}
33+
34+
In WITH NAME
35+
Check Test Case ${TESTNAME}
36+
37+
In FOR separator
38+
Check Test Case ${TESTNAME}

atest/robot/parsing/non_breaking_space.robot

Lines changed: 0 additions & 14 deletions
This file was deleted.

atest/testdata/parsing/nbsp.robot

Lines changed: 0 additions & 6 deletions
This file was deleted.

atest/testdata/parsing/nbsp.tsv

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,73 @@
11
*** Settings ***
22
Test Setup No-break space : :
33
Test Teardown Ogham space mark : :
4+
Library         OperatingSystem      WITH NAME   OS
45

56
*** Variables ***
67
${NO-BREAK SPACE} = : :
7-
${OGHAM SPACE MARK} = : :
8-
${IDEOGRAPHIC SPACE} = : :
8+
${OGHAM SPACE MARK} = : :      # Trailing         
9+
${IDEOGRAPHIC SPACE} = : :                  
910

1011
*** Test Cases ***
1112
In test and keywords
1213
[Documentation] Used in keyword name, arguments and assign.
13-
... In RF 3.2 arguments shouldn't be normalized.
1414
[ Tags ] NBSP and Ogham
1515
${x} = No-break space : :
16-
${x} = Ogham space mark : :
17-
${x} = Ideographic space : :
16+
${x} = Ogham space mark : :      # Trailing         
17+
${x} = Ideographic space : :                  
1818

1919
As separator
20-
[Documentation] In RF 3.1 only NBSP works
2120
    No-break space    : :
21+
     Ogham space mark    : :
22+
    Ideographic space    : :
2223

2324
With pipes
24-
[Documentation] In RF 3.1 only NBSP works
2525
| | No-break space  | : : |
26+
| | Ogham space mark | : : |
27+
| | Ideographic space | : : |
2628

2729
*** Keywords ***
2830
No-break space
2931
[ Arguments ] ${arg}
30-
Log ${arg} repr=True
31-
Should be equal ${arg} : :
32+
Log ${arg} formatter=repr
33+
Log ${arg}
34+
Should be equal ${arg} : :
3235
Should be equal ${arg} ${NO-BREAK SPACE}
3336
Should be equal ${arg} ${NO-BREAK SPACE}
3437

3538
Ogham space mark
3639
[ Arguments ] ${arg}
37-
Log ${arg} repr=True
38-
Should be equal ${arg} : :
40+
Log ${arg} formatter=repr
41+
Log ${arg}
42+
Should be equal ${arg} : :
3943
Should be equal ${arg} ${OGHAM SPACE MARK}
4044
Should be equal ${arg} ${OGHAM SPACE MARK}
4145

4246
Ideographic space
4347
[ Arguments ] ${arg}
44-
Log ${arg} repr=True
45-
Should be equal ${arg} : :
48+
Log ${arg} formatter=repr
49+
Log ${arg}
50+
Should be equal ${arg} : :
4651
Should be equal ${arg} ${IDEOGRAPHIC SPACE}
4752
Should be equal ${arg} ${IDEOGRAPHIC SPACE}
4853

4954
*** Test Cases ***
5055
In header
5156
No operation
57+
58+
In test case name
59+
No operation
60+
61+
In WITH NAME
62+
OS.Directory Should Exist ${CURDIR}
63+
64+
In FOR separator
65+
FOR ${index} IN RANGE 1
66+
Should Be Equal ${index} ${0}
67+
END
68+
FOR ${index} ${item} IN ENUMERATE value
69+
Should Be Equal ${index}: ${item} 0: value
70+
END
71+
FOR ${tag} IN ZIP ${TEST TAGS}
72+
Fail Should not be executed
73+
END

src/robot/parsing/lexer/lexers.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# limitations under the License.
1515

1616
from robot.variables import is_var
17+
from robot.utils import normalize_whitespace
1718

1819
from .tokens import Token
1920

@@ -107,10 +108,13 @@ class SectionLexer(BlockLexer):
107108

108109
@classmethod
109110
def handles(cls, statement):
110-
# TODO: Non-ASCII spaces
111111
marker = statement[0].value
112112
return (marker.startswith('*') and
113-
marker.strip('* ').title() in cls.markers)
113+
cls._normalize(marker) in cls.markers)
114+
115+
@classmethod
116+
def _normalize(cls, marker):
117+
return normalize_whitespace(marker).strip('* ').title()
114118

115119
def accepts_more(self, statement):
116120
return not statement[0].value.startswith('*')
@@ -218,7 +222,6 @@ def lex(self, ctx):
218222

219223

220224
class TestCaseSectionLexer(SectionLexer):
221-
# FIXME: Non-ASCII spaces
222225
markers = ('Test Case', 'Test Cases', 'Task', 'Tasks')
223226

224227
def lexer_classes(self):
@@ -302,6 +305,7 @@ def handles(cls, statement):
302305

303306

304307
class ForLoopLexer(StatementLexer):
308+
_separators = ('IN', 'IN RANGE', 'IN ENUMERATE', 'IN ZIP')
305309

306310
@classmethod
307311
def handles(cls, statement):
@@ -310,7 +314,7 @@ def handles(cls, statement):
310314
marker.startswith(':') and
311315
marker.replace(':', '').replace(' ', '').upper() == 'FOR')
312316

313-
def lex(self, ctc):
317+
def lex(self, ctx):
314318
separator_seen = False
315319
arguments_seen = False
316320
self.statement[0].type = Token.FOR
@@ -325,8 +329,7 @@ def lex(self, ctc):
325329
def _is_separator(self, value, arguments_seen, separator_seen):
326330
if separator_seen or not arguments_seen:
327331
return False
328-
# FIXME: Non-ASCII spaces
329-
return value in ('IN', 'IN RANGE', 'IN ENUMERATE', 'IN ZIP')
332+
return normalize_whitespace(value) in self._separators
330333

331334

332335
class EndLexer(StatementLexer):

src/robot/parsing/lexer/settings.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
from robot.utils import normalize_whitespace
17+
1618
from .tokens import Token
1719

1820

@@ -51,7 +53,7 @@ def _validate(self, name, normalized, statement):
5153
% (name, len(statement) - 1))
5254

5355
def _normalize_name(self, name):
54-
upper = name.upper() # TODO: Non-ASCII spaces
56+
upper = normalize_whitespace(name).upper()
5557
if upper in self.aliases:
5658
return self.aliases[upper]
5759
return upper
@@ -61,7 +63,6 @@ def _format_name(self, name):
6163

6264

6365
class TestCaseFileSettings(Settings):
64-
# FIXME: Non-ASCII spaces
6566
names = (
6667
'DOCUMENTATION',
6768
'SUITE SETUP',

src/robot/parsing/nodes.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from ast import AST
1717
import re
1818

19+
from robot.utils import normalize_whitespace
20+
1921

2022
class Node(AST):
2123
_fields = ()
@@ -27,8 +29,8 @@ def _add_joiners(self, values):
2729
yield self._joiner_based_on_eol_escapes(item)
2830

2931
def _joiner_based_on_eol_escapes(self, item):
30-
_end_of_line_escapes = re.compile(r'(\\+)n?$')
31-
match = _end_of_line_escapes.search(item)
32+
eol_escapes = re.compile(r'(\\+)n?$')
33+
match = eol_escapes.search(item)
3234
if match and len(match.group(1)) % 2 == 1:
3335
return ''
3436
return '\n'
@@ -74,7 +76,7 @@ class TestCaseSection(Node):
7476

7577
def __init__(self, tests, header):
7678
self.tests = tests
77-
self.header = header[0].strip("*").strip()
79+
self.header = header[0].strip('*').strip()
7880

7981

8082
class KeywordSection(Node):
@@ -151,7 +153,7 @@ def __init__(self, name, args):
151153
self.alias = alias
152154

153155
def _split_alias(self, args):
154-
if len(args) > 1 and args[-2] == 'WITH NAME':
156+
if len(args) > 1 and normalize_whitespace(args[-2]) == 'WITH NAME':
155157
return args[:-2], args[-1]
156158
return args, None
157159

src/robot/utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
from .match import eq, Matcher, MultiMatcher
5252
from .misc import (plural_or_not, printable_name, roundup, seq2str,
5353
seq2str2)
54-
from .normalizing import lower, normalize, NormalizedDict
54+
from .normalizing import lower, normalize, normalize_whitespace, NormalizedDict
5555
from .platform import (IRONPYTHON, JAVA_VERSION, JYTHON, PY_VERSION,
5656
PY2, PY3, PYPY, UNIXY, WINDOWS, RERAISED_EXCEPTIONS)
5757
from .recommendations import RecommendationFinder

src/robot/utils/normalizing.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from collections.abc import MutableMapping
1818
except ImportError:
1919
from collections import MutableMapping
20+
import re
2021

2122
from .platform import IRONPYTHON, PY_VERSION, PY3
2223
from .robottypes import is_dict_like, is_unicode
@@ -45,6 +46,10 @@ def normalize(string, ignore=(), caseless=True, spaceless=True):
4546
return string
4647

4748

49+
def normalize_whitespace(string):
50+
return re.sub(r'\s', ' ', string, flags=re.UNICODE)
51+
52+
4853
# http://ironpython.codeplex.com/workitem/33133
4954
if IRONPYTHON and PY_VERSION < (2, 7, 5):
5055
def lower(string):

0 commit comments

Comments
 (0)