@@ -78,6 +78,7 @@ class Lexer:
         (3, 3): _reserved_3_1,
         (3, 4): _reserved_3_1,
         (3, 5): _reserved_3_5,
+        (3, 6): _reserved_3_5,
     }
     """
     A map from a tuple (*major*, *minor*) corresponding to Python version to
@@ -86,6 +87,9 @@ class Lexer:
 
     _string_prefixes_3_1 = frozenset(["", "r", "b", "br"])
     _string_prefixes_3_3 = frozenset(["", "r", "u", "b", "br", "rb"])
+    _string_prefixes_3_6 = _string_prefixes_3_3.union(frozenset([
+        "f", "F", "fr", "Fr", "fR", "FR", "rf", "rF", "Rf", "RF"
+    ]))
 
     # holy mother of god why
     _string_prefixes = {
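
Python 3.6 adds f-strings (PEP 498), so every case and order spelling of the f and rf prefixes is listed explicitly. A minimal sketch (not part of the commit) of how such a prefix set behaves, reusing the literals from the frozensets above:

    # Sketch only: mirrors _string_prefixes_3_3 / _string_prefixes_3_6 above.
    prefixes_3_3 = frozenset(["", "r", "u", "b", "br", "rb"])
    prefixes_3_6 = prefixes_3_3.union(frozenset([
        "f", "F", "fr", "Fr", "fR", "FR", "rf", "rF", "Rf", "RF"
    ]))

    assert "rf" in prefixes_3_6 and "rf" not in prefixes_3_3
    assert "Rb" not in prefixes_3_6   # only the f-string spellings carry case variants here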
@@ -97,6 +101,7 @@ class Lexer:
         (3, 3): _string_prefixes_3_3,
         (3, 4): _string_prefixes_3_3,
         (3, 5): _string_prefixes_3_3,
+        (3, 6): _string_prefixes_3_6,
     }
     """
     A map from a tuple (*major*, *minor*) corresponding to Python version to
@@ -123,7 +128,8 @@ def __init__(self, source_buffer, version, diagnostic_engine, interactive=False)
         try:
             reserved = self._reserved[version]
         except KeyError:
-            raise NotImplementedError("pythonparser.lexer.Lexer cannot lex Python %s" % str(version))
+            raise NotImplementedError("pythonparser.lexer.Lexer cannot lex Python %s" %
+                                      str(version))
 
         # Sort for the regexp to obey longest-match rule.
         re_reserved = sorted(reserved, reverse=True, key=len)
@@ -136,6 +142,14 @@ def __init__(self, source_buffer, version, diagnostic_engine, interactive=False)
         else:
             id_xid = "X"
 
+        # Python 3.6+ permits underscores as number delimiters
+        if self.version >= (3, 6):
+            underscore = "_?"
+            digit = "[0-9] (?: _? [0-9] )*"
+        else:
+            underscore = ""
+            digit = "[0-9]+"
+
         # To speed things up on CPython, we use the re module to generate a DFA
         # from our token set and execute it in C. Every result yielded by
         # iterating this regular expression has exactly one non-empty group
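
A minimal sketch (not part of the commit) of what the 3.6 digit sub-pattern accepts; the lexer's token regexp is compiled with re.VERBOSE, so the embedded spaces are ignored. Per PEP 515, a single underscore is allowed only between digits:

    import re

    digit = "[0-9] (?: _? [0-9] )*"
    number = re.compile(digit, re.VERBOSE)

    assert number.fullmatch("1_000_000")     # single underscores between digits
    assert not number.fullmatch("1__000")    # doubled separator rejected
    assert not number.fullmatch("_1000")     # leading separator rejected
    assert not number.fullmatch("1000_")     # trailing separator rejected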
@@ -156,19 +170,21 @@ def __init__(self, source_buffer, version, diagnostic_engine, interactive=False)
             ([\n]|[\r][\n]|[\r])                  # 3 newline
         |   (\#.*)                                # 4 comment
         |   (                                     # 5 floating point or complex literal
-               (?: [0-9]* \. [0-9]+
-               |   [0-9]+ \.?
-               ) [eE] [+-]? [0-9]+
-           |   [0-9]* \. [0-9]+
-           |   [0-9]+ \.
+               (?: \. {d}
+               |   {d} \. {d}
+               |   {d} \.?
+               ) [eE] [+-]? {d}
+           |   \. {d}
+           |   {d} \. {d}
+           |   {d} \.
             ) ([jJ])?                             # ?6 complex suffix
-        |   ([0-9]+) [jJ]                         # 7 complex literal
+        |   ({d}) [jJ]                            # 7 complex literal
         |   (?:                                   # integer literal
-               ( [1-9] [0-9]* )                   # 8 dec
-           |   0[oO] ( [0-7]+ )                   # 9 oct
-           |   0[xX] ( [0-9A-Fa-f]+ )             # 10 hex
-           |   0[bB] ( [01]+ )                    # 11 bin
-           |   ( [0-9] [0-9]* )                   # 12 bare oct
+               ( [1-9] (?: {u} [0-9] )* )         # 8 dec
+           |   0[oO] ( (?: {u} [0-7] )+ )         # 9 oct
+           |   0[xX] ( (?: {u} [0-9A-Fa-f] )+ )   # 10 hex
+           |   0[bB] ( (?: {u} [01] )+ )          # 11 bin
+           |   ( [0-9] (?: {u} [0-9] )* )         # 12 bare oct
             )
             [Ll]?
         |   ([BbUu]?[Rr]?)                        # ?13 string literal options
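
Because {d} always requires at least one digit, the old "[0-9]* \. [0-9]+" form with an optional integer part is split into explicit ". {d}" / "{d} . {d}" / "{d} ." alternatives. A minimal sketch (not part of the commit) spot-checking the rewritten float body with the 3.6 digit pattern substituted:

    import re

    d = "[0-9] (?: _? [0-9] )*"
    float_re = re.compile(r"""
        (?: \. {d}
          | {d} \. {d}
          | {d} \.?
        ) [eE] [+-]? {d}
      | \. {d}
      | {d} \. {d}
      | {d} \.
    """.format(d=d), re.VERBOSE)

    for text in (".5", "1_0.5", "10.", "1_000e1_0", ".5e-3"):
        assert float_re.fullmatch(text), text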
@@ -185,8 +201,14 @@ def __init__(self, source_buffer, version, diagnostic_engine, interactive=False)
         |   (\p{{{id_xid}ID_Start}}\p{{{id_xid}ID_Continue}}*) # 23 Unicode identifier
         |   ($)                                   # 24 end-of-file
         )
-        """.format(keywords=re_keywords, operators=re_operators,
-                   id_xid=id_xid), re.VERBOSE | re.UNICODE)
+        """.format(
+            u=underscore,
+            d=digit,
+            keywords=re_keywords,
+            operators=re_operators,
+            id_xid=id_xid
+        ),
+        re.VERBOSE | re.UNICODE)
 
     # These are identical for all lexer instances.
     _lex_escape_pattern = r"""
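
The new {u} and {d} slots are filled by the same str.format call that already substitutes the keyword and operator alternations; the captured group keeps any separators, which is why _refill strips them before conversion below. A minimal sketch (not part of the commit) using just the hex alternative:

    import re

    underscore = "_?"
    hex_re = re.compile(r"0[xX] ( (?: {u} [0-9A-Fa-f] )+ )".format(u=underscore),
                        re.VERBOSE)

    match = hex_re.fullmatch("0xDEAD_BEEF")
    assert match.group(1) == "DEAD_BEEF"   # the underscore survives into the capture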
@@ -327,25 +349,34 @@ def _refill(self, eof_token):
             self.new_line = False
 
         if match.group(5) is not None: # floating point or complex literal
+            literal = match.group(5).replace("_", "")
             if match.group(6) is None:
-                self.queue.append(Token(tok_range, "float", float(match.group(5))))
+                self.queue.append(Token(tok_range, "float",
+                                        float(literal)))
             else:
-                self.queue.append(Token(tok_range, "complex", float(match.group(5)) * 1j))
+                self.queue.append(Token(tok_range, "complex",
+                                        float(literal) * 1j))
 
         elif match.group(7) is not None: # complex literal
-            self.queue.append(Token(tok_range, "complex", int(match.group(7)) * 1j))
+            literal = match.group(7).replace("_", "")
+            self.queue.append(Token(tok_range, "complex",
+                                    int(literal) * 1j))
 
         elif match.group(8) is not None: # integer literal, dec
-            self.queue.append(self._make_int_token(tok_range, match.group(1), 10))
+            literal = match.group(8).replace("_", "")
+            self.queue.append(self._make_int_token(tok_range, literal, 10))
 
         elif match.group(9) is not None: # integer literal, oct
-            self.queue.append(self._make_int_token(tok_range, match.group(1), 8))
+            literal = match.group(9).replace("_", "")
+            self.queue.append(self._make_int_token(tok_range, literal, 8))
 
         elif match.group(10) is not None: # integer literal, hex
-            self.queue.append(self._make_int_token(tok_range, match.group(1), 16))
+            literal = match.group(10).replace("_", "")
+            self.queue.append(self._make_int_token(tok_range, literal, 16))
 
         elif match.group(11) is not None: # integer literal, bin
-            self.queue.append(self._make_int_token(tok_range, match.group(1), 2))
+            literal = match.group(11).replace("_", "")
+            self.queue.append(self._make_int_token(tok_range, literal, 2))
 
         elif match.group(12) is not None: # integer literal, bare oct
             if len(match.group(12)) > 1 and self.version >= (3, 0):
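
A minimal sketch (not part of the commit) of why the separators are stripped before conversion: int() and float() only started accepting underscores in Python 3.6, and the lexer may itself be running on an older interpreter:

    literal = "1_000_000"
    assert int(literal.replace("_", ""), 10) == 1000000

    literal = "1_024.5"
    assert float(literal.replace("_", "")) == 1024.5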
@@ -410,6 +441,10 @@ def _string_literal(self, options, begin_span, data, data_span, end_span):
                 {"prefix": options, "major": self.version[0], "minor": self.version[1]},
                 begin_range)
             self.diagnostic_engine.process(error)
+        if "f" in options or "F" in options:
+            error = diagnostic.Diagnostic(
+                "error", "pythonparser does not yet support format strings",
+                begin_range)
 
         self.queue.append(Token(begin_range, "strbegin", options))
         self.queue.append(Token(data_range,
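
The prefix-availability branch directly above builds its Diagnostic with an arguments dict and then hands it to the diagnostic engine. A sketch of the f-string case following that same pattern (an assumption for illustration, not part of this commit) would look like:

        if "f" in options or "F" in options:
            error = diagnostic.Diagnostic(
                "error", "pythonparser does not yet support format strings",
                {}, begin_range)
            self.diagnostic_engine.process(error)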