Skip to content

Commit 41a8f60

Browse files
committed
Faster searching of variables in strings.
Based on `timeit`, this implementation is considerably faster. For example, ``` python -m timeit 'from robot.variables.search import search_variable' 'search_variable("${var}")' ``` reports ~3.5usec per loop with the old implementation and only ~2.0usec per loop with the new one. Although the difference in percentages is huge, the old code was also so fast that this doesn't affect the actual execution times too much. For example, the execution time of the following test dropped from ~910msec to ~880msec. Not a huge drop, but the difference is measurable and adds up with longer suites. ``` *** Variables *** ${var} var @{list} one two *** Test Cases *** Example FOR ${i} IN RANGE 1000 Log ${var} Log Some text with ${var} and more with ${i + ${1}} still more with ${list}[0] END ``` One reason for the speedup is having the core logic in a loop without any function calls. That results in somewhat complicated code, but the old state machine wasn't that easy to understand either, and this implementation is quite a bit shorter.
1 parent cbd4507 commit 41a8f60

File tree

1 file changed

+70
-112
lines changed

1 file changed

+70
-112
lines changed

src/robot/variables/search.py

Lines changed: 70 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
def search_variable(string, identifiers='$@&%*', ignore_errors=False):
2323
if not (is_string(string) and '{' in string):
2424
return VariableMatch(string)
25-
return VariableSearcher(identifiers, ignore_errors).search(string)
25+
return _search_variable(string, identifiers, ignore_errors)
2626

2727

2828
def contains_variable(string, identifiers='$@&'):
@@ -142,119 +142,77 @@ def __str__(self):
142142
return '%s{%s}%s' % (self.identifier, self.base, items)
143143

144144

145-
class VariableSearcher:
145+
def _search_variable(string, identifiers, ignore_errors=False):
    """Find the first variable in ``string`` and return it as a match.

    The name part (``${name}``) and any trailing item accesses (``[item]``)
    are parsed in one loop without helper calls; the bracket pair currently
    being matched tells which of the two is being parsed.

    :param string: The string to search from.
    :param identifiers: Characters accepted as the variable identifier,
        i.e. the character right before the opening ``{``.
    :param ignore_errors: When true, an unclosed variable or item yields
        an empty match instead of raising an error.
    :return: A ``VariableMatch``. If no variable start is found, it is
        created from ``string`` alone.
    :raises VariableError: If the variable or one of its items is not
        closed and ``ignore_errors`` is false.
    """
    start = _find_variable_start(string, identifiers)
    if start < 0:
        return VariableMatch(string)

    match = VariableMatch(string, identifier=string[start], start=start)
    left_brace, right_brace = '{', '}'
    open_braces = 1
    escaped = False
    items = []
    # Skip the identifier and the opening '{'; indices stay absolute.
    indices_and_chars = enumerate(string[start+2:], start=start+2)

    for index, char in indices_and_chars:
        if char == left_brace and not escaped:
            open_braces += 1

        elif char == right_brace and not escaped:
            open_braces -= 1

            if open_braces == 0:
                next_char = string[index+1] if index+1 < len(string) else None

                if left_brace == '{':    # Parsing name.
                    match.base = string[start+2:index]
                    # Only '$', '@' and '&' variables can be followed by
                    # items; otherwise the match ends at the closing '}'.
                    if match.identifier not in '$@&' or next_char != '[':
                        match.end = index + 1
                        break
                    left_brace, right_brace = '[', ']'

                else:                    # Parsing items.
                    # Here `start` points at the item's opening '['.
                    items.append(string[start+1:index])
                    if next_char != '[':
                        match.end = index + 1
                        match.items = tuple(items)
                        break

                # Reached only when the next character is '[', so the
                # iterator is guaranteed to have one more element.
                next(indices_and_chars)  # Consume '['.
                start = index + 1        # Start of the next item.
                open_braces = 1

        else:
            # A backslash toggles escaping; any other character resets it.
            escaped = False if char != '\\' else not escaped

    # Non-zero here means the loop ran out of characters before the
    # current bracket pair was closed.
    if open_braces:
        if ignore_errors:
            return VariableMatch(string)
        incomplete = string[match.start:]
        if left_brace == '{':
            raise VariableError(f"Variable '{incomplete}' was not closed properly.")
        raise VariableError(f"Variable item '{incomplete}' was not closed properly.")

    # Build the fallback from the searched string, as every other
    # "no match" path does, rather than from the match object itself.
    # NOTE(review): VariableMatch truthiness is defined outside this view.
    return match if match else VariableMatch(string)
197+
198+
199+
def _find_variable_start(string, identifiers):
    """Return the index of the first unescaped variable identifier.

    A candidate is any character in ``identifiers`` that sits directly
    before a ``{`` and is accepted by ``_not_escaped``. Returns ``-1``
    when the string has no such position.
    """
    # A '{' at index 0 has no room for an identifier, so start from 1.
    brace = string.find('{', 1)
    while brace > 0:
        candidate = brace - 1
        if string[candidate] in identifiers and _not_escaped(string, candidate):
            return candidate
        # Continue from the character right after the rejected '{'.
        brace = string.find('{', brace + 1)
    return -1
208+
209+
210+
def _not_escaped(string, index):
211+
escaped = False
212+
while index > 0 and string[index-1] == '\\':
213+
index -= 1
214+
escaped = not escaped
215+
return not escaped
258216

259217

260218
def unescape_variable_syntax(item):

0 commit comments

Comments
 (0)