Skip to content

Commit dc15164

Browse files
mfpierre authored and brian-brazil committed
Text parser optimization (~4.5x perf) (prometheus#282)
* Optimize parsing & ease debug/testing of the Metric class
  Signed-off-by: Pierre Margueritte <mfpierre@gmail.com>
* Fix repr and add empty label test
  Signed-off-by: Pierre <mfpierre@gmail.com>
1 parent 9551846 commit dc15164

File tree

3 files changed

+116
-118
lines changed

3 files changed

+116
-118
lines changed

prometheus_client/core.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,9 @@ def __eq__(self, other):
181181
self.type == other.type and
182182
self.samples == other.samples)
183183

184+
def __repr__(self):
    """Return a debugging string listing name, documentation, type and samples."""
    # Collect the fields first so the format string stays on one short line.
    fields = (self.name, self.documentation, self.type, self.samples)
    return "Metric(%s, %s, %s, %s)" % fields
184187

185188
class UntypedMetricFamily(Metric):
186189
'''A single untyped metric and its samples.

prometheus_client/parser.py

Lines changed: 87 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -19,129 +19,98 @@ def text_string_to_metric_families(text):
1919
for metric_family in text_fd_to_metric_families(StringIO.StringIO(text)):
2020
yield metric_family
2121

22+
def _replace_help_escaping(s):
    r"""Undo the escaping used for HELP text in the text exposition format.

    ``\\`` becomes a single backslash and ``\n`` becomes a newline; any other
    backslash sequence (and a lone trailing backslash) is left untouched.

    The string is split on escaped backslashes before newlines are decoded, so
    the ``n`` that follows an escaped backslash (as in ``\\n``) is never taken
    for an escaped newline. Chaining ``str.replace`` over the whole string
    gets that case wrong.
    """
    if "\\" not in s:
        # Fast path: nothing to unescape.
        return s
    # str.split consumes backslash pairs left to right, so each piece contains
    # no escaped backslash and can be decoded independently.
    return "\\".join(piece.replace("\\n", "\n") for piece in s.split("\\\\"))
2224

23-
def _unescape_help(text):
24-
result = []
25-
slash = False
25+
def _replace_escaping(s):
    r"""Undo the escaping used for label values in the text exposition format.

    ``\\`` becomes a single backslash, ``\n`` a newline and ``\"`` a double
    quote; any other backslash sequence is left untouched.

    The string is split on escaped backslashes first, so the character after
    an escaped backslash (``\\n`` or ``\\"``) is never decoded as part of
    another escape. Chaining ``str.replace`` over the whole string gets those
    cases wrong.
    """
    if "\\" not in s:
        # Fast path: nothing to unescape.
        return s
    # Each piece is free of escaped backslashes, so the two remaining escapes
    # can be decoded in any order.
    return "\\".join(
        piece.replace("\\n", "\n").replace('\\"', '"')
        for piece in s.split("\\\\")
    )
2627

27-
for char in text:
28-
if slash:
29-
if char == '\\':
30-
result.append('\\')
31-
elif char == 'n':
32-
result.append('\n')
33-
else:
34-
result.append('\\' + char)
35-
slash = False
36-
else:
37-
if char == '\\':
38-
slash = True
39-
else:
40-
result.append(char)
41-
42-
if slash:
43-
result.append('\\')
44-
45-
return ''.join(result)
28+
def _parse_labels(labels_string):
    r"""Parse the text found between a sample's curly braces into a dict.

    ``labels_string`` is the label list without the enclosing braces, for
    example ``foo="bar", baz="qux"``. Whitespace around label names, ``=`` and
    commas is ignored. Whitespace inside the quotes is part of the value and
    is preserved.

    Returns a dict mapping label names to unescaped label values. A string
    containing no ``=`` at all (empty, or only commas/whitespace) yields an
    empty dict.

    Raises ``ValueError("Invalid labels: ...")`` when a label has no quoted
    value, when something other than whitespace sits between ``=`` and the
    opening quote, or when the closing quote is missing.
    """
    labels = {}
    # Return if we don't have valid labels
    if "=" not in labels_string:
        return labels

    # Unescaping is only needed when a backslash occurs somewhere.
    escaping = "\\" in labels_string

    sub_labels = labels_string
    try:
        # Process one label at a time, consuming sub_labels from the left.
        while sub_labels:
            # The label name is everything before the first equals sign.
            value_start = sub_labels.index("=")
            label_name = sub_labels[:value_start].strip()
            sub_labels = sub_labels[value_start + 1:].lstrip()

            # The value must open with a quote right after the equals sign.
            if not sub_labels.startswith('"'):
                raise ValueError(labels_string)

            # Find the closing quote: the first quote preceded by an even
            # number of backslashes. An odd count means the quote itself is
            # escaped; an even count means the backslashes escape each other.
            quote_end = 0
            while True:
                quote_end = sub_labels.index('"', quote_end + 1)
                backslashes = 0
                cursor = quote_end - 1
                # Index 0 is the opening quote, so stop before reaching it.
                while cursor > 0 and sub_labels[cursor] == "\\":
                    backslashes += 1
                    cursor -= 1
                if backslashes % 2 == 0:
                    break

            # The label value sits between the opening and closing quotes.
            label_value = sub_labels[1:quote_end]
            if escaping:
                label_value = _replace_escaping(label_value)
            labels[label_name] = label_value

            # Drop the processed label and the comma that follows it, if any.
            sub_labels = sub_labels[quote_end + 1:]
            next_comma = sub_labels.find(",") + 1
            sub_labels = sub_labels[next_comma:].lstrip()

        return labels

    except ValueError:
        raise ValueError("Invalid labels: %s" % labels_string)
76+
77+
78+
# If we have multiple values only consider the first
def _parse_value(s):
    """Return the first whitespace-delimited token of ``s``.

    ``s`` is the part of a sample line that follows the metric name and
    labels. Leading whitespace is skipped and anything after the first token
    (for example a timestamp) is ignored. Spaces and tabs may be mixed freely
    as separators.

    Returns an empty string when ``s`` is empty or contains only whitespace.
    """
    tokens = s.split(None, 1)
    return tokens[0] if tokens else ""
4688

4789

4890
def _parse_sample(text):
    """Parse one sample line into a ``(name, labels, value)`` tuple.

    ``text`` is a single sample line, either ``name value`` or
    ``name{label="value", ...} value``. Anything after the value (such as a
    timestamp) is ignored. Spaces and tabs are both accepted as separators.

    Returns the metric name, a dict of labels (empty when the line has no
    label section) and the value as a float.

    Raises ValueError when the braces are unbalanced, the labels are
    malformed, or the value is missing or not a number. Label errors
    propagate as raised by ``_parse_labels`` instead of being retried as a
    label-less line, which would otherwise turn a line such as
    ``a{foo=bar} 1`` into a sample named ``a{foo=bar}``.
    """
    label_start = text.find("{")
    if label_start != -1:
        # Use the last closing brace: label values may contain braces.
        label_end = text.rfind("}")
        if label_end < label_start:
            raise ValueError("Invalid line: " + text)
        # The name is before the labels
        name = text[:label_start].strip()
        # Drop the enclosing curly braces before parsing the labels
        labels = _parse_labels(text[label_start + 1:label_end])
        # The value follows the closing brace; _parse_value skips the
        # whitespace in between and drops any trailing timestamp.
        value = float(_parse_value(text[label_end + 1:]))
        return name, labels, value

    # We don't have labels: the line is "name value [timestamp]".
    # Splitting on any whitespace run handles mixed tabs and spaces.
    fields = text.split(None, 2)
    if len(fields) < 2:
        raise ValueError("Invalid line: " + text)
    return fields[0], {}, float(fields[1])
54113

55-
state = 'name'
56-
57-
for char in text:
58-
if state == 'name':
59-
if char == '{':
60-
state = 'startoflabelname'
61-
elif char == ' ' or char == '\t':
62-
state = 'endofname'
63-
else:
64-
name.append(char)
65-
elif state == 'endofname':
66-
if char == ' ' or char == '\t':
67-
pass
68-
elif char == '{':
69-
state = 'startoflabelname'
70-
else:
71-
value.append(char)
72-
state = 'value'
73-
elif state == 'startoflabelname':
74-
if char == ' ' or char == '\t' or char == ',':
75-
pass
76-
elif char == '}':
77-
state = 'endoflabels'
78-
else:
79-
state = 'labelname'
80-
labelname.append(char)
81-
elif state == 'labelname':
82-
if char == '=':
83-
state = 'labelvaluequote'
84-
elif char == ' ' or char == '\t':
85-
state = 'labelvalueequals'
86-
else:
87-
labelname.append(char)
88-
elif state == 'labelvalueequals':
89-
if char == '=':
90-
state = 'labelvaluequote'
91-
elif char == ' ' or char == '\t':
92-
pass
93-
else:
94-
raise ValueError("Invalid line: " + text)
95-
elif state == 'labelvaluequote':
96-
if char == '"':
97-
state = 'labelvalue'
98-
elif char == ' ' or char == '\t':
99-
pass
100-
else:
101-
raise ValueError("Invalid line: " + text)
102-
elif state == 'labelvalue':
103-
if char == '\\':
104-
state = 'labelvalueslash'
105-
elif char == '"':
106-
labels[''.join(labelname)] = ''.join(labelvalue)
107-
labelname = []
108-
labelvalue = []
109-
state = 'nextlabel'
110-
else:
111-
labelvalue.append(char)
112-
elif state == 'labelvalueslash':
113-
state = 'labelvalue'
114-
if char == '\\':
115-
labelvalue.append('\\')
116-
elif char == 'n':
117-
labelvalue.append('\n')
118-
elif char == '"':
119-
labelvalue.append('"')
120-
else:
121-
labelvalue.append('\\' + char)
122-
elif state == 'nextlabel':
123-
if char == ',':
124-
state = 'startoflabelname'
125-
elif char == '}':
126-
state = 'endoflabels'
127-
elif char == ' ' or char == '\t':
128-
pass
129-
else:
130-
raise ValueError("Invalid line: " + text)
131-
elif state == 'endoflabels':
132-
if char == ' ' or char == '\t':
133-
pass
134-
else:
135-
value.append(char)
136-
state = 'value'
137-
elif state == 'value':
138-
if char == ' ' or char == '\t':
139-
# Timestamps are not supported, halt
140-
break
141-
else:
142-
value.append(char)
143-
return (''.join(name), labels, float(''.join(value)))
144-
145114

146115
def text_fd_to_metric_families(fd):
147116
"""Parse Prometheus text format from a file descriptor.
@@ -180,7 +149,7 @@ def build_metric(name, documentation, typ, samples):
180149
samples = []
181150
allowed_names = [parts[2]]
182151
if len(parts) == 4:
183-
documentation = _unescape_help(parts[3])
152+
documentation = _replace_help_escaping(parts[3])
184153
else:
185154
documentation = ''
186155
elif parts[1] == 'TYPE':

tests/test_parser.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,15 @@ def test_tabs(self):
119119
""")
120120
self.assertEqual([CounterMetricFamily("a", "help", value=1)], list(families))
121121

122+
def test_labels_with_curly_braces(self):
    """Curly braces inside a quoted label value must not end the label section."""
    exposition = (
        "# TYPE a counter\n"
        "# HELP a help\n"
        'a{foo="bar", bar="b{a}z"} 1\n'
    )
    expected = CounterMetricFamily("a", "help", labels=["foo", "bar"])
    expected.add_metric(["bar", "b{a}z"], 1)
    parsed = list(text_string_to_metric_families(exposition))
    self.assertEqual([expected], parsed)
130+
122131
def test_empty_help(self):
123132
families = text_string_to_metric_families("""# TYPE a counter
124133
# HELP a
@@ -142,10 +151,16 @@ def test_spaces(self):
142151
# HELP a help
143152
a{ foo = "bar" } 1
144153
a\t\t{\t\tfoo\t\t=\t\t"baz"\t\t}\t\t2
154+
a { foo = "buz" } 3
155+
a\t { \t foo\t = "biz"\t } \t 4
156+
a \t{\t foo = "boz"\t}\t 5
145157
""")
146158
metric_family = CounterMetricFamily("a", "help", labels=["foo"])
147159
metric_family.add_metric(["bar"], 1)
148160
metric_family.add_metric(["baz"], 2)
161+
metric_family.add_metric(["buz"], 3)
162+
metric_family.add_metric(["biz"], 4)
163+
metric_family.add_metric(["boz"], 5)
149164
self.assertEqual([metric_family], list(families))
150165

151166
def test_commas(self):
@@ -174,6 +189,17 @@ def test_nan(self):
174189
# Can't use a simple comparison as nan != nan.
175190
self.assertTrue(math.isnan(list(families)[0].samples[0][2]))
176191

192+
def test_empty_label(self):
    """An empty quoted label value is parsed as the empty string."""
    exposition = (
        "# TYPE a counter\n"
        "# HELP a help\n"
        'a{foo="bar"} 1\n'
        'a{foo=""} 2\n'
    )
    expected = CounterMetricFamily("a", "help", labels=["foo"])
    for label_value, sample_value in (("bar", 1), ("", 2)):
        expected.add_metric([label_value], sample_value)
    parsed = list(text_string_to_metric_families(exposition))
    self.assertEqual([expected], parsed)
202+
177203
def test_escaping(self):
178204
families = text_string_to_metric_families("""# TYPE a counter
179205
# HELP a he\\n\\\\l\\tp

0 commit comments

Comments
 (0)