Skip to content

Commit 0fb630e

Browse files
authored
ref(serializer): Make trimming faster by a few ms (getsentry#509)
* ref(serializer): Make trimming faster by a few ms. Refactor MetaNode and merge it into Serializer; roughly a 40% speedup on an informal microbenchmark. * fix: Fix tests
1 parent f763061 commit 0fb630e

File tree

1 file changed

+123
-162
lines changed

1 file changed

+123
-162
lines changed

sentry_sdk/serializer.py

+123-162
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import contextlib
2+
import itertools
23

34
from datetime import datetime
45

@@ -17,6 +18,7 @@
1718
from typing import Any
1819
from typing import Dict
1920
from typing import List
21+
from typing import Tuple
2022
from typing import Optional
2123
from typing import Callable
2224
from typing import Union
@@ -50,81 +52,73 @@ def add_global_repr_processor(processor):
5052
global_repr_processors.append(processor)
5153

5254

53-
class MetaNode(object):
54-
__slots__ = (
55-
"_parent",
56-
"_segment",
57-
"_depth",
58-
"_data",
59-
"_is_databag",
60-
"_should_repr_strings",
61-
)
55+
class Memo(object):
    """Remember which objects are currently being visited.

    ``_inner`` maps ``id(obj)`` to ``obj`` for every object that is
    presently inside a ``memoize`` block, which lets callers detect
    that they have re-entered an object they are already processing.
    """

    def __init__(self):
        # type: () -> None
        self._inner = {}  # type: Dict[int, Any]

    @contextlib.contextmanager
    def memoize(self, obj):
        # type: (Any) -> Generator[bool, None, None]
        """Context manager yielding whether ``obj`` is already being visited.

        Yields ``True`` if ``obj`` is already registered (the registration
        is owned by the outer ``memoize`` block and is left untouched).
        Otherwise registers ``obj``, yields ``False`` and unregisters it
        again when the block exits.
        """
        key = id(obj)
        if key in self._inner:
            yield True
            return

        self._inner[key] = obj
        try:
            yield False
        finally:
            # Unregister even when the body raised: contextlib re-raises
            # the exception at the ``yield``, so without ``finally`` the
            # entry would stay behind and the object would be reported
            # as already visited on every later call.
            self._inner.pop(key, None)
70+
71+
72+
class Serializer(object):
73+
__slots__ = ("memo", "_path", "_meta_stack", "_is_databag", "_should_repr_strings")
6274

6375
def __init__(self):
6476
# type: () -> None
65-
self._parent = None # type: Optional[MetaNode]
66-
self._segment = None # type: Optional[Segment]
67-
self._depth = 0 # type: int
68-
self._data = None # type: Optional[Dict[str, Any]]
77+
self.memo = Memo()
78+
79+
self._path = [] # type: List[Segment]
80+
self._meta_stack = [] # type: List[Dict[Segment, Any]]
6981
self._is_databag = None # type: Optional[bool]
7082
self._should_repr_strings = None # type: Optional[bool]
7183

7284
def startswith_path(self, path):
73-
# type: (List[Optional[str]]) -> bool
74-
if len(path) > self._depth:
85+
# type: (Tuple[Optional[Segment], ...]) -> bool
86+
if len(path) > len(self._path):
7587
return False
7688

77-
return self.is_path(path + [None] * (self._depth - len(path)))
78-
79-
def is_path(self, path):
80-
# type: (List[Optional[str]]) -> bool
81-
if len(path) != self._depth:
82-
return False
89+
for i, segment in enumerate(path):
90+
if segment is None:
91+
continue
8392

84-
cur = self
85-
for segment in reversed(path):
86-
if segment is not None and segment != cur._segment:
93+
if self._path[i] != segment:
8794
return False
88-
assert cur._parent is not None
89-
cur = cur._parent
9095

91-
return cur._segment is None
92-
93-
def enter(self, segment):
94-
# type: (Segment) -> MetaNode
95-
rv = MetaNode()
96-
rv._parent = self
97-
rv._depth = self._depth + 1
98-
rv._segment = segment
99-
return rv
96+
return True
10097

101-
def _create_annotations(self):
102-
# type: () -> None
103-
if self._data is not None:
104-
return
98+
def annotate(self, **meta):
99+
# type: (**Any) -> None
100+
while len(self._meta_stack) <= len(self._path):
101+
try:
102+
segment = self._path[len(self._meta_stack) - 1]
103+
node = self._meta_stack[-1].setdefault(text_type(segment), {})
104+
except IndexError:
105+
node = {}
105106

106-
self._data = {}
107-
if self._parent is not None:
108-
self._parent._create_annotations()
109-
self._parent._data[str(self._segment)] = self._data # type: ignore
107+
self._meta_stack.append(node)
110108

111-
def annotate(self, **meta):
112-
# type: (Any) -> None
113-
self._create_annotations()
114-
assert self._data is not None
115-
self._data.setdefault("", {}).update(meta)
109+
self._meta_stack[-1].setdefault("", {}).update(meta)
116110

117111
def should_repr_strings(self):
118112
# type: () -> bool
119113
if self._should_repr_strings is None:
120114
self._should_repr_strings = (
121115
self.startswith_path(
122-
["exception", "values", None, "stacktrace", "frames", None, "vars"]
116+
("exception", "values", None, "stacktrace", "frames", None, "vars")
123117
)
124118
or self.startswith_path(
125-
["threads", "values", None, "stacktrace", "frames", None, "vars"]
119+
("threads", "values", None, "stacktrace", "frames", None, "vars")
126120
)
127-
or self.startswith_path(["stacktrace", "frames", None, "vars"])
121+
or self.startswith_path(("stacktrace", "frames", None, "vars"))
128122
)
129123

130124
return self._should_repr_strings
@@ -133,153 +127,120 @@ def is_databag(self):
133127
# type: () -> bool
134128
if self._is_databag is None:
135129
self._is_databag = (
136-
self.startswith_path(["request", "data"])
137-
or self.startswith_path(["breadcrumbs", None])
138-
or self.startswith_path(["extra"])
139-
or self.startswith_path(
140-
["exception", "values", None, "stacktrace", "frames", None, "vars"]
141-
)
142-
or self.startswith_path(
143-
["threads", "values", None, "stacktrace", "frames", None, "vars"]
144-
)
145-
or self.startswith_path(["stacktrace", "frames", None, "vars"])
130+
self.should_repr_strings()
131+
or self.startswith_path(("request", "data"))
132+
or self.startswith_path(("breadcrumbs", None))
133+
or self.startswith_path(("extra",))
146134
)
147135

148136
return self._is_databag
149137

150-
151-
def _flatten_annotated(obj, meta_node):
152-
# type: (Any, MetaNode) -> Any
153-
if isinstance(obj, AnnotatedValue):
154-
meta_node.annotate(**obj.metadata)
155-
obj = obj.value
156-
return obj
157-
158-
159-
class Memo(object):
160-
def __init__(self):
161-
# type: () -> None
162-
self._inner = {} # type: Dict[int, Any]
163-
164-
@contextlib.contextmanager
165-
def memoize(self, obj):
166-
# type: (Any) -> Generator[bool, None, None]
167-
if id(obj) in self._inner:
168-
yield True
169-
else:
170-
self._inner[id(obj)] = obj
171-
yield False
172-
173-
self._inner.pop(id(obj), None)
174-
175-
176-
class Serializer(object):
177-
def __init__(self):
178-
# type: () -> None
179-
self.memo = Memo()
180-
self.meta_node = MetaNode()
181-
182-
@contextlib.contextmanager
183-
def enter(self, segment):
184-
# type: (Segment) -> Generator[None, None, None]
185-
old_node = self.meta_node
186-
self.meta_node = self.meta_node.enter(segment)
187-
188-
try:
189-
yield
190-
finally:
191-
self.meta_node = old_node
192-
193138
def serialize_event(self, obj):
194139
# type: (Any) -> Dict[str, Any]
195140
rv = self._serialize_node(obj)
196-
if self.meta_node._data is not None:
197-
rv["_meta"] = self.meta_node._data
141+
if self._meta_stack:
142+
rv["_meta"] = self._meta_stack[0]
198143
return rv
199144

200-
def _serialize_node(self, obj, max_depth=None, max_breadth=None):
201-
# type: (Any, Optional[int], Optional[int]) -> Any
202-
with capture_internal_exceptions():
203-
with self.memo.memoize(obj) as result:
204-
if result:
205-
return CYCLE_MARKER
145+
def _serialize_node(self, obj, max_depth=None, max_breadth=None, segment=None):
146+
# type: (Any, Optional[int], Optional[int], Optional[Segment]) -> Any
147+
if segment is not None:
148+
self._path.append(segment)
149+
self._is_databag = self._is_databag or None
150+
self._should_repr_strings = self._should_repr_strings or None
206151

207-
return self._serialize_node_impl(
208-
obj, max_depth=max_depth, max_breadth=max_breadth
209-
)
152+
try:
153+
with capture_internal_exceptions():
154+
with self.memo.memoize(obj) as result:
155+
if result:
156+
return CYCLE_MARKER
157+
158+
return self._serialize_node_impl(
159+
obj, max_depth=max_depth, max_breadth=max_breadth
160+
)
210161

211-
if self.meta_node.is_databag():
212-
return u"<failed to serialize, use init(debug=True) to see error logs>"
162+
if self.is_databag():
163+
return u"<failed to serialize, use init(debug=True) to see error logs>"
213164

214-
return None
165+
return None
166+
finally:
167+
if segment is not None:
168+
self._path.pop()
169+
del self._meta_stack[len(self._path) + 1 :]
170+
self._is_databag = self._is_databag and None
171+
self._should_repr_strings = self._should_repr_strings and None
172+
173+
def _flatten_annotated(self, obj):
    # type: (Any) -> Any
    """Unwrap ``obj`` if it is an ``AnnotatedValue``.

    For an ``AnnotatedValue`` the metadata is passed to ``self.annotate``
    as keyword arguments and the wrapped ``value`` is returned. Any other
    object is returned unchanged.
    """
    if not isinstance(obj, AnnotatedValue):
        return obj

    self.annotate(**obj.metadata)
    return obj.value
215179

216180
def _serialize_node_impl(self, obj, max_depth, max_breadth):
217181
# type: (Any, Optional[int], Optional[int]) -> Any
218-
if max_depth is None and max_breadth is None and self.meta_node.is_databag():
219-
max_depth = self.meta_node._depth + MAX_DATABAG_DEPTH
220-
max_breadth = self.meta_node._depth + MAX_DATABAG_BREADTH
182+
cur_depth = len(self._path)
183+
if max_depth is None and max_breadth is None and self.is_databag():
184+
max_depth = cur_depth + MAX_DATABAG_DEPTH
185+
max_breadth = cur_depth + MAX_DATABAG_BREADTH
221186

222187
if max_depth is None:
223188
remaining_depth = None
224189
else:
225-
remaining_depth = max_depth - self.meta_node._depth
190+
remaining_depth = max_depth - cur_depth
226191

227-
obj = _flatten_annotated(obj, self.meta_node)
192+
obj = self._flatten_annotated(obj)
228193

229194
if remaining_depth is not None and remaining_depth <= 0:
230-
self.meta_node.annotate(rem=[["!limit", "x"]])
231-
if self.meta_node.is_databag():
232-
return _flatten_annotated(strip_string(safe_repr(obj)), self.meta_node)
195+
self.annotate(rem=[["!limit", "x"]])
196+
if self.is_databag():
197+
return self._flatten_annotated(strip_string(safe_repr(obj)))
233198
return None
234199

235-
if self.meta_node.is_databag():
200+
if global_repr_processors and self.is_databag():
236201
hints = {"memo": self.memo, "remaining_depth": remaining_depth}
237202
for processor in global_repr_processors:
238203
with capture_internal_exceptions():
239204
result = processor(obj, hints)
240205
if result is not NotImplemented:
241-
return _flatten_annotated(result, self.meta_node)
206+
return self._flatten_annotated(result)
242207

243208
if isinstance(obj, Mapping):
244-
# Create temporary list here to avoid calling too much code that
209+
# Create temporary copy here to avoid calling too much code that
245210
# might mutate our dictionary while we're still iterating over it.
246-
items = []
247-
for i, (k, v) in enumerate(iteritems(obj)):
248-
if max_breadth is not None and i >= max_breadth:
249-
self.meta_node.annotate(len=max_breadth)
250-
break
251-
252-
items.append((k, v))
253-
254-
rv_dict = {} # type: Dict[Any, Any]
255-
for k, v in items:
256-
k = text_type(k)
257-
258-
with self.enter(k):
259-
v = self._serialize_node(
260-
v, max_depth=max_depth, max_breadth=max_breadth
261-
)
262-
if v is not None:
263-
rv_dict[k] = v
211+
if max_breadth is not None and len(obj) >= max_breadth:
212+
rv_dict = dict(itertools.islice(iteritems(obj), None, max_breadth))
213+
self.annotate(len=len(obj))
214+
else:
215+
rv_dict = dict(iteritems(obj))
216+
217+
for k in list(rv_dict):
218+
str_k = text_type(k)
219+
v = self._serialize_node(
220+
rv_dict.pop(k),
221+
max_depth=max_depth,
222+
max_breadth=max_breadth,
223+
segment=str_k,
224+
)
225+
if v is not None:
226+
rv_dict[str_k] = v
264227

265228
return rv_dict
266-
elif isinstance(obj, Sequence) and not isinstance(obj, string_types):
267-
rv_list = [] # type: List[Any]
268-
for i, v in enumerate(obj):
269-
if max_breadth is not None and i >= max_breadth:
270-
self.meta_node.annotate(len=max_breadth)
271-
break
272-
273-
with self.enter(i):
274-
rv_list.append(
275-
self._serialize_node(
276-
v, max_depth=max_depth, max_breadth=max_breadth
277-
)
278-
)
229+
elif not isinstance(obj, string_types) and isinstance(obj, Sequence):
230+
if max_breadth is not None and len(obj) >= max_breadth:
231+
rv_list = list(obj)[:max_breadth]
232+
self.annotate(len=len(obj))
233+
else:
234+
rv_list = list(obj)
235+
236+
for i in range(len(rv_list)):
237+
rv_list[i] = self._serialize_node(
238+
rv_list[i], max_depth=max_depth, max_breadth=max_breadth, segment=i
239+
)
279240

280241
return rv_list
281242

282-
if self.meta_node.should_repr_strings():
243+
if self.should_repr_strings():
283244
obj = safe_repr(obj)
284245
else:
285246
if obj is None or isinstance(obj, (bool, number_types)):
@@ -294,4 +255,4 @@ def _serialize_node_impl(self, obj, max_depth, max_breadth):
294255
if not isinstance(obj, string_types):
295256
obj = safe_repr(obj)
296257

297-
return _flatten_annotated(strip_string(obj), self.meta_node)
258+
return self._flatten_annotated(strip_string(obj))

0 commit comments

Comments
 (0)