Skip to content
This repository was archived by the owner on Oct 29, 2024. It is now read-only.

Commit 8710075

Browse files
committed
Improve DataFrameClient tag conversion performance
In `_convert_dataframe_to_lines`, if `global_tags` is specified but `tag_columns` is not, take a faster route to process the tags. Previously, in such a case, global tags were duplicated as tag columns and processed as if they were tag columns. Such processing is wasteful and results in a slowdown that becomes noticeable when batch loading many thousands of data points with a handful of global tags.
1 parent c9a1b86 commit 8710075

File tree

1 file changed

+13
-6
lines changed

1 file changed

+13
-6
lines changed

influxdb/_dataframe_client.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -298,12 +298,6 @@ def _convert_dataframe_to_lines(self,
298298
field_columns = list(field_columns) if list(field_columns) else []
299299
tag_columns = list(tag_columns) if list(tag_columns) else []
300300

301-
# Make global_tags as tag_columns
302-
if global_tags:
303-
for tag in global_tags:
304-
dataframe[tag] = global_tags[tag]
305-
tag_columns.append(tag)
306-
307301
# If field columns but no tag columns, assume rest of columns are tags
308302
if field_columns and (not tag_columns):
309303
tag_columns = list(column_series[~column_series.isin(
@@ -333,6 +327,13 @@ def _convert_dataframe_to_lines(self,
333327

334328
# If tag columns exist, make an array of formatted tag keys and values
335329
if tag_columns:
330+
331+
# Make global_tags as tag_columns
332+
if global_tags:
333+
for tag in global_tags:
334+
dataframe[tag] = global_tags[tag]
335+
tag_columns.append(tag)
336+
336337
tag_df = dataframe[tag_columns]
337338
tag_df = tag_df.fillna('') # replace NA with empty string
338339
tag_df = tag_df.sort_index(axis=1)
@@ -345,6 +346,12 @@ def _convert_dataframe_to_lines(self,
345346
tags = tags.sum(axis=1)
346347

347348
del tag_df
349+
elif global_tags:
350+
tag_string = ''.join(
351+
[",{}={}".format(k, _escape_tag(v)) if v else ''
352+
for k, v in sorted(global_tags.items())]
353+
)
354+
tags = pd.Series(tag_string, index=dataframe.index)
348355
else:
349356
tags = ''
350357

0 commit comments

Comments
 (0)