Skip to content

Commit c300105

Browse files
shushenxginn8
authored and committed
Fix performance degradation with line protocol (influxdata#592)
Assembling the lines row by row, as introduced in commit bf232a7 to remove NaN values, has a significant performance impact. This change fixes the issue by recording the positions of the NaN fields before stringifying the dataframe, replacing those fields with empty strings, and reverting to the pd.DataFrame.sum() function to yield the lines. Fixes: influxdata#591
1 parent b3ed5db commit c300105

File tree

1 file changed

+7
-9
lines changed

1 file changed

+7
-9
lines changed

influxdb/_dataframe_client.py

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -365,16 +365,18 @@ def _convert_dataframe_to_lines(self,
365365

366366
# Make an array of formatted field keys and values
367367
field_df = dataframe[field_columns]
368+
# Keep the positions where Null values are found
369+
mask_null = field_df.isnull().values
368370

369371
field_df = self._stringify_dataframe(field_df,
370372
numeric_precision,
371373
datatype='field')
372374

373-
def format_line(line):
374-
line = line[~line.isnull()] # drop None entries
375-
return ",".join((line.index + '=' + line.values))
376-
377-
fields = field_df.apply(format_line, axis=1)
375+
field_df = (field_df.columns.values + '=').tolist() + field_df
376+
field_df[field_df.columns[1:]] = ',' + field_df[
377+
field_df.columns[1:]]
378+
field_df = field_df.where(~mask_null, '') # drop Null entries
379+
fields = field_df.sum(axis=1)
378380
del field_df
379381

380382
# Generate line protocol string
@@ -388,9 +390,6 @@ def _stringify_dataframe(dframe, numeric_precision, datatype='field'):
388390
# Prevent modification of input dataframe
389391
dframe = dframe.copy()
390392

391-
# Keep the positions where Null values are found
392-
mask_null = dframe.isnull().values
393-
394393
# Find int and string columns for field-type data
395394
int_columns = dframe.select_dtypes(include=['integer']).columns
396395
string_columns = dframe.select_dtypes(include=['object']).columns
@@ -435,7 +434,6 @@ def _stringify_dataframe(dframe, numeric_precision, datatype='field'):
435434

436435
dframe.columns = dframe.columns.astype(str)
437436

438-
dframe = dframe.where(~mask_null, None)
439437
return dframe
440438

441439
def _datetime_to_epoch(self, datetime, time_precision='s'):

0 commit comments

Comments
 (0)