Skip to content
This repository was archived by the owner on Oct 29, 2024. It is now read-only.

Fix DataFrameClient empty tag processing #367

Merged
merged 2 commits on Dec 7, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions influxdb/_dataframe_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,13 +279,24 @@ def _convert_dataframe_to_lines(self,
# If tag columns exist, make an array of formatted tag keys and values
if tag_columns:
tag_df = dataframe[tag_columns]
tag_df = tag_df.fillna('') # replace NA with empty string
tag_df = tag_df.sort_index(axis=1)
tag_df = self._stringify_dataframe(
tag_df, numeric_precision, datatype='tag')
tags = (',' + (
(tag_df.columns.values + '=').tolist() + tag_df)).sum(axis=1)
del tag_df

# prepend tag keys
tag_df = tag_df.apply(
lambda s: s.apply(
lambda v, l: l + '=' + v if v else None, l=s.name))

# join tags, but leave out None values
tags = tag_df.apply(
lambda r: ','.join(r.dropna()), axis=1)

# prepend comma
tags = tags.apply(lambda v: ',' + v if v else '')

del tag_df
else:
tags = ''

Expand Down
31 changes: 22 additions & 9 deletions influxdb/tests/dataframe_client_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,20 +201,33 @@ def test_write_points_from_dataframe_with_tag_cols_and_defaults(self):
def test_write_points_from_dataframe_with_tag_escaped(self):
now = pd.Timestamp('1970-01-01 00:00+00:00')
dataframe = pd.DataFrame(
data=[['blue', 1, "1", 1, 1.0, 'hot'],
['red,green=orange', 0, "2", 2, 2.0, 'cold']],
index=[now, now + timedelta(hours=1)],
columns=["tag_one", "tag_two", "column_one",
"column_two", "column_three",
"tag_three"])
data=[
['blue orange', "1", 1, 'hot=cold'], # space, equal
['red,green', "2", 2, r'cold\fire'], # comma, backslash
['some', "2", 2, ''], # skip empty
['some', "2", 2, None], # skip None
['', "2", 2, None], # all tags empty
],
index=pd.period_range(now, freq='H', periods=5),
columns=["tag_one", "column_one", "column_two", "tag_three"]
)

expected_escaped_tags = (
b"foo,tag_one=blue "
b"foo,tag_one=blue\\ orange,tag_three=hot\\=cold "
b"column_one=\"1\",column_two=1i "
b"0\n"
b"foo,tag_one=red\\,green\\=orange "
b"foo,tag_one=red\\,green,tag_three=cold\\\\fire "
b"column_one=\"2\",column_two=2i "
b"3600000000000\n"
b"foo,tag_one=some "
b"column_one=\"2\",column_two=2i "
b"7200000000000\n"
b"foo,tag_one=some "
b"column_one=\"2\",column_two=2i "
b"10800000000000\n"
b"foo "
b"column_one=\"2\",column_two=2i "
b"14400000000000\n"
)

with requests_mock.Mocker() as m:
Expand All @@ -224,7 +237,7 @@ def test_write_points_from_dataframe_with_tag_escaped(self):
cli = DataFrameClient(database='db')
cli.write_points(dataframe, 'foo',
field_columns=['column_one', 'column_two'],
tag_columns=['tag_one'])
tag_columns=['tag_one', 'tag_three'])
self.assertEqual(m.last_request.body, expected_escaped_tags)

def test_write_points_from_dataframe_with_numeric_column_names(self):
Expand Down