Merge branch 'master' into master

xginn8 · web-flow · commit f7865a7668c1 · 2017-12-12T08:25:42.000-05:00
diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py
@@ -10,6 +10,7 @@
 from collections import defaultdict
 
 import pandas as pd
+import numpy as np
 
 from .client import InfluxDBClient
 from .line_protocol import _escape_tag
@@ -174,6 +175,7 @@ def query(self,
                           expected_response_code=expected_response_code,
                           raise_errors=raise_errors,
                           chunked=chunked,
+                          database=database,
                           chunk_size=chunk_size)
         results = super(DataFrameClient, self).query(query, **query_args)
         if query.strip().upper().startswith("SELECT"):
@@ -257,7 +259,7 @@ def _convert_dataframe_to_json(dataframe,
             {'measurement': measurement,
              'tags': dict(list(tag.items()) + list(tags.items())),
              'fields': rec,
-             'time': int(ts.value / precision_factor)}
+             'time': np.int64(ts.value / precision_factor)}
             for ts, tag, rec in zip(dataframe.index,
                                     dataframe[tag_columns].to_dict('record'),
                                     dataframe[field_columns].to_dict('record'))
@@ -274,6 +276,10 @@ def _convert_dataframe_to_lines(self,
                                     time_precision=None,
                                     numeric_precision=None):
 
+        dataframe = dataframe.dropna(how='all').copy()
+        if len(dataframe) == 0:
+            return []
+
         if not isinstance(dataframe, pd.DataFrame):
             raise TypeError('Must be DataFrame, but type was: {0}.'
                             .format(type(dataframe)))
@@ -319,11 +325,11 @@ def _convert_dataframe_to_lines(self,
 
         # Make array of timestamp ints
         if isinstance(dataframe.index, pd.PeriodIndex):
-            time = ((dataframe.index.to_timestamp().values.astype(int) /
-                     precision_factor).astype(int).astype(str))
+            time = ((dataframe.index.to_timestamp().values.astype(np.int64) /
+                     precision_factor).astype(np.int64).astype(str))
         else:
-            time = ((pd.to_datetime(dataframe.index).values.astype(int) /
-                     precision_factor).astype(int).astype(str))
+            time = ((pd.to_datetime(dataframe.index).values.astype(np.int64) /
+                     precision_factor).astype(np.int64).astype(str))
 
         # If tag columns exist, make an array of formatted tag keys and values
         if tag_columns:
@@ -357,20 +363,32 @@ def _convert_dataframe_to_lines(self,
 
         # Make an array of formatted field keys and values
         field_df = dataframe[field_columns]
+
         field_df = self._stringify_dataframe(field_df,
                                              numeric_precision,
                                              datatype='field')
-        field_df = (field_df.columns.values + '=').tolist() + field_df
-        field_df[field_df.columns[1:]] = ',' + field_df[field_df.columns[1:]]
-        fields = field_df.sum(axis=1)
+
+        def format_line(line):
+            line = line[~line.isnull()]  # drop None entries
+            return ",".join((line.index + '=' + line.values))
+
+        fields = field_df.apply(format_line, axis=1)
         del field_df
 
         # Generate line protocol string
+        measurement = _escape_tag(measurement)
         points = (measurement + tags + ' ' + fields + ' ' + time).tolist()
         return points
 
     @staticmethod
     def _stringify_dataframe(dframe, numeric_precision, datatype='field'):
+
+        # Prevent modification of input dataframe
+        dframe = dframe.copy()
+
+        # Keep the positions where Null values are found
+        mask_null = dframe.isnull().values
+
         # Find int and string columns for field-type data
         int_columns = dframe.select_dtypes(include=['integer']).columns
         string_columns = dframe.select_dtypes(include=['object']).columns
@@ -414,6 +432,8 @@ def _stringify_dataframe(dframe, numeric_precision, datatype='field'):
             dframe = dframe.apply(_escape_pandas_series)
 
         dframe.columns = dframe.columns.astype(str)
+
+        dframe = dframe.where(~mask_null, None)
         return dframe
 
     def _datetime_to_epoch(self, datetime, time_precision='s'):
diff --git a/influxdb/client.py b/influxdb/client.py
@@ -869,7 +869,7 @@ def send_packet(self, packet, protocol='json'):
 
         :param packet: the packet to be sent
         :type packet: (if protocol is 'json') dict
-                      (if protocol is 'line') sequence of line protocol strings
+                      (if protocol is 'line') list of line protocol strings
         :param protocol: protocol of input data, either 'json' or 'line'
         :type protocol: str
         """
diff --git a/influxdb/line_protocol.py b/influxdb/line_protocol.py
@@ -57,6 +57,13 @@ def _escape_tag(tag):
     )
 
 
+def _escape_tag_value(value):
+    ret = _escape_tag(value)
+    if ret.endswith('\\'):
+        ret += ' '
+    return ret
+
+
 def quote_ident(value):
     """Indent the quotes."""
     return "\"{}\"".format(value
@@ -135,7 +142,7 @@ def make_lines(data, precision=None):
         # tags should be sorted client-side to take load off server
         for tag_key, tag_value in sorted(iteritems(tags)):
             key = _escape_tag(tag_key)
-            value = _escape_tag(tag_value)
+            value = _escape_tag_value(tag_value)
 
             if key != '' and value != '':
                 key_values.append(key + "=" + value)
diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py
@@ -59,6 +59,103 @@ def test_write_points_from_dataframe(self):
             cli.write_points(dataframe, 'foo', tags=None)
             self.assertEqual(m.last_request.body, expected)
 
+    def test_dataframe_write_points_with_whitespace_measurement(self):
+        """write_points should escape white space in measurements."""
+        now = pd.Timestamp('1970-01-01 00:00+00:00')
+        dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
+                                 index=[now, now + timedelta(hours=1)],
+                                 columns=["column_one", "column_two",
+                                          "column_three"])
+        expected = (
+            b"meas\\ with\\ space "
+            b"column_one=\"1\",column_two=1i,column_three=1.0 0\n"
+            b"meas\\ with\\ space "
+            b"column_one=\"2\",column_two=2i,column_three=2.0 "
+            b"3600000000000\n"
+        )
+        with requests_mock.Mocker() as m:
+            m.register_uri(requests_mock.POST,
+                           "http://localhost:8086/write",
+                           status_code=204)
+            cli = DataFrameClient(database='db')
+            cli.write_points(dataframe, 'meas with space')
+            self.assertEqual(m.last_request.body, expected)
+
+    def test_write_points_from_dataframe_with_none(self):
+        """write_points should omit None-valued fields from each line."""
+        now = pd.Timestamp('1970-01-01 00:00+00:00')
+        dataframe = pd.DataFrame(data=[["1", None, 1.0], ["2", 2.0, 2.0]],
+                                 index=[now, now + timedelta(hours=1)],
+                                 columns=["column_one", "column_two",
+                                          "column_three"])
+        expected = (
+            b"foo column_one=\"1\",column_three=1.0 0\n"
+            b"foo column_one=\"2\",column_two=2.0,column_three=2.0 "
+            b"3600000000000\n"
+        )
+
+        with requests_mock.Mocker() as m:
+            m.register_uri(requests_mock.POST,
+                           "http://localhost:8086/write",
+                           status_code=204)
+
+            cli = DataFrameClient(database='db')
+
+            cli.write_points(dataframe, 'foo')
+            self.assertEqual(m.last_request.body, expected)
+
+            cli.write_points(dataframe, 'foo', tags=None)
+            self.assertEqual(m.last_request.body, expected)
+
+    def test_write_points_from_dataframe_with_line_of_none(self):
+        """write_points should skip rows whose values are all None."""
+        now = pd.Timestamp('1970-01-01 00:00+00:00')
+        dataframe = pd.DataFrame(data=[[None, None, None], ["2", 2.0, 2.0]],
+                                 index=[now, now + timedelta(hours=1)],
+                                 columns=["column_one", "column_two",
+                                          "column_three"])
+        expected = (
+            b"foo column_one=\"2\",column_two=2.0,column_three=2.0 "
+            b"3600000000000\n"
+        )
+
+        with requests_mock.Mocker() as m:
+            m.register_uri(requests_mock.POST,
+                           "http://localhost:8086/write",
+                           status_code=204)
+
+            cli = DataFrameClient(database='db')
+
+            cli.write_points(dataframe, 'foo')
+            self.assertEqual(m.last_request.body, expected)
+
+            cli.write_points(dataframe, 'foo', tags=None)
+            self.assertEqual(m.last_request.body, expected)
+
+    def test_write_points_from_dataframe_with_all_none(self):
+        """write_points should send just a newline if all values are None."""
+        now = pd.Timestamp('1970-01-01 00:00+00:00')
+        dataframe = pd.DataFrame(data=[[None, None, None], [None, None, None]],
+                                 index=[now, now + timedelta(hours=1)],
+                                 columns=["column_one", "column_two",
+                                          "column_three"])
+        expected = (
+            b"\n"
+        )
+
+        with requests_mock.Mocker() as m:
+            m.register_uri(requests_mock.POST,
+                           "http://localhost:8086/write",
+                           status_code=204)
+
+            cli = DataFrameClient(database='db')
+
+            cli.write_points(dataframe, 'foo')
+            self.assertEqual(m.last_request.body, expected)
+
+            cli.write_points(dataframe, 'foo', tags=None)
+            self.assertEqual(m.last_request.body, expected)
+
     def test_write_points_from_dataframe_in_batches(self):
         """Test write points in batch from df in TestDataFrameClient object."""
         now = pd.Timestamp('1970-01-01 00:00+00:00')
diff --git a/influxdb/tests/test_line_protocol.py b/influxdb/tests/test_line_protocol.py
@@ -22,6 +22,7 @@ def test_make_lines(self):
             "tags": {
                 "empty_tag": "",
                 "none_tag": None,
+                "backslash_tag": "C:\\",
                 "integer_tag": 2,
                 "string_tag": "hello"
             },
@@ -41,7 +42,7 @@ def test_make_lines(self):
 
         self.assertEqual(
             line_protocol.make_lines(data),
-            'test,integer_tag=2,string_tag=hello '
+            'test,backslash_tag=C:\\\\ ,integer_tag=2,string_tag=hello '
             'bool_val=True,float_val=1.1,int_val=1i,string_val="hello!"\n'
         )