mdbartos
diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py
Lines changed: 188 additions & 19 deletions
diff --git a/influxdb/client.py b/influxdb/client.py
Lines changed: 30 additions & 14 deletions
@@ -35,9 +35,17 @@ class DataFrameClient(InfluxDBClient):
 
     EPOCH = pd.Timestamp('1970-01-01 00:00:00.000+00:00')
 
-    def write_points(self, dataframe, measurement, tags=None,
-                     time_precision=None, database=None, retention_policy=None,
-                     batch_size=None):
+    def write_points(self,
+                     dataframe,
+                     measurement,
+                     tags=None,
+                     tag_columns=None,
+                     field_columns=None,
+                     time_precision=None,
+                     database=None,
+                     retention_policy=None,
+                     batch_size=None,
+                     protocol='line'):
         """
         Write to multiple time series names.
 
@@ -50,27 +58,60 @@ def write_points(self, dataframe, measurement, tags=None,
             instead of all at one time. Useful for when doing data dumps from
             one database to another or when doing a massive write operation
         :type batch_size: int
+        :param protocol: Protocol for writing data. Either 'line' or 'json'.
+            The same value is forwarded to the parent client so that the
+            converted points are serialized with the matching protocol.
+        :param tag_columns: names of the DataFrame columns written as tags.
+            ``None`` (the default) is treated like an empty list.
+        :param field_columns: names of the DataFrame columns written as
+            fields. When empty or ``None``, every column not listed in
+            ``tag_columns`` is written as a field.
 
         """
         if batch_size:
-            number_batches = int(math.ceil(
-                len(dataframe) / float(batch_size)))
+            number_batches = int(math.ceil(len(dataframe) / float(batch_size)))
             for batch in range(number_batches):
                 start_index = batch * batch_size
                 end_index = (batch + 1) * batch_size
-                points = self._convert_dataframe_to_json(
-                    dataframe.ix[start_index:end_index].copy(),
-                    measurement, tags, time_precision
-                )
+                if protocol == 'line':
+                    points = self._convert_dataframe_to_lines(
+                        dataframe.ix[start_index:end_index].copy(),
+                        measurement=measurement,
+                        global_tags=tags,
+                        time_precision=time_precision,
+                        tag_columns=tag_columns,
+                        field_columns=field_columns)
+                else:
+                    points = self._convert_dataframe_to_json(
+                        dataframe.ix[start_index:end_index].copy(),
+                        measurement=measurement,
+                        tags=tags,
+                        time_precision=time_precision,
+                        tag_columns=tag_columns,
+                        field_columns=field_columns)
+                # Forward the caller's protocol: the points built above are
+                # dicts for 'json' and strings for 'line', so the parent
+                # must serialize them with the same protocol.
                 super(DataFrameClient, self).write_points(
-                    points, time_precision, database, retention_policy)
+                    points,
+                    time_precision,
+                    database,
+                    retention_policy,
+                    protocol=protocol)
             return True
         else:
-            points = self._convert_dataframe_to_json(
-                dataframe, measurement, tags, time_precision
-            )
+            if protocol == 'line':
+                points = self._convert_dataframe_to_lines(
+                    dataframe,
+                    measurement=measurement,
+                    global_tags=tags,
+                    tag_columns=tag_columns,
+                    field_columns=field_columns,
+                    time_precision=time_precision)
+            else:
+                points = self._convert_dataframe_to_json(
+                    dataframe,
+                    measurement=measurement,
+                    tags=tags,
+                    time_precision=time_precision,
+                    tag_columns=tag_columns,
+                    field_columns=field_columns)
+            # Same as the batched branch: keep converter and writer in sync.
             super(DataFrameClient, self).write_points(
-                points, time_precision, database, retention_policy)
+                points,
+                time_precision,
+                database,
+                retention_policy,
+                protocol=protocol)
             return True
 
     def query(self, query, chunked=False, database=None):
@@ -108,7 +149,12 @@ def _to_dataframe(self, rs):
             result[key] = df
         return result
 
-    def _convert_dataframe_to_json(self, dataframe, measurement, tags=None,
+    def _convert_dataframe_to_json(self,
+                                   dataframe,
+                                   measurement,
+                                   tags=None,
+                                   tag_columns=[],
+                                   field_columns=[],
                                    time_precision=None):
 
         if not isinstance(dataframe, pd.DataFrame):
@@ -119,6 +165,15 @@ def _convert_dataframe_to_json(self, dataframe, measurement, tags=None,
             raise TypeError('Must be DataFrame with DatetimeIndex or \
                             PeriodIndex.')
 
+        # Make sure tags and tag columns are correctly typed
+        tag_columns = tag_columns if tag_columns else []
+        field_columns = field_columns if field_columns else []
+        tags = tags if tags else {}
+        # Assume field columns are all columns not included in tag columns
+        if not field_columns:
+            field_columns = list(
+                set(dataframe.columns).difference(set(tag_columns)))
+
         dataframe.index = dataframe.index.to_datetime()
         if dataframe.index.tzinfo is None:
             dataframe.index = dataframe.index.tz_localize('UTC')
@@ -140,13 +195,127 @@ def _convert_dataframe_to_json(self, dataframe, measurement, tags=None,
 
         points = [
             {'measurement': measurement,
-             'tags': tags if tags else {},
+             'tags': dict(list(tag.items()) + list(tags.items())),
              'fields': rec,
-             'time': int(ts.value / precision_factor)
-             }
-            for ts, rec in zip(dataframe.index, dataframe.to_dict('record'))]
+             'time': int(ts.value / precision_factor)}
+            for ts, tag, rec in zip(dataframe.index,
+                                    dataframe[tag_columns].to_dict('record'),
+                                    dataframe[field_columns].to_dict('record'))
+        ]
+
         return points
 
+    def _convert_dataframe_to_lines(self,
+                                    dataframe,
+                                    measurement,
+                                    field_columns=None,
+                                    tag_columns=None,
+                                    global_tags=None,
+                                    time_precision=None,
+                                    numeric_precision=None):
+        """Convert a time-indexed DataFrame into line-protocol strings.
+
+        Each row becomes one string of the form
+        ``<measurement>[,<tag>=<value>...] <field>=<value>[,...] <time>``.
+
+        :param dataframe: DataFrame with a DatetimeIndex or PeriodIndex
+        :param measurement: measurement name prepended to every line
+        :param field_columns: columns written as fields; when empty or
+            ``None`` every column not listed in ``tag_columns`` is used
+        :param tag_columns: columns written as tags (may be empty/``None``)
+        :param global_tags: mapping of tag name to tag value appended to
+            every line; keys and values must already be strings
+        :param time_precision: one of 'n', 'u', 'ms', 's', 'm', 'h';
+            anything else (including ``None``) means nanoseconds
+        :param numeric_precision: forwarded to ``_stringify_dataframe``
+        :returns: list of line-protocol strings, one per row
+        :raises TypeError: if ``dataframe`` is not a DataFrame or its index
+            is neither a DatetimeIndex nor a PeriodIndex
+        """
+        if not isinstance(dataframe, pd.DataFrame):
+            raise TypeError('Must be DataFrame, but type was: {0}.'
+                            .format(type(dataframe)))
+        # Use the public index classes rather than private module paths.
+        if not isinstance(dataframe.index,
+                          (pd.PeriodIndex, pd.DatetimeIndex)):
+            raise TypeError('Must be DataFrame with DatetimeIndex or \
+                            PeriodIndex.')
+
+        column_series = pd.Series(dataframe.columns)
+
+        # None defaults avoid sharing one mutable list/dict across calls.
+        field_columns = [] if field_columns is None else list(field_columns)
+        tag_columns = [] if tag_columns is None else list(tag_columns)
+
+        # Assume that all columns not listed as tag columns are field columns
+        if not field_columns:
+            field_columns = list(column_series[~column_series.isin(
+                tag_columns)])
+
+        # Integer factors keep the timestamp arithmetic exact; dividing
+        # nanosecond epochs through float64 loses the lowest digits.
+        precision_factor = {
+            "n": 1,
+            "u": 10 ** 3,
+            "ms": 10 ** 6,
+            "s": 10 ** 9,
+            "m": 10 ** 9 * 60,
+            "h": 10 ** 9 * 3600,
+        }.get(time_precision, 1)
+
+        # Make array of timestamp ints
+        index = dataframe.index
+        if isinstance(index, pd.PeriodIndex):
+            index = index.to_timestamp()
+        # Explicit int64: a bare ``int`` is 32-bit on some platforms and
+        # would overflow for nanosecond epochs.
+        nanos = index.values.astype('datetime64[ns]').astype('int64')
+        time = nanos // precision_factor
+        # Floor division rounds negative values down; add one back where
+        # needed so the result truncates toward zero like an int() cast.
+        time = time + ((nanos < 0) & (nanos % precision_factor != 0))
+        time = time.astype(str)
+
+        # If tag columns exist, make an array of formatted tag keys and values
+        # NOTE(review): tag values are not escaped here; spaces or commas in
+        # them would presumably break the generated line - confirm.
+        if tag_columns:
+            tag_df = dataframe[tag_columns]
+            tag_df = self._stringify_dataframe(
+                tag_df, numeric_precision, datatype='tag')
+            tags = (',' + (
+                (tag_df.columns.values + '=').tolist() + tag_df)).sum(axis=1)
+            del tag_df
+
+        else:
+            tags = ''
+
+        # Make an array of formatted field keys and values
+        field_df = dataframe[field_columns]
+        field_df = self._stringify_dataframe(
+            field_df, numeric_precision, datatype='field')
+        field_df = (field_df.columns.values + '=').tolist() + field_df
+        # Every field after the first is preceded by a comma separator.
+        field_df[field_df.columns[1:]] = ',' + field_df[field_df.columns[1:]]
+        fields = field_df.sum(axis=1)
+        del field_df
+
+        # Add any global tags to formatted tag strings
+        if global_tags:
+            # str.join requires every key and value to be a string.
+            global_tag_string = ','.join(
+                ['='.join([tag, global_tags[tag]]) for tag in global_tags])
+            if tag_columns:
+                tags = tags + ',' + global_tag_string
+            else:
+                tags = ',' + global_tag_string
+
+        # Generate line protocol string
+        points = (measurement + tags + ' ' + fields + ' ' + time).tolist()
+        return points
+
+    def _stringify_dataframe(self,
+                             dataframe,
+                             numeric_precision,
+                             datatype='field'):
+        """Return a copy of ``dataframe`` with every value rendered as text.
+
+        :param dataframe: DataFrame holding the tag or field columns
+        :param numeric_precision: ``None`` to use plain ``str`` conversion,
+            ``'full'`` to render floats with ``repr``, or an int giving the
+            number of decimals numeric columns are rounded to first
+        :param datatype: ``'field'`` appends ``i`` to integer columns and
+            wraps object (string) columns in double quotes; any other value
+            (e.g. ``'tag'``) leaves the text as is
+        :returns: new DataFrame of strings with string column labels
+        :raises ValueError: if ``numeric_precision`` is not one of the
+            accepted values
+        """
+        # Work on a copy: the caller passes a column slice of its own frame
+        # and the assignments below must not write through to it.
+        dataframe = dataframe.copy()
+
+        # 'integer' matches every integer width, not only the platform
+        # default, so all integer columns receive the 'i' suffix.
+        int_columns = dataframe.select_dtypes(include=['integer']).columns
+        float_columns = dataframe.select_dtypes(include=['floating']).columns
+        nonfloat_columns = dataframe.columns[~dataframe.columns.isin(
+            float_columns)]
+        numeric_columns = dataframe.select_dtypes(include=['number']).columns
+        string_columns = dataframe.select_dtypes(include=['object']).columns
+
+        # Convert dataframe to string
+        if numeric_precision is None:
+            dataframe = dataframe.astype(str)
+        elif numeric_precision == 'full':
+            for column in float_columns:
+                dataframe[column] = dataframe[column].map(repr)
+            dataframe[nonfloat_columns] = (dataframe[nonfloat_columns]
+                                           .astype(str))
+        elif isinstance(numeric_precision, int):
+            dataframe[numeric_columns] = (dataframe[numeric_columns]
+                                          .round(numeric_precision))
+            dataframe = dataframe.astype(str)
+        else:
+            raise ValueError('Invalid numeric precision')
+
+        if datatype == 'field':
+            dataframe[int_columns] = dataframe[int_columns] + 'i'
+            dataframe[string_columns] = '"' + dataframe[string_columns] + '"'
+
+        dataframe.columns = dataframe.columns.astype(str)
+        return dataframe
+
     def _datetime_to_epoch(self, datetime, time_precision='s'):
         seconds = (datetime - self.EPOCH).total_seconds()
         if time_precision == 'h':
 
@@ -252,7 +252,8 @@ def request(self, url, method='GET', params=None, data=None,
         else:
             raise InfluxDBClientError(response.content, response.status_code)
 
-    def write(self, data, params=None, expected_response_code=204):
+    def write(self, data, params=None, expected_response_code=204,
+              protocol='json'):
         """Write data to InfluxDB.
 
         :param data: the data to be written
@@ -274,11 +275,16 @@ def write(self, data, params=None, expected_response_code=204):
         else:
             precision = None
 
+        if protocol == 'json':
+            data = make_lines(data, precision).encode('utf-8')
+        elif protocol == 'line':
+            data = ('\n'.join(data) + '\n').encode('utf-8')
+
         self.request(
             url="write",
             method='POST',
             params=params,
-            data=make_lines(data, precision).encode('utf-8'),
+            data=data,
             expected_response_code=expected_response_code,
             headers=headers
         )
@@ -351,6 +357,7 @@ def write_points(self,
                      retention_policy=None,
                      tags=None,
                      batch_size=None,
+                     protocol='json'
                      ):
         """Write to multiple time series names.
 
@@ -375,6 +382,7 @@ def write_points(self,
         :type batch_size: int
         :returns: True, if the operation is successful
         :rtype: bool
+        :param protocol: Protocol for writing data. Either 'line' or 'json'.
 
         .. note:: if no retention policy is specified, the default retention
             policy for the database is used
@@ -386,14 +394,14 @@ def write_points(self,
                                    time_precision=time_precision,
                                    database=database,
                                    retention_policy=retention_policy,
-                                   tags=tags)
+                                   tags=tags, protocol=protocol)
             return True
         else:
             return self._write_points(points=points,
                                       time_precision=time_precision,
                                       database=database,
                                       retention_policy=retention_policy,
-                                      tags=tags)
+                                      tags=tags, protocol=protocol)
 
     def _batches(self, iterable, size):
         for i in xrange(0, len(iterable), size):
@@ -404,7 +412,8 @@ def _write_points(self,
                       time_precision,
                       database,
                       retention_policy,
-                      tags):
+                      tags,
+                      protocol='json'):
         if time_precision not in ['n', 'u', 'ms', 's', 'm', 'h', None]:
             raise ValueError(
                 "Invalid time precision is given. "
@@ -415,12 +424,15 @@ def _write_points(self,
                 "InfluxDB only supports seconds precision for udp writes"
             )
 
-        data = {
-            'points': points
-        }
+        if protocol == 'json':
+            data = {
+                'points': points
+            }
 
-        if tags is not None:
-            data['tags'] = tags
+            if tags is not None:
+                data['tags'] = tags
+        else:
+            data = points
 
         params = {
             'db': database or self._database
@@ -433,12 +445,13 @@ def _write_points(self,
             params['rp'] = retention_policy
 
         if self.use_udp:
-            self.send_packet(data)
+            self.send_packet(data, protocol=protocol)
         else:
             self.write(
                 data=data,
                 params=params,
-                expected_response_code=204
+                expected_response_code=204,
+                protocol=protocol
             )
 
         return True
@@ -737,13 +750,16 @@ def get_list_privileges(self, username):
         text = "SHOW GRANTS FOR {0}".format(username)
         return list(self.query(text).get_points())
 
-    def send_packet(self, packet):
+    def send_packet(self, packet, protocol='json'):
         """Send an UDP packet.
 
         :param packet: the packet to be sent
         :type packet: dict
+        :param protocol: 'json' to serialize ``packet`` with ``make_lines``,
+            or 'line' when ``packet`` is already an iterable of
+            line-protocol strings
+        :raises ValueError: if ``protocol`` is neither 'json' nor 'line'
         """
-        data = make_lines(packet).encode('utf-8')
+        if protocol == 'json':
+            data = make_lines(packet).encode('utf-8')
+        elif protocol == 'line':
+            # Join the caller's lines; ``data`` is not assigned yet here.
+            data = ('\n'.join(packet) + '\n').encode('utf-8')
+        else:
+            raise ValueError(
+                "Invalid protocol: {0!r}".format(protocol))
         self.udp_socket.sendto(data, (self._host, self.udp_port))