Skip to content

Commit b8206ed

Browse files
author
aviau
committed
Merge PR#86 (Thanks @timtroendle!)
Added support for Pandas DataFrames
2 parents 2610cb1 + 3a00f29 commit b8206ed

File tree

6 files changed

+299
-9
lines changed

6 files changed

+299
-9
lines changed

dev-requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
requests
22
nose
33
mock
4+
pandas
45
Sphinx==1.2.3
5-
sphinx_rtd_theme
6+
sphinx_rtd_theme

examples/tutorial_pandas.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import argparse
2+
import pandas as pd
3+
4+
from influxdb.misc import DataFrameClient
5+
6+
7+
def main(host='localhost', port=8086):
    """Round-trip a pandas DataFrame through InfluxDB.

    Creates a demo database, writes a 30-row hourly DataFrame to the
    ``demo`` series, reads it back, then drops the database again.

    :param host: hostname of the InfluxDB HTTP API.
    :param port: port of the InfluxDB HTTP API.
    """
    user = 'root'
    password = 'root'
    dbname = 'example'

    client = DataFrameClient(host, port, user, password, dbname)

    print("Create pandas DataFrame")
    hourly_index = pd.date_range(start='2014-11-16', periods=30, freq='H')
    frame = pd.DataFrame(data=list(range(30)), index=hourly_index)

    print("Create database: " + dbname)
    client.create_database(dbname)

    print("Write DataFrame")
    client.write_points({'demo': frame})

    print("Read DataFrame")
    client.query("select * from demo")

    print("Delete database: " + dbname)
    client.delete_database(dbname)
30+
31+
32+
def parse_args():
    """Return the parsed command-line options for this example."""
    parser = argparse.ArgumentParser(
        description='example code to play with InfluxDB')
    parser.add_argument('--host', required=False, type=str,
                        help='hostname of InfluxDB http API',
                        default='localhost')
    parser.add_argument('--port', required=False, type=int,
                        help='port of InfluxDB http API',
                        default=8086)
    return parser.parse_args()
41+
42+
43+
if __name__ == '__main__':
    # Entry point: forward the parsed CLI options into the demo.
    options = parse_args()
    main(host=options.host, port=options.port)

influxdb/client.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,13 @@ def request(self, url, method='GET', params=None, data=None,
166166
# by doing a POST to /db/foo_production/series?u=some_user&p=some_password
167167
# with a JSON body of points.
168168

169-
def write_points(self, *args, **kwargs):
169+
def write_points(self, data, *args, **kwargs):
170170
"""
171171
write_points()
172172
173173
Write to multiple time series names.
174174
175+
:param data: A list of dicts.
175176
:param batch_size: [Optional] Value to write the points in batches
176177
instead of all at one time. Useful for when doing data dumps from
177178
one database to another or when doing a massive write operation
@@ -186,25 +187,25 @@ def list_chunks(l, n):
186187

187188
batch_size = kwargs.get('batch_size')
188189
if batch_size:
189-
for data in kwargs.get('data'):
190-
name = data.get('name')
191-
columns = data.get('columns')
192-
point_list = data.get('points')
190+
for item in data:
191+
name = item.get('name')
192+
columns = item.get('columns')
193+
point_list = item.get('points')
193194

194195
for batch in list_chunks(point_list, batch_size):
195-
data = [{
196+
item = [{
196197
"points": batch,
197198
"name": name,
198199
"columns": columns
199200
}]
200201
time_precision = kwargs.get('time_precision', 's')
201202
self.write_points_with_precision(
202-
data=data,
203+
data=item,
203204
time_precision=time_precision)
204205

205206
return True
206207

207-
return self.write_points_with_precision(*args, **kwargs)
208+
return self.write_points_with_precision(data, *args, **kwargs)
208209

209210
def write_points_with_precision(self, data, time_precision='s'):
210211
"""
@@ -298,6 +299,11 @@ def remove_scheduled_delete(self, delete_id):
298299
def query(self, query, time_precision='s', chunked=False):
299300
"""
300301
Querying data
302+
303+
:param time_precision: [Optional, default 's'] Either 's', 'm', 'ms'
304+
or 'u'.
305+
:param chunked: [Optional, default=False] True if the data shall be
306+
retrieved in chunks, False otherwise.
301307
"""
302308
if time_precision not in ['s', 'm', 'ms', 'u']:
303309
raise Exception(

influxdb/misc.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Miscellaneous
4+
"""
5+
from time import mktime
6+
7+
from .client import InfluxDBClient
8+
9+
10+
class DataFrameClient(InfluxDBClient):
    """
    The ``DataFrameClient`` object holds information necessary to connect
    to InfluxDB. Requests can be made to InfluxDB directly through the client.
    The client reads and writes from pandas DataFrames.
    """

    def write_points(self, data, *args, **kwargs):
        """
        Write to multiple time series names.

        :param data: A dictionary mapping series names to pandas DataFrames
        :param time_precision: [Optional, default 's'] Either 's', 'm', 'ms'
            or 'u'; point timestamps are emitted in this unit.
        :param batch_size: [Optional] Value to write the points in batches
            instead of all at one time. Useful for when doing data dumps from
            one database to another or when doing a massive write operation
        :type batch_size: int
        """
        # Thread the precision into the JSON conversion so timestamps are
        # scaled to the unit the server is told to expect (the original
        # always emitted epoch seconds, whatever the precision).
        time_precision = kwargs.get('time_precision', 's')
        payload = [self._convert_dataframe_to_json(
                       name=key, dataframe=value,
                       time_precision=time_precision)
                   for key, value in data.items()]
        return InfluxDBClient.write_points_with_precision(self, payload,
                                                          *args, **kwargs)

    def write_points_with_precision(self, data, time_precision='s'):
        """
        Write to multiple time series names.

        :param data: A dictionary mapping series names to pandas DataFrames
        :param time_precision: [Optional, default 's'] Either 's', 'm', 'ms'
            or 'u'.
        """
        # Bug fix: the original hard-coded time_precision='s' here, silently
        # discarding the caller's argument.
        return self.write_points(data, time_precision=time_precision)

    def query(self, query, time_precision='s', chunked=False):
        """
        Query data into a DataFrame.

        :param time_precision: [Optional, default 's'] Either 's', 'm', 'ms'
            or 'u'.
        :param chunked: [Optional, default=False] True if the data shall be
            retrieved in chunks, False otherwise.
        """
        result = InfluxDBClient.query(self, query=query,
                                      time_precision=time_precision,
                                      chunked=chunked)
        # InfluxDB returns a list of series; only the first one is converted.
        return self._to_dataframe(result[0], time_precision)

    def _to_dataframe(self, json_result, time_precision):
        """Convert one InfluxDB JSON series into a DataFrame indexed by
        its 'time' column (as a UTC DatetimeIndex)."""
        try:
            import pandas as pd
        except ImportError:
            raise ImportError('pandas required for retrieving as dataframe.')
        dataframe = pd.DataFrame(data=json_result['points'],
                                 columns=json_result['columns'])
        # Map InfluxDB precision codes onto pandas unit codes: InfluxDB
        # uses 'm' for milliseconds and 'u' for microseconds.
        pandas_time_unit = time_precision
        if time_precision == 'm':
            pandas_time_unit = 'ms'
        elif time_precision == 'u':
            pandas_time_unit = 'us'
        dataframe.index = pd.to_datetime(list(dataframe['time']),
                                         unit=pandas_time_unit,
                                         utc=True)
        del dataframe['time']
        return dataframe

    def _convert_dataframe_to_json(self, dataframe, name, time_precision='s'):
        """Convert a DataFrame into the InfluxDB JSON series format.

        :param dataframe: DataFrame with a DatetimeIndex or PeriodIndex.
        :param name: series name to write the points under.
        :param time_precision: [Optional, default 's'] Either 's', 'm', 'ms'
            or 'u'; timestamps are scaled to this unit.
        :raises TypeError: if ``dataframe`` is not a DataFrame or its index
            is not time-based.
        """
        try:
            import pandas as pd
        except ImportError:
            raise ImportError('pandas required for writing as dataframe.')
        if not isinstance(dataframe, pd.DataFrame):
            raise TypeError('Must be DataFrame, but type was: {}.'
                            .format(type(dataframe)))
        if not (isinstance(dataframe.index, pd.PeriodIndex) or
                isinstance(dataframe.index, pd.DatetimeIndex)):
            raise TypeError('Must be DataFrame with DatetimeIndex or '
                            'PeriodIndex.')
        # Work on a copy so the caller's DataFrame is not mutated by the
        # index conversion and the added 'time' column (the original
        # modified its argument in place).
        dataframe = dataframe.copy()
        if isinstance(dataframe.index, pd.PeriodIndex):
            # PeriodIndex.to_timestamp() replaces the deprecated
            # Index.to_datetime() used by the original code.
            dataframe.index = dataframe.index.to_timestamp()
        # 'm' and 'ms' are both milliseconds in the InfluxDB 0.8 API.
        scale = {'s': 1, 'm': 1000, 'ms': 1000, 'u': 1000000}[time_precision]
        dataframe['time'] = [mktime(dt.timetuple()) * scale
                             for dt in dataframe.index]
        data = {'name': name,
                'columns': [str(column) for column in dataframe.columns],
                'points': list([list(x) for x in dataframe.values])}
        return data

test-requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
nose
22
mock
33
requests-mock
4+
pandas

tests/influxdb/misc_test.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
unit tests for misc module
4+
"""
5+
import unittest
6+
import json
7+
import requests_mock
8+
from nose.tools import raises
9+
from datetime import datetime, timedelta
10+
import time
11+
import pandas as pd
12+
from pandas.util.testing import assert_frame_equal
13+
14+
from influxdb.misc import DataFrameClient
15+
from .client_test import _mocked_session
16+
17+
18+
class TestDataFrameClient(unittest.TestCase):
    """Unit tests for the pandas-backed DataFrameClient."""

    def test_write_points_from_dataframe(self):
        start = datetime(2014, 11, 15, 15, 42, 44, 543)
        frame = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[start, start + timedelta(hours=1)],
                             columns=["column_one", "column_two",
                                      "column_three"])
        expected = [
            {
                "points": [
                    ["1", 1, 1.0, time.mktime(start.timetuple())],
                    ["2", 2, 2.0,
                     time.mktime((start + timedelta(hours=1)).timetuple())]
                ],
                "name": "foo",
                "columns": ["column_one", "column_two", "column_three",
                            "time"]
            }
        ]

        with requests_mock.Mocker() as mocker:
            mocker.register_uri(requests_mock.POST,
                                "http://localhost:8086/db/db/series")

            cli = DataFrameClient(database='db')
            cli.write_points({"foo": frame})

            self.assertListEqual(json.loads(mocker.last_request.body),
                                 expected)

    def test_write_points_from_dataframe_with_numeric_column_names(self):
        start = datetime(2014, 11, 15, 15, 42, 44, 543)
        # No explicit column labels, so pandas assigns the integers 0, 1, 2;
        # the client must stringify them for the JSON payload.
        frame = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[start, start + timedelta(hours=1)])
        expected = [
            {
                "points": [
                    ["1", 1, 1.0, time.mktime(start.timetuple())],
                    ["2", 2, 2.0,
                     time.mktime((start + timedelta(hours=1)).timetuple())]
                ],
                "name": "foo",
                "columns": ['0', '1', '2', "time"]
            }
        ]

        with requests_mock.Mocker() as mocker:
            mocker.register_uri(requests_mock.POST,
                                "http://localhost:8086/db/db/series")

            cli = DataFrameClient(database='db')
            cli.write_points({"foo": frame})

            self.assertListEqual(json.loads(mocker.last_request.body),
                                 expected)

    def test_write_points_from_dataframe_with_period_index(self):
        first_day = datetime(2014, 11, 16)
        frame = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[pd.Period('2014-11-16'),
                                    pd.Period('2014-11-17')],
                             columns=["column_one", "column_two",
                                      "column_three"])
        expected = [
            {
                "points": [
                    ["1", 1, 1.0, time.mktime(first_day.timetuple())],
                    ["2", 2, 2.0,
                     time.mktime((first_day + timedelta(hours=24))
                                 .timetuple())]
                ],
                "name": "foo",
                "columns": ["column_one", "column_two", "column_three",
                            "time"]
            }
        ]

        with requests_mock.Mocker() as mocker:
            mocker.register_uri(requests_mock.POST,
                                "http://localhost:8086/db/db/series")

            cli = DataFrameClient(database='db')
            cli.write_points({"foo": frame})

            self.assertListEqual(json.loads(mocker.last_request.body),
                                 expected)

    @raises(TypeError)
    def test_write_points_from_dataframe_fails_without_time_index(self):
        # A default RangeIndex is not time-based, so conversion must fail.
        frame = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             columns=["column_one", "column_two",
                                      "column_three"])

        with requests_mock.Mocker() as mocker:
            mocker.register_uri(requests_mock.POST,
                                "http://localhost:8086/db/db/series")

            cli = DataFrameClient(database='db')
            cli.write_points({"foo": frame})

    @raises(TypeError)
    def test_write_points_from_dataframe_fails_with_series(self):
        # Only DataFrames are accepted; a Series must be rejected.
        start = datetime(2014, 11, 16)
        series = pd.Series(data=[1.0, 2.0],
                           index=[start, start + timedelta(hours=1)])

        with requests_mock.Mocker() as mocker:
            mocker.register_uri(requests_mock.POST,
                                "http://localhost:8086/db/db/series")

            cli = DataFrameClient(database='db')
            cli.write_points({"foo": series})

    def test_query_into_dataframe(self):
        response = [
            {
                "name": "foo",
                "columns": ["time", "sequence_number", "column_one"],
                "points": [
                    [1383876043, 16, 2], [1383876043, 15, 1],
                    [1383876035, 14, 2], [1383876035, 13, 1]
                ]
            }
        ]
        expected = pd.DataFrame(data=[[16, 2], [15, 1], [14, 2], [13, 1]],
                                index=pd.to_datetime([1383876043, 1383876043,
                                                      1383876035, 1383876035],
                                                     unit='s', utc=True),
                                columns=['sequence_number', 'column_one'])
        with _mocked_session('get', 200, response):
            cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
            result = cli.query('select column_one from foo;')
            assert_frame_equal(expected, result)

0 commit comments

Comments
 (0)