Add support for custom indexes for query in the DataFrameClient (influxdata#785)

rolincova · rolincova · commit de75e7351ce3 · 2020-05-28T10:41:50.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ### Added
 - Add support for custom headers in the InfluxDBClient (#710 thx @nathanielatom)
+- Add support for custom indexes for query in the DataFrameClient (#785)
 
 ### Changed
 - Amend retry to avoid sleep after last retry before raising exception (#790 thx @krzysbaranski)
diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py
@@ -8,6 +8,7 @@
 
 import math
 from collections import defaultdict
+from typing import List
 
 import pandas as pd
 import numpy as np
@@ -152,7 +153,8 @@ def query(self,
               chunked=False,
               chunk_size=0,
               method="GET",
-              dropna=True):
+              dropna=True,
+              data_frame_index: List[str] = None):
         """
         Query data into a DataFrame.
 
@@ -181,6 +183,7 @@ def query(self,
             containing all results within that chunk
         :param chunk_size: Size of each chunk to tell InfluxDB to use.
         :param dropna: drop columns where all values are missing
+        :param data_frame_index: column names to use as the DataFrame index
         :returns: the queried data
         :rtype: :class:`~.ResultSet`
         """
@@ -196,13 +199,13 @@ def query(self,
         results = super(DataFrameClient, self).query(query, **query_args)
         if query.strip().upper().startswith("SELECT"):
             if len(results) > 0:
-                return self._to_dataframe(results, dropna)
+                return self._to_dataframe(results, dropna, data_frame_index=data_frame_index)
             else:
                 return {}
         else:
             return results
 
-    def _to_dataframe(self, rs, dropna=True):
+    def _to_dataframe(self, rs, dropna=True, data_frame_index: List[str] = None):
         result = defaultdict(list)
         if isinstance(rs, list):
             return map(self._to_dataframe, rs,
@@ -216,10 +219,15 @@ def _to_dataframe(self, rs, dropna=True):
                 key = (name, tuple(sorted(tags.items())))
             df = pd.DataFrame(data)
             df.time = pd.to_datetime(df.time)
-            df.set_index('time', inplace=True)
-            if df.index.tzinfo is None:
-                df.index = df.index.tz_localize('UTC')
-            df.index.name = None
+
+            if data_frame_index:
+                df.set_index(data_frame_index, inplace=True)
+            else:
+                df.set_index('time', inplace=True)
+                if df.index.tzinfo is None:
+                    df.index = df.index.tz_localize('UTC')
+                df.index.name = 'time'
+
             result[key].append(df)
         for key, data in result.items():
             df = pd.concat(data).sort_index()
diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py
@@ -1240,3 +1240,36 @@ def test_write_points_from_dataframe_with_tags_and_nan_json(self):
             cli.write_points(dataframe, 'foo', tags=None, protocol='json',
                              tag_columns=['tag_one', 'tag_two'])
             self.assertEqual(m.last_request.body, expected)
+
+    def test_query_custom_index(self):
+        """Test that query() honours a custom ``data_frame_index``."""
+        index = ["time", "host"]
+        data = {
+            "results": [
+                {
+                    "series": [
+                        {
+                            "name": "cpu_load_short",
+                            "columns": ["time", "value", "host"],
+                            "values": [
+                                [1, 0.55, "local"],
+                                [2, 23422, "local"],
+                                [3, 0.64, "local"]
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }
+
+        cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
+        iql = "SELECT value FROM cpu_load_short WHERE region=$region;" \
+              "SELECT count(value) FROM cpu_load_short WHERE region=$region"
+        with _mocked_session(cli, 'GET', 200, data):
+            result = cli.query(iql, bind_params={'region': 'us-west'},
+                               data_frame_index=index)
+
+            frame = result['cpu_load_short']
+            self.assertListEqual(index, list(frame.index.names))
+
+