Merge branch 'no_dataframe_index' of bitbucket.org:janschulz/python-tabulate into feature/data-frame-index

astanin · astanin · commit f7b6d9c2c78e · 2015-08-19T13:10:04.000+02:00
diff --git a/tabulate.py b/tabulate.py
@@ -598,7 +598,7 @@ def _align_header(header, alignment, width):
         return _padleft(width, header)
 
 
-def _normalize_tabular_data(tabular_data, headers):
+def _normalize_tabular_data(tabular_data, headers, index=None):
     """Transform a supported data type to a list of lists, and a list of headers.
 
     Supported tabular data types:
@@ -634,8 +634,10 @@ def _normalize_tabular_data(tabular_data, headers):
             # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0)
             keys = tabular_data.keys()
             vals = tabular_data.values  # values matrix doesn't need to be transposed
-            names = tabular_data.index
-            rows = [[v]+list(row) for v,row in zip(names, vals)]
+            # For DataFrames, use the DataFrame's own index by default (index=None) or when index=True
+            if index in [None, True]:
+                index = tabular_data.index
+            rows = [list(row) for row in vals]
         else:
             raise ValueError("tabular data doesn't appear to be a dict or a DataFrame")
 
@@ -687,6 +689,7 @@ def _normalize_tabular_data(tabular_data, headers):
             elif headers:
                 raise ValueError('headers for a list of dicts is not a dict or a keyword')
             rows = [[row.get(k) for k in keys] for row in rows]
+
         elif headers == "keys" and len(rows) > 0:
             # keys are column indices
             headers = list(map(_text_type, range(len(rows[0]))))
@@ -696,6 +699,18 @@ def _normalize_tabular_data(tabular_data, headers):
         headers = list(map(_text_type, rows[0])) # headers should be strings
         rows = rows[1:]
 
+    # Add an index column: either a supplied iterable of index values, the DataFrame's own index,
+    # or simply a running count (0 .. len(rows)-1) if index is True
+    if index:
+        if index is True:
+            index = range(len(rows))
+        elif index:
+            index = list(index)
+            if len(index) != len(rows):
+                raise ValueError('index must be as long as the rows (excluding a header row if '
+                                 '"headers=firstrow"')
+        rows = [[v]+list(row) for v,row in zip(index, rows)]
+
     headers = list(map(_text_type,headers))
     rows = list(map(list,rows))
 
@@ -711,7 +726,7 @@ def _normalize_tabular_data(tabular_data, headers):
 
 def tabulate(tabular_data, headers=(), tablefmt="simple",
              floatfmt="g", numalign="decimal", stralign="left",
-             missingval=""):
+             missingval="", index=None):
     """Format a fixed width table for pretty printing.
 
     >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]]))
@@ -743,6 +758,11 @@ def tabulate(tabular_data, headers=(), tablefmt="simple",
     are supposed to be names of the last columns. This is consistent
     with the plain-text format of R and Pandas' dataframes.
 
+    If `index=True`, a first column with a running row count is added; for a
+    pandas.DataFrame its own index is used, and is shown by default.
+    `index=False` does not show an index. If `index` is an iterable, its values
+    form the index column (it must be as long as the number of rows).
+
     >>> print(tabulate([["sex","age"],["Alice","F",24],["Bob","M",19]],
     ...       headers="firstrow"))
            sex      age
@@ -946,7 +966,8 @@ def tabulate(tabular_data, headers=(), tablefmt="simple",
     """
     if tabular_data is None:
         tabular_data = []
-    list_of_lists, headers = _normalize_tabular_data(tabular_data, headers)
+    list_of_lists, headers = _normalize_tabular_data(tabular_data, headers,
+                                                     index=index)
 
     # optimization: look for ANSI control codes once,
     # enable smart width functions only if a control code is found
diff --git a/test/test_output.py b/test/test_output.py
@@ -5,7 +5,7 @@
 from __future__ import print_function
 from __future__ import unicode_literals
 from tabulate import tabulate, simple_separated_format
-from common import assert_equal
+from common import assert_equal, assert_raises
 
 
 # _test_table shows
@@ -415,3 +415,86 @@ def test_unaligned_separated():
                       ["name", "score"],
                       tablefmt=fmt, stralign=None, numalign=None)
     assert_equal(expected, result)
+
+
+def test_pandas_without_index():
+    "Output: a pandas Dataframe without an index"
+    try:
+        import pandas
+        df = pandas.DataFrame([["one",1],["two",None]],
+                              columns=["string","number"],
+                              index=["a","b"])
+        expected = "\n".join(
+            ['string      number',
+             '--------  --------',
+             'one              1',
+             'two            nan'])
+        result   = tabulate(df, headers="keys", index=False)
+        assert_equal(expected, result)
+    except ImportError:
+        print("test_pandas_keys is skipped")
+        raise SkipTest()   # this test is optional
+
+
+def test_dict_like_with_index():
+    "Output: a table with a running index"
+    dd = {"a": range(3), "b": range(101,104)}
+    # keys' order (hence columns' order) is not deterministic in Python 3;
+    # NOTE(review): only the a-then-b ordering is accepted by the assertion below
+    expected = "\n".join([
+        '      a    b',
+        '--  ---  ---',
+        ' 0    0  101',
+        ' 1    1  102',
+        ' 2    2  103'])
+    result    = tabulate(dd, "keys", index=True)
+    assert_equal(result, expected)
+
+
+def test_list_of_lists_with_index():
+    "Output: a table with a running index"
+    dd = zip(*[range(3), range(101,104)])
+    # rows are (i, 101+i) pairs built by zipping two ranges; column order is
+    # fixed by the explicit headers, so there is exactly one valid rendering
+    expected = "\n".join([
+        '      a    b',
+        '--  ---  ---',
+        ' 0    0  101',
+        ' 1    1  102',
+        ' 2    2  103'])
+    result    = tabulate(dd, headers=["a","b"], index=True)
+    assert_equal(result, expected)
+
+def test_list_of_lists_with_supplied_index():
+    "Output: a table with a supplied index"
+    dd = zip(*[range(3), range(101,104)])
+    # rows are (i, 101+i) pairs; the index column comes from the
+    # caller-supplied list [1, 2, 3] instead of a running count
+    expected = "\n".join([
+        '      a    b',
+        '--  ---  ---',
+        ' 1    0  101',
+        ' 2    1  102',
+        ' 3    2  103'])
+    result    = tabulate(dd, headers=["a","b"], index=[1,2,3])
+    assert_equal(result, expected)
+    # the index must be as long as the number of rows
+    assert_raises(ValueError, lambda: tabulate(dd, headers=["a","b"], index=[1,2]))
+
+
+
+def test_list_of_lists_with_index_firstrow():
+    "Output: a table with a running index which takes into account headers='firstrow'"
+    dd = zip(*[["a"]+range(3), ["b"]+range(101,104)])
+    # the first zipped row ("a", "b") is consumed as the header row, so the
+    # running index must start counting at the first data row
+    expected = "\n".join([
+        '      a    b',
+        '--  ---  ---',
+        ' 0    0  101',
+        ' 1    1  102',
+        ' 2    2  103'])
+    result    = tabulate(dd, headers="firstrow", index=True)
+    assert_equal(result, expected)
+    # the index must be as long as the number of data rows (header row excluded)
+    assert_raises(ValueError, lambda: tabulate(dd, headers="firstrow", index=[1,2]))