From 85cb4763cf72d4a0dbcdfdb02fccd5c1a5f9b1c2 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Mon, 5 Feb 2018 17:12:18 +0100 Subject: [PATCH 1/3] fix #575 and fix #371 : renamed argument 'transpose' of to_csv as 'wide' + added argument 'wide' to to_excel --- doc/source/changes/version_0_28.rst.inc | 35 ++++++++++++++++++- larray/core/array.py | 46 +++++++++++++++---------- larray/tests/test_array.py | 23 ++++++++++++- 3 files changed, 84 insertions(+), 20 deletions(-) diff --git a/doc/source/changes/version_0_28.rst.inc b/doc/source/changes/version_0_28.rst.inc index a5ecea095..4671da090 100644 --- a/doc/source/changes/version_0_28.rst.inc +++ b/doc/source/changes/version_0_28.rst.inc @@ -211,7 +211,6 @@ Miscellaneous improvements a0 0 0 a1 1 1 - * renamed argument `nb_index` of `read_csv`, `read_excel`, `read_sas`, `from_lists` and `from_string` functions as `nb_axes`. The relation between `nb_index` and `nb_axes` is given by `nb_axes = nb_index + 1`: @@ -234,6 +233,40 @@ Miscellaneous improvements Closes :issue:`548`: +* renamed argument `transpose` by `wide` in `to_csv` method. + +* added argument `wide` in `to_excel` method. When argument `wide` is set to False, the array is exported + in "narrow" format, i.e. one column per axis plus one value column: + + >>> arr = ndtest((2, 3)) + >>> arr + a\b b0 b1 b2 + a0 0 1 2 + a1 3 4 5 + + Default behavior (`wide=True`): + + >>> arr.to_excel('my_file.xlsx') + + a\b b0 b1 b2 + a0 0 1 2 + a1 3 4 5 + + With `wide=False`: + + >>> arr.to_excel('my_file.xlsx', wide=False) + + a b value + a0 b0 0 + a0 b1 1 + a0 b2 2 + a1 b0 3 + a1 b1 4 + a1 b2 5 + + Argument `transpose` has a different purpose than `wide` and is mainly useful to allow multiple axes as header + when exporting arrays with more than 2 dimensions. Closes :issue:`575` and :issue:`371`. 
+ Fixes ----- diff --git a/larray/core/array.py b/larray/core/array.py index ee08ef270..a53d0902c 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -5816,7 +5816,8 @@ def clip(self, a_min, a_max, out=None): from larray.core.ufuncs import clip return clip(self, a_min, a_max, out) - def to_csv(self, filepath, sep=',', na_rep='', transpose=True, dropna=None, dialect='default', **kwargs): + @deprecate_kwarg('transpose', 'wide') + def to_csv(self, filepath, sep=',', na_rep='', wide=True, dropna=None, dialect='default', **kwargs): """ Writes array to a csv file. @@ -5824,15 +5825,16 @@ def to_csv(self, filepath, sep=',', na_rep='', transpose=True, dropna=None, dial ---------- filepath : str path where the csv file has to be written. - sep : str - seperator for the csv file. - na_rep : str - replace NA values with na_rep. - transpose : boolean - transpose = True => transpose over last axis. - transpose = False => no transpose. - dialect : 'default' | 'classic' - Whether or not to write the last axis name (using '\' ) + sep : str, optional + separator for the csv file. Defaults to `,`. + na_rep : str, optional + replace NA values with na_rep. Defaults to ''. + wide : boolean, optional + Whether or not writing arrays in "wide" format. If True, arrays are exported with the last axis + represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one + value column. Defaults to True. + dialect : 'default' | 'classic', optional + Whether or not to write the last axis name (using '\' ). Defaults to 'default'. dropna : None, 'all', 'any' or True, optional Drop lines if 'all' its values are NA, if 'any' value is NA or do not drop any line (default). True is equivalent to 'all'. @@ -5852,7 +5854,7 @@ def to_csv(self, filepath, sep=',', na_rep='', transpose=True, dropna=None, dial nat\\sex,M,F BE,0,1 FO,2,3 - >>> a.to_csv(fname, sep=';', transpose=False) + >>> a.to_csv(fname, sep=';', wide=False) >>> with open(fname) as f: ... 
print(f.read().strip()) nat;sex;0 @@ -5868,7 +5870,7 @@ def to_csv(self, filepath, sep=',', na_rep='', transpose=True, dropna=None, dial FO,2,3 """ fold = dialect == 'default' - if transpose: + if wide: frame = self.to_frame(fold, dropna) frame.to_csv(filepath, sep=sep, na_rep=na_rep, **kwargs) else: @@ -5900,7 +5902,7 @@ def to_hdf(self, filepath, key, *args, **kwargs): self.to_frame().to_hdf(filepath, key, *args, **kwargs) def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file=False, clear_sheet=False, - header=True, transpose=False, engine=None, *args, **kwargs): + header=True, transpose=False, wide=True, engine=None, *args, **kwargs): """ Writes array in the specified sheet of specified excel workbook. @@ -5923,8 +5925,12 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file header : bool, optional Whether or not to write a header (axes names and labels). Defaults to True. transpose : bool, optional - Whether or not to transpose the resulting array. This can be used, for example, for writing one dimensional - arrays vertically. Defaults to False. + Whether or not to transpose the array over last axis. + This is equivalent to paste with option transpose in Excel. Defaults to False. + wide : boolean, optional + Whether or not writing arrays in "wide" format. If True, arrays are exported with the last axis + represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one + value column. Defaults to True. engine : 'xlwings' | 'openpyxl' | 'xlsxwriter' | 'xlwt' | None, optional Engine to use to make the output. If None (default), it will use 'xlwings' by default if the module is installed and relies on Pandas default writer otherwise. 
@@ -5943,7 +5949,11 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file """ sheet_name = _translate_sheet_name(sheet_name) - df = self.to_frame(fold_last_axis_name=True) + if wide: + pd_obj = self.to_frame(fold_last_axis_name=True) + else: + pd_obj = self.to_series() + if engine is None: engine = 'xlwings' if xw is not None else None @@ -5977,7 +5987,7 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file sheet = wb.sheets.add(sheet_name, after=wb.sheets[-1]) options = dict(header=header, index=header, transpose=transpose) - sheet[position].options(**options).value = df + sheet[position].options(**options).value = pd_obj # TODO: implement transpose via/in dump # sheet[position] = self.dump(header=header, transpose=transpose) if close: @@ -5988,7 +5998,7 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file sheet_name = 'Sheet1' # TODO: implement position in this case # startrow, startcol - df.to_excel(filepath, sheet_name, *args, engine=engine, **kwargs) + pd_obj.to_excel(filepath, sheet_name, *args, engine=engine, **kwargs) def to_clipboard(self, *args, **kwargs): """Sends the content of the array to clipboard. 
diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index c6a218c89..b1e399377 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -3187,7 +3187,8 @@ def test_to_csv(self): with open(self.tmp_path('out.csv')) as f: self.assertEqual(f.readlines()[:3], result) - la.to_csv(self.tmp_path('out.csv'), transpose=False) + # stacked data (one column containing all the values and another column listing the context of the value) + la.to_csv(self.tmp_path('out.csv'), wide=False) result = ['arr,age,sex,nat,time,0\n', '1,0,F,1,2007,3722\n', '1,0,F,1,2010,3395\n'] @@ -3217,6 +3218,13 @@ def test_to_excel_xlsxwriter(self): res = read_excel(fpath, engine='xlrd') assert_array_equal(res, a1) + # fpath/Sheet1/A1 + # stacked data (one column containing all the values and another column listing the context of the value) + a1.to_excel(fpath, wide=False, engine='xlsxwriter') + res = read_excel(fpath, engine='xlrd') + stacked_a1 = a1.reshape([a1.a, Axis([0])]) + assert_array_equal(res, stacked_a1) + # 2D a2 = ndtest((2, 3)) @@ -3270,6 +3278,13 @@ def test_to_excel_xlsxwriter(self): res = read_excel(fpath, engine='xlrd') assert_array_equal(res, a1) + # fpath/Sheet1/A1 + # stacked data (one column containing all the values and another column listing the context of the value) + a1.to_excel(fpath, wide=False, engine='xlsxwriter') + res = read_excel(fpath, engine='xlrd') + stacked_a1 = a1.reshape([a1.a, Axis([0])]) + assert_array_equal(res, stacked_a1) + # 2D a2 = ndtest((2, 3)) @@ -3352,6 +3367,12 @@ def test_to_excel_xlwings(self): res = read_excel(fpath, engine='xlrd') assert_array_equal(res, a1) + # fpath/Sheet1/A1 + # stacked data (one column containing all the values and another column listing the context of the value) + a1.to_excel(fpath, wide=False, engine='xlwings') + res = read_excel(fpath, engine='xlrd') + assert_array_equal(res, a1) + # 2D a2 = ndtest((2, 3)) From 61e2bb90fc79124fe204d302a88f11fcb1960de0 Mon Sep 17 00:00:00 2001 From: Alix 
Damman Date: Mon, 5 Feb 2018 15:03:16 +0100 Subject: [PATCH 2/3] added argument 'name' to to_series --- doc/source/changes/version_0_28.rst.inc | 2 ++ larray/core/array.py | 39 +++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/doc/source/changes/version_0_28.rst.inc b/doc/source/changes/version_0_28.rst.inc index 4671da090..ddd5453e7 100644 --- a/doc/source/changes/version_0_28.rst.inc +++ b/doc/source/changes/version_0_28.rst.inc @@ -267,6 +267,8 @@ Miscellaneous improvements Argument `transpose` has a different purpose than `wide` and is mainly useful to allow multiple axes as header when exporting arrays with more than 2 dimensions. Closes :issue:`575` and :issue:`371`. +* added argument `name` to `to_series` method allowing to set a name to the Pandas Series returned by the method. + Fixes ----- diff --git a/larray/core/array.py b/larray/core/array.py index a53d0902c..7e90bd45b 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -975,12 +975,14 @@ def to_frame(self, fold_last_axis_name=False, dropna=None): return df df = property(to_frame) - def to_series(self, dropna=False): + def to_series(self, name=None, dropna=False): """ Converts LArray into Pandas Series. Parameters ---------- + name : str, optional + Name of the series. Defaults to None. dropna : bool, optional. False by default. 
@@ -991,6 +993,10 @@ def to_series(self, dropna=False): Examples -------- >>> arr = ndtest((2, 3), dtype=float) + >>> arr + a\\b b0 b1 b2 + a0 0.0 1.0 2.0 + a1 3.0 4.0 5.0 >>> arr.to_series() # doctest: +NORMALIZE_WHITESPACE a b a0 b0 0.0 @@ -1000,9 +1006,36 @@ def to_series(self, dropna=False): b1 4.0 b2 5.0 dtype: float64 + + Set a name + + >>> arr.to_series('my_name') # doctest: +NORMALIZE_WHITESPACE + a b + a0 b0 0.0 + b1 1.0 + b2 2.0 + a1 b0 3.0 + b1 4.0 + b2 5.0 + Name: my_name, dtype: float64 + + Drop nan values + + >>> arr['b1'] = np.nan + >>> arr + a\\b b0 b1 b2 + a0 0.0 nan 2.0 + a1 3.0 nan 5.0 + >>> arr.to_series(dropna=True) # doctest: +NORMALIZE_WHITESPACE + a b + a0 b0 0.0 + b2 2.0 + a1 b0 3.0 + b2 5.0 + dtype: float64 """ index = pd.MultiIndex.from_product([axis.labels for axis in self.axes], names=self.axes.names) - series = pd.Series(np.asarray(self).reshape(self.size), index) + series = pd.Series(np.asarray(self).reshape(self.size), index, name=name) if dropna: series.dropna(inplace=True) return series @@ -5874,7 +5907,7 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, dropna=None, dialect=' frame = self.to_frame(fold, dropna) frame.to_csv(filepath, sep=sep, na_rep=na_rep, **kwargs) else: - series = self.to_series(dropna is not None) + series = self.to_series(dropna=dropna is not None) series.to_csv(filepath, sep=sep, na_rep=na_rep, header=True, **kwargs) def to_hdf(self, filepath, key, *args, **kwargs): From 734e095b5de4cf4e81434ebf4f3cdef68c600937 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Mon, 5 Feb 2018 14:41:52 +0100 Subject: [PATCH 3/3] fix #549 : added argument 'value_name' to to_csv/excel to set the name of the last column (i.e. 
the one containing the values) when exporting to csv/excel with wide=False --- doc/source/changes/version_0_28.rst.inc | 14 +++++++++++++ larray/core/array.py | 28 ++++++++++++++++++------- larray/tests/test_array.py | 7 +++---- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/doc/source/changes/version_0_28.rst.inc b/doc/source/changes/version_0_28.rst.inc index ddd5453e7..f789cf1a5 100644 --- a/doc/source/changes/version_0_28.rst.inc +++ b/doc/source/changes/version_0_28.rst.inc @@ -269,6 +269,20 @@ Miscellaneous improvements * added argument `name` to `to_series` method allowing to set a name to the Pandas Series returned by the method. +* added argument `value_name` to `to_csv` and `to_excel` allowing to change the default name ('value') of + the column containing the values when the argument `wide` is set to False: + + >>> arr.to_csv('my_file.csv', wide=False, value_name='data') + a,b,data + a0,b0,0 + a0,b1,1 + a0,b2,2 + a1,b0,3 + a1,b1,4 + a1,b2,5 + + Closes :issue:`549`. + Fixes ----- diff --git a/larray/core/array.py b/larray/core/array.py index 7e90bd45b..185927a3b 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -5850,7 +5850,7 @@ def clip(self, a_min, a_max, out=None): return clip(self, a_min, a_max, out) @deprecate_kwarg('transpose', 'wide') - def to_csv(self, filepath, sep=',', na_rep='', wide=True, dropna=None, dialect='default', **kwargs): + def to_csv(self, filepath, sep=',', na_rep='', wide=True, value_name='value', dropna=None, dialect='default', **kwargs): """ Writes array to a csv file. @@ -5866,6 +5866,9 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, dropna=None, dialect=' Whether or not writing arrays in "wide" format. If True, arrays are exported with the last axis represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one value column. Defaults to True. 
+ value_name : str, optional + Name of the column containing the values (last column) in the csv file when `wide=False` (see above). + Defaults to 'value'. dialect : 'default' | 'classic', optional Whether or not to write the last axis name (using '\' ). Defaults to 'default'. dropna : None, 'all', 'any' or True, optional @@ -5890,7 +5893,15 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, dropna=None, dialect=' >>> a.to_csv(fname, sep=';', wide=False) >>> with open(fname) as f: ... print(f.read().strip()) - nat;sex;0 + nat;sex;value + BE;M;0 + BE;F;1 + FO;M;2 + FO;F;3 + >>> a.to_csv(fname, sep=';', wide=False, value_name='population') + >>> with open(fname) as f: + ... print(f.read().strip()) + nat;sex;population BE;M;0 BE;F;1 FO;M;2 @@ -5907,7 +5918,7 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, dropna=None, dialect=' frame = self.to_frame(fold, dropna) frame.to_csv(filepath, sep=sep, na_rep=na_rep, **kwargs) else: - series = self.to_series(dropna=dropna is not None) + series = self.to_series(value_name, dropna is not None) series.to_csv(filepath, sep=sep, na_rep=na_rep, header=True, **kwargs) def to_hdf(self, filepath, key, *args, **kwargs): @@ -5935,7 +5946,7 @@ def to_hdf(self, filepath, key, *args, **kwargs): self.to_frame().to_hdf(filepath, key, *args, **kwargs) def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file=False, clear_sheet=False, - header=True, transpose=False, wide=True, engine=None, *args, **kwargs): + header=True, transpose=False, wide=True, value_name='value', engine=None, *args, **kwargs): """ Writes array in the specified sheet of specified excel workbook. @@ -5964,6 +5975,9 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file Whether or not writing arrays in "wide" format. If True, arrays are exported with the last axis represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one value column. Defaults to True. 
+ value_name : str, optional + Name of the column containing the values (last column) in the Excel sheet when `wide=False` (see above). + Defaults to 'value'. engine : 'xlwings' | 'openpyxl' | 'xlsxwriter' | 'xlwt' | None, optional Engine to use to make the output. If None (default), it will use 'xlwings' by default if the module is installed and relies on Pandas default writer otherwise. @@ -5985,7 +5999,7 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file if wide: pd_obj = self.to_frame(fold_last_axis_name=True) else: - pd_obj = self.to_series() + pd_obj = self.to_series(value_name) if engine is None: engine = 'xlwings' if xw is not None else None @@ -6021,8 +6035,8 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file options = dict(header=header, index=header, transpose=transpose) sheet[position].options(**options).value = pd_obj - # TODO: implement transpose via/in dump - # sheet[position] = self.dump(header=header, transpose=transpose) + # TODO: implement wide via/in dump + # sheet[position] = self.dump(header=header, wide=wide) if close: wb.save() wb.close() diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index b1e399377..beaf1e85b 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -3171,7 +3171,6 @@ def test_from_frame(self): assert la.axes.names == ['age', 'sex', 'time'] assert_array_equal(la[0, 'F', :], [3722, 3395, 3347]) - def test_to_csv(self): la = read_csv(inputpath('test5d.csv')) self.assertEqual(la.ndim, 5) @@ -3189,7 +3188,7 @@ def test_to_csv(self): # stacked data (one column containing all the values and another column listing the context of the value) la.to_csv(self.tmp_path('out.csv'), wide=False) - result = ['arr,age,sex,nat,time,0\n', + result = ['arr,age,sex,nat,time,value\n', '1,0,F,1,2007,3722\n', '1,0,F,1,2010,3395\n'] with open(self.tmp_path('out.csv')) as f: @@ -3222,7 +3221,7 @@ def test_to_excel_xlsxwriter(self): # stacked 
data (one column containing all the values and another column listing the context of the value) a1.to_excel(fpath, wide=False, engine='xlsxwriter') res = read_excel(fpath, engine='xlrd') - stacked_a1 = a1.reshape([a1.a, Axis([0])]) + stacked_a1 = a1.reshape([a1.a, Axis(['value'])]) assert_array_equal(res, stacked_a1) # 2D @@ -3282,7 +3281,7 @@ def test_to_excel_xlsxwriter(self): # stacked data (one column containing all the values and another column listing the context of the value) a1.to_excel(fpath, wide=False, engine='xlsxwriter') res = read_excel(fpath, engine='xlrd') - stacked_a1 = a1.reshape([a1.a, Axis([0])]) + stacked_a1 = a1.reshape([a1.a, Axis(['value'])]) assert_array_equal(res, stacked_a1) # 2D