Skip to content

replace arg 'transpose' by 'wide' in to_csv and to_excel (issues 371 + 549 + 575) #576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion doc/source/changes/version_0_28.rst.inc
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,6 @@ Miscellaneous improvements
a0 0 0
a1 1 1


* renamed argument `nb_index` of `read_csv`, `read_excel`, `read_sas`, `from_lists` and `from_string` functions
as `nb_axes`. The relation between `nb_index` and `nb_axes` is given by `nb_axes = nb_index + 1`:

Expand All @@ -234,6 +233,56 @@ Miscellaneous improvements

Closes :issue:`548`:

* renamed argument `transpose` by `wide` in `to_csv` method.

* added argument `wide` in `to_excel` method. When argument `wide` is set to False, the array is exported
in "narrow" format, i.e. one column per axis plus one value column:

>>> arr = ndtest((2, 3))
>>> arr
a\b b0 b1 b2
a0 0 1 2
a1 3 4 5

Default behavior (`wide=True`):

>>> arr.to_excel('my_file.xlsx')

a\b b0 b1 b2
a0 0 1 2
a1 3 4 5

With `wide=False`:

>>> arr.to_excel('my_file.xlsx', wide=False)

a b value
a0 b0 0
a0 b1 1
a0 b2 2
a1 b0 3
a1 b1 4
a1 b2 5

Argument `transpose` has a different purpose than `wide` and is mainly useful to allow multiple axes as header
when exporting arrays with more than 2 dimensions. Closes :issue:`575` and :issue:`371`.

* added argument `name` to `to_series` method allowing to set a name to the Pandas Series returned by the method.

* added argument `value_name` to `to_csv` and `to_excel` allowing to change the default name ('value') to
the column containg the values when the argument `wide` is set to False:

>>> arr.to_csv('my_file.csv', wide=False, value_name='data')
a,b,data
a0,b0,0
a0,b1,1
a0,b2,2
a1,b0,3
a1,b1,4
a1,b2,5

Closes :issue:`549`.


Fixes
-----
Expand Down
105 changes: 81 additions & 24 deletions larray/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,12 +975,14 @@ def to_frame(self, fold_last_axis_name=False, dropna=None):
return df
df = property(to_frame)

def to_series(self, dropna=False):
def to_series(self, name=None, dropna=False):
"""
Converts LArray into Pandas Series.

Parameters
----------
name : str, optional
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe add a changelog note about this?

Name of the series. Defaults to None.
dropna : bool, optional.
False by default.

Expand All @@ -991,6 +993,10 @@ def to_series(self, dropna=False):
Examples
--------
>>> arr = ndtest((2, 3), dtype=float)
>>> arr
a\\b b0 b1 b2
a0 0.0 1.0 2.0
a1 3.0 4.0 5.0
>>> arr.to_series() # doctest: +NORMALIZE_WHITESPACE
a b
a0 b0 0.0
Expand All @@ -1000,9 +1006,36 @@ def to_series(self, dropna=False):
b1 4.0
b2 5.0
dtype: float64

Set a name

>>> arr.to_series('my_name') # doctest: +NORMALIZE_WHITESPACE
a b
a0 b0 0.0
b1 1.0
b2 2.0
a1 b0 3.0
b1 4.0
b2 5.0
Name: my_name, dtype: float64

Drop nan values

>>> arr['b1'] = np.nan
>>> arr
a\\b b0 b1 b2
a0 0.0 nan 2.0
a1 3.0 nan 5.0
>>> arr.to_series(dropna=True) # doctest: +NORMALIZE_WHITESPACE
a b
a0 b0 0.0
b2 2.0
a1 b0 3.0
b2 5.0
dtype: float64
"""
index = pd.MultiIndex.from_product([axis.labels for axis in self.axes], names=self.axes.names)
series = pd.Series(np.asarray(self).reshape(self.size), index)
series = pd.Series(np.asarray(self).reshape(self.size), index, name=name)
if dropna:
series.dropna(inplace=True)
return series
Expand Down Expand Up @@ -5816,23 +5849,28 @@ def clip(self, a_min, a_max, out=None):
from larray.core.ufuncs import clip
return clip(self, a_min, a_max, out)

def to_csv(self, filepath, sep=',', na_rep='', transpose=True, dropna=None, dialect='default', **kwargs):
@deprecate_kwarg('transpose', 'wide')
def to_csv(self, filepath, sep=',', na_rep='', wide=True, value_name='value', dropna=None, dialect='default', **kwargs):
"""
Writes array to a csv file.

Parameters
----------
filepath : str
path where the csv file has to be written.
sep : str
seperator for the csv file.
na_rep : str
replace NA values with na_rep.
transpose : boolean
transpose = True => transpose over last axis.
transpose = False => no transpose.
dialect : 'default' | 'classic'
Whether or not to write the last axis name (using '\' )
sep : str, optional
separator for the csv file. Defaults to `,`.
na_rep : str, optional
replace NA values with na_rep. Defaults to ''.
wide : boolean, optional
Whether or not writing arrays in "wide" format. If True, arrays are exported with the last axis
represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one
value column. Defaults to True.
value_name : str, optional
Name of the column containing the values (last column) in the csv file when `wide=False` (see above).
Defaults to 'value'.
dialect : 'default' | 'classic', optional
Whether or not to write the last axis name (using '\' ). Defaults to 'default'.
dropna : None, 'all', 'any' or True, optional
Drop lines if 'all' its values are NA, if 'any' value is NA or do not drop any line (default).
True is equivalent to 'all'.
Expand All @@ -5852,10 +5890,18 @@ def to_csv(self, filepath, sep=',', na_rep='', transpose=True, dropna=None, dial
nat\\sex,M,F
BE,0,1
FO,2,3
>>> a.to_csv(fname, sep=';', transpose=False)
>>> a.to_csv(fname, sep=';', wide=False)
>>> with open(fname) as f:
... print(f.read().strip())
nat;sex;0
nat;sex;value
BE;M;0
BE;F;1
FO;M;2
FO;F;3
>>> a.to_csv(fname, sep=';', wide=False, value_name='population')
>>> with open(fname) as f:
... print(f.read().strip())
nat;sex;population
BE;M;0
BE;F;1
FO;M;2
Expand All @@ -5868,11 +5914,11 @@ def to_csv(self, filepath, sep=',', na_rep='', transpose=True, dropna=None, dial
FO,2,3
"""
fold = dialect == 'default'
if transpose:
if wide:
frame = self.to_frame(fold, dropna)
frame.to_csv(filepath, sep=sep, na_rep=na_rep, **kwargs)
else:
series = self.to_series(dropna is not None)
series = self.to_series(value_name, dropna is not None)
series.to_csv(filepath, sep=sep, na_rep=na_rep, header=True, **kwargs)

def to_hdf(self, filepath, key, *args, **kwargs):
Expand Down Expand Up @@ -5900,7 +5946,7 @@ def to_hdf(self, filepath, key, *args, **kwargs):
self.to_frame().to_hdf(filepath, key, *args, **kwargs)

def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file=False, clear_sheet=False,
header=True, transpose=False, engine=None, *args, **kwargs):
header=True, transpose=False, wide=True, value_name='value', engine=None, *args, **kwargs):
"""
Writes array in the specified sheet of specified excel workbook.

Expand All @@ -5923,8 +5969,15 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file
header : bool, optional
Whether or not to write a header (axes names and labels). Defaults to True.
transpose : bool, optional
Whether or not to transpose the resulting array. This can be used, for example, for writing one dimensional
arrays vertically. Defaults to False.
Whether or not to transpose the array transpose over last axis.
This is equivalent to paste with option transpose in Excel. Defaults to False.
wide : boolean, optional
Whether or not writing arrays in "wide" format. If True, arrays are exported with the last axis
represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one
value column. Defaults to True.
value_name : str, optional
Name of the column containing the values (last column) in the Excel sheet when `wide=False` (see above).
Defaults to 'value'.
engine : 'xlwings' | 'openpyxl' | 'xlsxwriter' | 'xlwt' | None, optional
Engine to use to make the output. If None (default), it will use 'xlwings' by default if the module is
installed and relies on Pandas default writer otherwise.
Expand All @@ -5943,7 +5996,11 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file
"""
sheet_name = _translate_sheet_name(sheet_name)

df = self.to_frame(fold_last_axis_name=True)
if wide:
pd_obj = self.to_frame(fold_last_axis_name=True)
else:
pd_obj = self.to_series(value_name)

if engine is None:
engine = 'xlwings' if xw is not None else None

Expand Down Expand Up @@ -5977,9 +6034,9 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file
sheet = wb.sheets.add(sheet_name, after=wb.sheets[-1])

options = dict(header=header, index=header, transpose=transpose)
sheet[position].options(**options).value = df
# TODO: implement transpose via/in dump
# sheet[position] = self.dump(header=header, transpose=transpose)
sheet[position].options(**options).value = pd_obj
# TODO: implement wide via/in dump
# sheet[position] = self.dump(header=header, wide=wide)
if close:
wb.save()
wb.close()
Expand All @@ -5988,7 +6045,7 @@ def to_excel(self, filepath=None, sheet_name=None, position='A1', overwrite_file
sheet_name = 'Sheet1'
# TODO: implement position in this case
# startrow, startcol
df.to_excel(filepath, sheet_name, *args, engine=engine, **kwargs)
pd_obj.to_excel(filepath, sheet_name, *args, engine=engine, **kwargs)

def to_clipboard(self, *args, **kwargs):
"""Sends the content of the array to clipboard.
Expand Down
26 changes: 23 additions & 3 deletions larray/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -3171,7 +3171,6 @@ def test_from_frame(self):
assert la.axes.names == ['age', 'sex', 'time']
assert_array_equal(la[0, 'F', :], [3722, 3395, 3347])


def test_to_csv(self):
la = read_csv(inputpath('test5d.csv'))
self.assertEqual(la.ndim, 5)
Expand All @@ -3187,8 +3186,9 @@ def test_to_csv(self):
with open(self.tmp_path('out.csv')) as f:
self.assertEqual(f.readlines()[:3], result)

la.to_csv(self.tmp_path('out.csv'), transpose=False)
result = ['arr,age,sex,nat,time,0\n',
# stacked data (one column containing all the values and another column listing the context of the value)
la.to_csv(self.tmp_path('out.csv'), wide=False)
result = ['arr,age,sex,nat,time,value\n',
'1,0,F,1,2007,3722\n',
'1,0,F,1,2010,3395\n']
with open(self.tmp_path('out.csv')) as f:
Expand Down Expand Up @@ -3217,6 +3217,13 @@ def test_to_excel_xlsxwriter(self):
res = read_excel(fpath, engine='xlrd')
assert_array_equal(res, a1)

# fpath/Sheet1/A1
# stacked data (one column containing all the values and another column listing the context of the value)
a1.to_excel(fpath, wide=False, engine='xlsxwriter')
res = read_excel(fpath, engine='xlrd')
stacked_a1 = a1.reshape([a1.a, Axis(['value'])])
assert_array_equal(res, stacked_a1)

# 2D
a2 = ndtest((2, 3))

Expand Down Expand Up @@ -3270,6 +3277,13 @@ def test_to_excel_xlsxwriter(self):
res = read_excel(fpath, engine='xlrd')
assert_array_equal(res, a1)

# fpath/Sheet1/A1
# stacked data (one column containing all the values and another column listing the context of the value)
a1.to_excel(fpath, wide=False, engine='xlsxwriter')
res = read_excel(fpath, engine='xlrd')
stacked_a1 = a1.reshape([a1.a, Axis(['value'])])
assert_array_equal(res, stacked_a1)

# 2D
a2 = ndtest((2, 3))

Expand Down Expand Up @@ -3352,6 +3366,12 @@ def test_to_excel_xlwings(self):
res = read_excel(fpath, engine='xlrd')
assert_array_equal(res, a1)

# fpath/Sheet1/A1
# stacked data (one column containing all the values and another column listing the context of the value)
a1.to_excel(fpath, wide=False, engine='xlwings')
res = read_excel(fpath, engine='xlrd')
assert_array_equal(res, a1)

# 2D
a2 = ndtest((2, 3))

Expand Down