Skip to content

Commit 7832481

Browse files
committed
fix #548 : renamed 'nb_index' arg of read_XXX + from_lists + from_string functions as 'nb_axes'
note: nb_axes = nb_index + 1
1 parent f5f7e65 commit 7832481

File tree

3 files changed

+85
-46
lines changed

3 files changed

+85
-46
lines changed

doc/source/changes/version_0_28.rst.inc

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,29 @@ Miscellaneous improvements
212212
a1 1 1
213213

214214

215+
* renamed argument `nb_index` of `read_csv`, `read_excel`, `read_sas`, `from_lists` and `from_string` functions
216+
as `nb_axes`. The relation between `nb_index` and `nb_axes` is given by `nb_axes = nb_index + 1`:
217+
218+
For a given file 'arr.csv' with content ::
219+
220+
a,b\c,c0,c1
221+
a0,b0,0,1
222+
a0,b1,2,3
223+
a1,b0,4,5
224+
a1,b1,6,7
225+
226+
previous code reading this array, such as:
227+
228+
>>> # deprecated
229+
>>> arr = read_csv('arr.csv', nb_index=2)
230+
231+
must be updated as follows:
232+
233+
>>> arr = read_csv('arr.csv', nb_axes=3)
234+
235+
Closes :issue:`548`.
236+
237+
215238
Fixes
216239
-----
217240

larray/inout/array.py

Lines changed: 60 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
from larray.core.axis import Axis
1111
from larray.core.array import LArray, ndtest
1212
from larray.core.group import _translate_sheet_name, _translate_key_hdf
13-
from larray.util.misc import basestring, skip_comment_cells, strip_rows, csv_open, StringIO, decode, unique
13+
from larray.util.misc import (basestring, skip_comment_cells, strip_rows, csv_open, StringIO, decode, unique,
14+
deprecate_kwarg)
1415

1516
try:
1617
import xlwings as xw
@@ -248,7 +249,8 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header
248249
unfold_last_axis_name=unfold_last_axis_name, **kwargs)
249250

250251

251-
def read_csv(filepath_or_buffer, nb_index=None, index_col=None, sep=',', headersep=None, fill_value=np.nan,
252+
@deprecate_kwarg('nb_index', 'nb_axes', arg_converter=lambda x: x + 1)
253+
def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headersep=None, fill_value=np.nan,
252254
na=np.nan, sort_rows=False, sort_columns=False, dialect='larray', **kwargs):
253255
"""
254256
Reads csv file and returns an array with the contents.
@@ -267,10 +269,13 @@ def read_csv(filepath_or_buffer, nb_index=None, index_col=None, sep=',', headers
267269
----------
268270
filepath_or_buffer : str or any file-like object
269271
Path where the csv file has to be read or a file handle.
270-
nb_index : int, optional
271-
Number of leading index columns (ex. 4).
272+
nb_axes : int, optional
273+
Number of axes of output array. The first `nb_axes` - 1 columns and the header of the CSV file will be used
274+
to set the axes of the output array. If not specified, the number of axes is given by the position of the
275+
column header including the character `\\` plus one. If no column header includes the character `\\`, the array
276+
is assumed to have one axis. Defaults to None.
272277
index_col : list, optional
273-
List of columns for the index (ex. [0, 1, 2, 3]).
278+
Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]). Defaults to None (see nb_axes above).
274279
sep : str, optional
275280
Separator.
276281
headersep : str or None, optional
@@ -309,7 +314,7 @@ def read_csv(filepath_or_buffer, nb_index=None, index_col=None, sep=',', headers
309314
FO 3 2
310315
>>> fname = 'no_axis_name.csv'
311316
>>> a.to_csv(fname, dialect='classic')
312-
>>> read_csv(fname, nb_index=1)
317+
>>> read_csv(fname, nb_axes=2)
313318
nat\\{1} M F
314319
BE 0 1
315320
FO 2 3
@@ -328,18 +333,18 @@ def read_csv(filepath_or_buffer, nb_index=None, index_col=None, sep=',', headers
328333
line_stream = skip_comment_cells(strip_rows(reader))
329334
axes_names = next(line_stream)
330335

331-
if nb_index is not None or index_col is not None:
332-
raise ValueError("nb_index and index_col are not compatible with dialect='liam2'")
336+
if nb_axes is not None or index_col is not None:
337+
raise ValueError("nb_axes and index_col are not compatible with dialect='liam2'")
333338
if len(axes_names) > 1:
334-
nb_index = len(axes_names) - 1
339+
nb_axes = len(axes_names)
335340
# use the second data line for column headers (excludes comments and blank lines before counting)
336341
kwargs['header'] = 1
337342
kwargs['comment'] = '#'
338343

339-
if nb_index is not None and index_col is not None:
340-
raise ValueError("cannot specify both nb_index and index_col")
341-
elif nb_index is not None:
342-
index_col = list(range(nb_index))
344+
if nb_axes is not None and index_col is not None:
345+
raise ValueError("cannot specify both nb_axes and index_col")
346+
elif nb_axes is not None:
347+
index_col = list(range(nb_axes - 1))
343348
elif isinstance(index_col, int):
344349
index_col = [index_col]
345350

@@ -422,7 +427,8 @@ def read_hdf(filepath_or_buffer, key, fill_value=np.nan, na=np.nan, sort_rows=Fa
422427
return df_aslarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, parse_header=False)
423428

424429

425-
def read_excel(filepath, sheetname=0, nb_index=None, index_col=None, fill_value=np.nan, na=np.nan,
430+
@deprecate_kwarg('nb_index', 'nb_axes', arg_converter=lambda x: x + 1)
431+
def read_excel(filepath, sheetname=0, nb_axes=None, index_col=None, fill_value=np.nan, na=np.nan,
426432
sort_rows=False, sort_columns=False, engine=None, **kwargs):
427433
"""
428434
Reads excel file from sheet name and returns an LArray with the contents
@@ -434,10 +440,13 @@ def read_excel(filepath, sheetname=0, nb_index=None, index_col=None, fill_value=
434440
sheetname : str, Group or int, optional
435441
Name or index of the Excel sheet containing the array to be read.
436442
By default the array is read from the first sheet.
437-
nb_index : int, optional
438-
Number of leading index columns (ex. 4). Defaults to 1.
443+
nb_axes : int, optional
444+
Number of axes of output array. The first `nb_axes` - 1 columns and the header of the Excel sheet will be used
445+
to set the axes of the output array. If not specified, the number of axes is given by the position of the
446+
column header including the character `\\` plus one. If no column header includes the character `\\`, the array
447+
is assumed to have one axis. Defaults to None.
439448
index_col : list, optional
440-
List of columns for the index (ex. [0, 1, 2, 3]). Default to [0].
449+
Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]). Defaults to None (see nb_axes above).
441450
fill_value : scalar or LArray, optional
442451
Value used to fill cells corresponding to label combinations which are not present in the input.
443452
Defaults to NaN.
@@ -461,10 +470,10 @@ def read_excel(filepath, sheetname=0, nb_index=None, index_col=None, fill_value=
461470
if engine is None:
462471
engine = 'xlwings' if xw is not None else None
463472

464-
if nb_index is not None and index_col is not None:
465-
raise ValueError("cannot specify both nb_index and index_col")
466-
elif nb_index is not None:
467-
index_col = list(range(nb_index))
473+
if nb_axes is not None and index_col is not None:
474+
raise ValueError("cannot specify both nb_axes and index_col")
475+
elif nb_axes is not None:
476+
index_col = list(range(nb_axes - 1))
468477
elif isinstance(index_col, int):
469478
index_col = [index_col]
470479

@@ -482,43 +491,47 @@ def read_excel(filepath, sheetname=0, nb_index=None, index_col=None, fill_value=
482491
fill_value=fill_value)
483492

484493

485-
def read_sas(filepath, nb_index=None, index_col=None, fill_value=np.nan, na=np.nan, sort_rows=False, sort_columns=False,
494+
@deprecate_kwarg('nb_index', 'nb_axes', arg_converter=lambda x: x + 1)
495+
def read_sas(filepath, nb_axes=None, index_col=None, fill_value=np.nan, na=np.nan, sort_rows=False, sort_columns=False,
486496
**kwargs):
487497
"""
488498
Reads sas file and returns an LArray with the contents
489-
nb_index: number of leading index columns (e.g. 4)
499+
nb_axes: number of axes of the output array
490500
or
491-
index_col: list of columns for the index (e.g. [0, 1, 3])
501+
index_col: Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3])
492502
"""
493503
if not np.isnan(na):
494504
fill_value = na
495505
warnings.warn("read_sas `na` argument has been renamed to `fill_value`. Please use that instead.",
496506
FutureWarning, stacklevel=2)
497507

498-
if nb_index is not None and index_col is not None:
499-
raise ValueError("cannot specify both nb_index and index_col")
500-
elif nb_index is not None:
501-
index_col = list(range(nb_index))
508+
if nb_axes is not None and index_col is not None:
509+
raise ValueError("cannot specify both nb_axes and index_col")
510+
elif nb_axes is not None:
511+
index_col = list(range(nb_axes - 1))
502512
elif isinstance(index_col, int):
503513
index_col = [index_col]
504514

505515
df = pd.read_sas(filepath, index=index_col, **kwargs)
506516
return df_aslarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value)
507517

508518

509-
def from_lists(data, nb_index=None, index_col=None, fill_value=np.nan, sort_rows=False, sort_columns=False):
519+
@deprecate_kwarg('nb_index', 'nb_axes', arg_converter=lambda x: x + 1)
520+
def from_lists(data, nb_axes=None, index_col=None, fill_value=np.nan, sort_rows=False, sort_columns=False):
510521
"""
511522
initialize array from a list of lists (lines)
512523
513524
Parameters
514525
----------
515526
data : sequence (tuple, list, ...)
516527
Input data. All data is supposed to already have the correct type (e.g. strings are not parsed).
517-
nb_index : int, optional
518-
Number of leading index columns (ex. 4). Defaults to None, in which case it guesses the number of index columns
519-
by using the position of the first '\' in the first line.
528+
nb_axes : int, optional
529+
Number of axes of output array. The first `nb_axes` - 1 columns and the header will be used
530+
to set the axes of the output array. If not specified, the number of axes is given by the position of the
531+
column header including the character `\\` plus one. If no column header includes the character `\\`, the array
532+
is assumed to have one axis. Defaults to None.
520533
index_col : list, optional
521-
List of columns for the index (ex. [0, 1, 2, 3]). Defaults to None (see nb_index above).
534+
Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]). Defaults to None (see nb_axes above).
522535
fill_value : scalar or LArray, optional
523536
Value used to fill cells corresponding to label combinations which are not present in the input.
524537
Defaults to NaN.
@@ -556,7 +569,7 @@ def from_lists(data, nb_index=None, index_col=None, fill_value=np.nan, sort_rows
556569
>>> from_lists([['sex', 'nat', 1991, 1992, 1993],
557570
... [ 'M', 'BE', 1, 0, 0],
558571
... [ 'M', 'FO', 2, 0, 0],
559-
... [ 'F', 'BE', 0, 0, 1]], nb_index=2)
572+
... [ 'F', 'BE', 0, 0, 1]], nb_axes=3)
560573
sex nat\\{2} 1991 1992 1993
561574
M BE 1.0 0.0 0.0
562575
M FO 2.0 0.0 0.0
@@ -572,10 +585,10 @@ def from_lists(data, nb_index=None, index_col=None, fill_value=np.nan, sort_rows
572585
F BE 0 0 1
573586
F FO 42 42 42
574587
"""
575-
if nb_index is not None and index_col is not None:
576-
raise ValueError("cannot specify both nb_index and index_col")
577-
elif nb_index is not None:
578-
index_col = list(range(nb_index))
588+
if nb_axes is not None and index_col is not None:
589+
raise ValueError("cannot specify both nb_axes and index_col")
590+
elif nb_axes is not None:
591+
index_col = list(range(nb_axes - 1))
579592
elif isinstance(index_col, int):
580593
index_col = [index_col]
581594

@@ -587,18 +600,21 @@ def from_lists(data, nb_index=None, index_col=None, fill_value=np.nan, sort_rows
587600
fill_value=fill_value)
588601

589602

590-
def from_string(s, nb_index=None, index_col=None, sep=' ', **kwargs):
603+
@deprecate_kwarg('nb_index', 'nb_axes', arg_converter=lambda x: x + 1)
604+
def from_string(s, nb_axes=None, index_col=None, sep=' ', **kwargs):
591605
"""Create an array from a multi-line string.
592606
593607
Parameters
594608
----------
595609
s : str
596610
input string.
597-
nb_index : int, optional
598-
Number of leading index columns (ex. 4). Defaults to None, in which case it guesses the number of index columns
599-
by using the position of the first '\' in the first line.
611+
nb_axes : int, optional
612+
Number of axes of output array. The first `nb_axes` - 1 columns and the header will be used
613+
to set the axes of the output array. If not specified, the number of axes is given by the position of the
614+
column header including the character `\\` plus one. If no column header includes the character `\\`, the array
615+
is assumed to have one axis. Defaults to None.
600616
index_col : list, optional
601-
List of columns for the index (ex. [0, 1, 2, 3]). Defaults to None (see nb_index above).
617+
Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]). Defaults to None (see nb_axes above).
602618
sep : str
603619
delimiter used to split each line into cells.
604620
\**kwargs
@@ -654,4 +670,4 @@ def from_string(s, nb_index=None, index_col=None, sep=' ', **kwargs):
654670
BE 0 1
655671
FO 2 3
656672
"""
657-
return read_csv(StringIO(s), nb_index=nb_index, index_col=index_col, sep=sep, skipinitialspace=True, **kwargs)
673+
return read_csv(StringIO(s), nb_axes=nb_axes, index_col=index_col, sep=sep, skipinitialspace=True, **kwargs)

larray/tests/test_array.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2720,7 +2720,7 @@ def test_read_excel_pandas(self):
27202720
self.assertEqual(la.axes.names, ['time'])
27212721
assert_array_equal(la, [3722, 3395, 3347])
27222722

2723-
la = read_excel(inputpath('test.xlsx'), '2d', nb_index=1, engine='xlrd')
2723+
la = read_excel(inputpath('test.xlsx'), '2d', nb_axes=2, engine='xlrd')
27242724
self.assertEqual(la.ndim, 2)
27252725
self.assertEqual(la.shape, (5, 3))
27262726
self.assertEqual(la.axes.names, ['age', 'time'])
@@ -2744,7 +2744,7 @@ def test_read_excel_pandas(self):
27442744
self.assertEqual(la.axes.names, ['age', 'sex', 'time'])
27452745
assert_array_equal(la[0, 'F', :], [3722, 3395, 3347])
27462746

2747-
la = read_excel(inputpath('test.xlsx'), '5d', nb_index=4, engine='xlrd')
2747+
la = read_excel(inputpath('test.xlsx'), '5d', nb_axes=5, engine='xlrd')
27482748
self.assertEqual(la.ndim, 5)
27492749
self.assertEqual(la.shape, (2, 5, 2, 2, 3))
27502750
self.assertEqual(la.axes.names, ['arr', 'age', 'sex', 'nat', 'time'])

0 commit comments

Comments
 (0)