10
10
from larray .core .axis import Axis
11
11
from larray .core .array import LArray , ndtest
12
12
from larray .core .group import _translate_sheet_name , _translate_key_hdf
13
- from larray .util .misc import basestring , skip_comment_cells , strip_rows , csv_open , StringIO , decode , unique
13
+ from larray .util .misc import (basestring , skip_comment_cells , strip_rows , csv_open , StringIO , decode , unique ,
14
+ deprecate_kwarg )
14
15
15
16
try :
16
17
import xlwings as xw
@@ -248,7 +249,8 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header
248
249
unfold_last_axis_name = unfold_last_axis_name , ** kwargs )
249
250
250
251
251
- def read_csv (filepath_or_buffer , nb_index = None , index_col = None , sep = ',' , headersep = None , fill_value = np .nan ,
252
+ @deprecate_kwarg ('nb_index' , 'nb_axes' , arg_converter = lambda x : x + 1 )
253
+ def read_csv (filepath_or_buffer , nb_axes = None , index_col = None , sep = ',' , headersep = None , fill_value = np .nan ,
252
254
na = np .nan , sort_rows = False , sort_columns = False , dialect = 'larray' , ** kwargs ):
253
255
"""
254
256
Reads csv file and returns an array with the contents.
@@ -267,10 +269,13 @@ def read_csv(filepath_or_buffer, nb_index=None, index_col=None, sep=',', headers
267
269
----------
268
270
filepath_or_buffer : str or any file-like object
269
271
Path where the csv file has to be read or a file handle.
270
- nb_index : int, optional
271
- Number of leading index columns (ex. 4).
272
+ nb_axes : int, optional
273
+ Number of axes of output array. The first `nb_axes` - 1 columns and the header of the CSV file will be used
274
+ to set the axes of the output array. If not specified, the number of axes is given by the position of the
275
+ column header including the character `\` plus one. If no column header includes the character `\`, the array
276
+ is assumed to have one axis. Defaults to None.
272
277
index_col : list, optional
273
- List of columns for the index (ex. [0, 1, 2, 3]).
278
+ Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]). Defaults to None (see nb_axes above).
274
279
sep : str, optional
275
280
Separator.
276
281
headersep : str or None, optional
@@ -309,7 +314,7 @@ def read_csv(filepath_or_buffer, nb_index=None, index_col=None, sep=',', headers
309
314
FO 3 2
310
315
>>> fname = 'no_axis_name.csv'
311
316
>>> a.to_csv(fname, dialect='classic')
312
- >>> read_csv(fname, nb_index=1 )
317
+ >>> read_csv(fname, nb_axes=2 )
313
318
nat\\ {1} M F
314
319
BE 0 1
315
320
FO 2 3
@@ -328,18 +333,18 @@ def read_csv(filepath_or_buffer, nb_index=None, index_col=None, sep=',', headers
328
333
line_stream = skip_comment_cells (strip_rows (reader ))
329
334
axes_names = next (line_stream )
330
335
331
- if nb_index is not None or index_col is not None :
332
- raise ValueError ("nb_index and index_col are not compatible with dialect='liam2'" )
336
+ if nb_axes is not None or index_col is not None :
337
+ raise ValueError ("nb_axes and index_col are not compatible with dialect='liam2'" )
333
338
if len (axes_names ) > 1 :
334
- nb_index = len (axes_names ) - 1
339
+ nb_axes = len (axes_names )
335
340
# use the second data line for column headers (excludes comments and blank lines before counting)
336
341
kwargs ['header' ] = 1
337
342
kwargs ['comment' ] = '#'
338
343
339
- if nb_index is not None and index_col is not None :
340
- raise ValueError ("cannot specify both nb_index and index_col" )
341
- elif nb_index is not None :
342
- index_col = list (range (nb_index ))
344
+ if nb_axes is not None and index_col is not None :
345
+ raise ValueError ("cannot specify both nb_axes and index_col" )
346
+ elif nb_axes is not None :
347
+ index_col = list (range (nb_axes - 1 ))
343
348
elif isinstance (index_col , int ):
344
349
index_col = [index_col ]
345
350
@@ -422,7 +427,8 @@ def read_hdf(filepath_or_buffer, key, fill_value=np.nan, na=np.nan, sort_rows=Fa
422
427
return df_aslarray (df , sort_rows = sort_rows , sort_columns = sort_columns , fill_value = fill_value , parse_header = False )
423
428
424
429
425
- def read_excel (filepath , sheetname = 0 , nb_index = None , index_col = None , fill_value = np .nan , na = np .nan ,
430
+ @deprecate_kwarg ('nb_index' , 'nb_axes' , arg_converter = lambda x : x + 1 )
431
+ def read_excel (filepath , sheetname = 0 , nb_axes = None , index_col = None , fill_value = np .nan , na = np .nan ,
426
432
sort_rows = False , sort_columns = False , engine = None , ** kwargs ):
427
433
"""
428
434
Reads excel file from sheet name and returns an LArray with the contents
@@ -434,10 +440,13 @@ def read_excel(filepath, sheetname=0, nb_index=None, index_col=None, fill_value=
434
440
sheetname : str, Group or int, optional
435
441
Name or index of the Excel sheet containing the array to be read.
436
442
By default the array is read from the first sheet.
437
- nb_index : int, optional
438
- Number of leading index columns (ex. 4). Defaults to 1.
443
+ nb_axes : int, optional
444
+ Number of axes of output array. The first `nb_axes` - 1 columns and the header of the Excel sheet will be used
445
+ to set the axes of the output array. If not specified, the number of axes is given by the position of the
446
+ column header including the character `\` plus one. If no column header includes the character `\`, the array
447
+ is assumed to have one axis. Defaults to None.
439
448
index_col : list, optional
440
- List of columns for the index (ex. [0, 1, 2, 3]). Default to [0] .
449
+ Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]). Defaults to None (see nb_axes above).
441
450
fill_value : scalar or LArray, optional
442
451
Value used to fill cells corresponding to label combinations which are not present in the input.
443
452
Defaults to NaN.
@@ -461,10 +470,10 @@ def read_excel(filepath, sheetname=0, nb_index=None, index_col=None, fill_value=
461
470
if engine is None :
462
471
engine = 'xlwings' if xw is not None else None
463
472
464
- if nb_index is not None and index_col is not None :
465
- raise ValueError ("cannot specify both nb_index and index_col" )
466
- elif nb_index is not None :
467
- index_col = list (range (nb_index ))
473
+ if nb_axes is not None and index_col is not None :
474
+ raise ValueError ("cannot specify both nb_axes and index_col" )
475
+ elif nb_axes is not None :
476
+ index_col = list (range (nb_axes - 1 ))
468
477
elif isinstance (index_col , int ):
469
478
index_col = [index_col ]
470
479
@@ -482,43 +491,47 @@ def read_excel(filepath, sheetname=0, nb_index=None, index_col=None, fill_value=
482
491
fill_value = fill_value )
483
492
484
493
485
- def read_sas (filepath , nb_index = None , index_col = None , fill_value = np .nan , na = np .nan , sort_rows = False , sort_columns = False ,
494
+ @deprecate_kwarg ('nb_index' , 'nb_axes' , arg_converter = lambda x : x + 1 )
495
+ def read_sas (filepath , nb_axes = None , index_col = None , fill_value = np .nan , na = np .nan , sort_rows = False , sort_columns = False ,
486
496
** kwargs ):
487
497
"""
488
498
Reads sas file and returns an LArray with the contents
489
- nb_index : number of leading index columns (e.g. 4)
499
+ nb_axes : number of axes of the output array
490
500
or
491
- index_col: list of columns for the index (e.g. [0, 1, 3])
501
+ index_col: Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3])
492
502
"""
493
503
if not np .isnan (na ):
494
504
fill_value = na
495
505
warnings .warn ("read_sas `na` argument has been renamed to `fill_value`. Please use that instead." ,
496
506
FutureWarning , stacklevel = 2 )
497
507
498
- if nb_index is not None and index_col is not None :
499
- raise ValueError ("cannot specify both nb_index and index_col" )
500
- elif nb_index is not None :
501
- index_col = list (range (nb_index ))
508
+ if nb_axes is not None and index_col is not None :
509
+ raise ValueError ("cannot specify both nb_axes and index_col" )
510
+ elif nb_axes is not None :
511
+ index_col = list (range (nb_axes - 1 ))
502
512
elif isinstance (index_col , int ):
503
513
index_col = [index_col ]
504
514
505
515
df = pd .read_sas (filepath , index = index_col , ** kwargs )
506
516
return df_aslarray (df , sort_rows = sort_rows , sort_columns = sort_columns , fill_value = fill_value )
507
517
508
518
509
- def from_lists (data , nb_index = None , index_col = None , fill_value = np .nan , sort_rows = False , sort_columns = False ):
519
+ @deprecate_kwarg ('nb_index' , 'nb_axes' , arg_converter = lambda x : x + 1 )
520
+ def from_lists (data , nb_axes = None , index_col = None , fill_value = np .nan , sort_rows = False , sort_columns = False ):
510
521
"""
511
522
initialize array from a list of lists (lines)
512
523
513
524
Parameters
514
525
----------
515
526
data : sequence (tuple, list, ...)
516
527
Input data. All data is supposed to already have the correct type (e.g. strings are not parsed).
517
- nb_index : int, optional
518
- Number of leading index columns (ex. 4). Defaults to None, in which case it guesses the number of index columns
519
- by using the position of the first '\' in the first line.
528
+ nb_axes : int, optional
529
+ Number of axes of output array. The first `nb_axes` - 1 columns and the header will be used
530
+ to set the axes of the output array. If not specified, the number of axes is given by the position of the
531
+ column header including the character `\` plus one. If no column header includes the character `\`, the array
532
+ is assumed to have one axis. Defaults to None.
520
533
index_col : list, optional
521
- List of columns for the index (ex. [0, 1, 2, 3]). Defaults to None (see nb_index above).
534
+ Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]). Defaults to None (see nb_axes above).
522
535
fill_value : scalar or LArray, optional
523
536
Value used to fill cells corresponding to label combinations which are not present in the input.
524
537
Defaults to NaN.
@@ -556,7 +569,7 @@ def from_lists(data, nb_index=None, index_col=None, fill_value=np.nan, sort_rows
556
569
>>> from_lists([['sex', 'nat', 1991, 1992, 1993],
557
570
... [ 'M', 'BE', 1, 0, 0],
558
571
... [ 'M', 'FO', 2, 0, 0],
559
- ... [ 'F', 'BE', 0, 0, 1]], nb_index=2 )
572
+ ... [ 'F', 'BE', 0, 0, 1]], nb_axes=3 )
560
573
sex nat\\ {2} 1991 1992 1993
561
574
M BE 1.0 0.0 0.0
562
575
M FO 2.0 0.0 0.0
@@ -572,10 +585,10 @@ def from_lists(data, nb_index=None, index_col=None, fill_value=np.nan, sort_rows
572
585
F BE 0 0 1
573
586
F FO 42 42 42
574
587
"""
575
- if nb_index is not None and index_col is not None :
576
- raise ValueError ("cannot specify both nb_index and index_col" )
577
- elif nb_index is not None :
578
- index_col = list (range (nb_index ))
588
+ if nb_axes is not None and index_col is not None :
589
+ raise ValueError ("cannot specify both nb_axes and index_col" )
590
+ elif nb_axes is not None :
591
+ index_col = list (range (nb_axes - 1 ))
579
592
elif isinstance (index_col , int ):
580
593
index_col = [index_col ]
581
594
@@ -587,18 +600,21 @@ def from_lists(data, nb_index=None, index_col=None, fill_value=np.nan, sort_rows
587
600
fill_value = fill_value )
588
601
589
602
590
- def from_string (s , nb_index = None , index_col = None , sep = ' ' , ** kwargs ):
603
+ @deprecate_kwarg ('nb_index' , 'nb_axes' , arg_converter = lambda x : x + 1 )
604
+ def from_string (s , nb_axes = None , index_col = None , sep = ' ' , ** kwargs ):
591
605
"""Create an array from a multi-line string.
592
606
593
607
Parameters
594
608
----------
595
609
s : str
596
610
input string.
597
- nb_index : int, optional
598
- Number of leading index columns (ex. 4). Defaults to None, in which case it guesses the number of index columns
599
- by using the position of the first '\' in the first line.
611
+ nb_axes : int, optional
612
+ Number of axes of output array. The first `nb_axes` - 1 columns and the header will be used
613
+ to set the axes of the output array. If not specified, the number of axes is given by the position of the
614
+ column header including the character `\` plus one. If no column header includes the character `\`, the array
615
+ is assumed to have one axis. Defaults to None.
600
616
index_col : list, optional
601
- List of columns for the index (ex. [0, 1, 2, 3]). Defaults to None (see nb_index above).
617
+ Positions of columns for the n-1 first axes (ex. [0, 1, 2, 3]). Defaults to None (see nb_axes above).
602
618
sep : str
603
619
delimiter used to split each line into cells.
604
620
\**kwargs
@@ -654,4 +670,4 @@ def from_string(s, nb_index=None, index_col=None, sep=' ', **kwargs):
654
670
BE 0 1
655
671
FO 2 3
656
672
"""
657
- return read_csv (StringIO (s ), nb_index = nb_index , index_col = index_col , sep = sep , skipinitialspace = True , ** kwargs )
673
+ return read_csv (StringIO (s ), nb_axes = nb_axes , index_col = index_col , sep = sep , skipinitialspace = True , ** kwargs )
0 commit comments