Skip to content

Commit 4bf6aaa

Browse files
committed
fix #574: added argument 'wide' to the read_csv, read_excel, from_lists and from_strings functions, and updated df_aslarray so that arrays stored in narrow format can be loaded
1 parent 474c0f8 commit 4bf6aaa

10 files changed

+305
-59
lines changed

doc/source/changes/version_0_28.rst.inc

+11
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,17 @@ Miscellaneous improvements
289289
Argument `transpose` has a different purpose than `wide` and is mainly useful to allow multiple axes as header
290290
when exporting arrays with more than 2 dimensions. Closes :issue:`575` and :issue:`371`.
291291

292+
* added argument `wide` to `read_csv` and `read_excel` functions. If False, the array to be loaded is assumed to
293+
be stored in "narrow" format:
294+
295+
>>> # assuming the array was saved using command: arr.to_excel('my_file.xlsx', wide=False)
296+
>>> read_excel('my_file.xlsx', wide=False)
297+
a\b b0 b1 b2
298+
a0 0 1 2
299+
a1 3 4 5
300+
301+
Closes :issue:`574`.
302+
292303
* added argument `name` to the `to_series` method, allowing a name to be set on the Pandas Series returned by the method.
293304

294305
* added argument `value_name` to `to_csv` and `to_excel` allowing to change the default name ('value') to

larray/inout/array.py

+148-52
Large diffs are not rendered by default.

larray/inout/excel.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -418,9 +418,9 @@ def __setattr__(self, key, value):
418418
setattr(self.xw_sheet, key, value)
419419

420420
def load(self, header=True, convert_float=True, nb_index=None, index_col=None, fill_value=np.nan,
421-
sort_rows=False, sort_columns=False):
421+
sort_rows=False, sort_columns=False, wide=True):
422422
return self[:].load(header=header, convert_float=convert_float, nb_index=nb_index, index_col=index_col,
423-
sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value)
423+
fill_value=fill_value, sort_rows=sort_rows, sort_columns=sort_columns, wide=wide)
424424

425425
# TODO: generalize to more than 2 dimensions or scrap it
426426
def array(self, data, row_labels=None, column_labels=None, names=None):
@@ -547,15 +547,15 @@ def __str__(self):
547547
__repr__ = __str__
548548

549549
def load(self, header=True, convert_float=True, nb_index=None, index_col=None, fill_value=np.nan,
550-
sort_rows=False, sort_columns=False):
550+
sort_rows=False, sort_columns=False, wide=True):
551551
if not self.ndim:
552552
return LArray([])
553553

554554
list_data = self._converted_value(convert_float=convert_float)
555555

556556
if header:
557557
return from_lists(list_data, nb_index=nb_index, index_col=index_col, fill_value=fill_value,
558-
sort_rows=sort_rows, sort_columns=sort_columns)
558+
sort_rows=sort_rows, sort_columns=sort_columns, wide=wide)
559559
else:
560560
return LArray(list_data)
561561

larray/tests/data/test1d_narrow.csv

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
a,value
2+
a0,0
3+
a1,1
4+
a2,2

larray/tests/data/test2d_narrow.csv

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
a,b,value
2+
1,b0,0
3+
1,b1,1
4+
2,b0,2
5+
2,b1,3
6+
3,b0,4
7+
3,b1,5

larray/tests/data/test3d_narrow.csv

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
a,b,c,value
2+
1,b0,c0,0
3+
1,b0,c1,1
4+
1,b0,c2,2
5+
1,b1,c0,3
6+
1,b1,c1,4
7+
1,b1,c2,5
8+
2,b0,c0,6
9+
2,b0,c1,7
10+
2,b0,c2,8
11+
2,b1,c0,9
12+
2,b1,c1,10
13+
2,b1,c2,11
14+
3,b0,c0,12
15+
3,b0,c1,13
16+
3,b0,c2,14
17+
3,b1,c0,15
18+
3,b1,c1,16
19+
3,b1,c2,17

larray/tests/data/test_narrow.xlsx

12.1 KB
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
a,b,c,value
2+
1,b0,c0,0
3+
1,b0,c1,1
4+
1,b0,c2,2
5+
1,b1,c0,3
6+
1,b1,c1,4
7+
1,b1,c2,5
8+
2,b1,c0,9
9+
2,b1,c2,11
10+
3,b0,c0,12
11+
3,b0,c1,13
12+
3,b0,c2,14
+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
a,b,c,value
2+
3,b1,c2,0
3+
3,b1,c1,1
4+
3,b1,c0,2
5+
3,b0,c2,3
6+
3,b0,c1,4
7+
3,b0,c0,5
8+
2,b1,c2,6
9+
2,b1,c1,7
10+
2,b1,c0,8
11+
2,b0,c2,9
12+
2,b0,c1,10
13+
2,b0,c0,11
14+
1,b1,c2,12
15+
1,b1,c1,13
16+
1,b1,c0,14
17+
1,b0,c2,15
18+
1,b0,c1,16
19+
1,b0,c0,17

larray/tests/test_array.py

+81-3
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ def setUp(self):
9696
self.io_missing_values = ndtest("a=1..3; b=b0,b1; c=c0..c2", dtype=float)
9797
self.io_missing_values[2, 'b0'] = np.nan
9898
self.io_missing_values[3, 'b1'] = np.nan
99+
self.io_narrow_missing_values = self.io_missing_values.copy()
100+
self.io_narrow_missing_values[2, 'b1', 'c1'] = np.nan
99101

100102
@pytest.fixture(autouse=True)
101103
def setup(self, tmpdir):
@@ -2637,6 +2639,26 @@ def test_read_csv(self):
26372639
res = read_csv(StringIO('a,a2,a0,a1\n,2,0,1\n'), sort_columns=True)
26382640
assert_array_equal(res, ndtest(3))
26392641

2642+
#################
2643+
# narrow format #
2644+
#################
2645+
res = read_csv(inputpath('test1d_narrow.csv'), wide=False)
2646+
assert_array_equal(res, self.io_1d)
2647+
2648+
res = read_csv(inputpath('test2d_narrow.csv'), wide=False)
2649+
assert_array_equal(res, self.io_2d)
2650+
2651+
res = read_csv(inputpath('test3d_narrow.csv'), wide=False)
2652+
assert_array_equal(res, self.io_3d)
2653+
2654+
# missing values
2655+
res = read_csv(inputpath('testmissing_values_narrow.csv'), wide=False)
2656+
assert_array_nan_equal(res, self.io_narrow_missing_values)
2657+
2658+
# unsorted values
2659+
res = read_csv(inputpath('testunsorted_narrow.csv'), wide=False)
2660+
assert_array_equal(res, self.io_unsorted)
2661+
26402662
def test_read_eurostat(self):
26412663
la = read_eurostat(inputpath('test5d_eurostat.csv'))
26422664
self.assertEqual(la.ndim, 5)
@@ -2667,19 +2689,48 @@ def test_read_excel_xlwings(self):
26672689
axis = Axis('dim=1d,2d,3d,5d')
26682690

26692691
arr = read_excel(inputpath('test.xlsx'), axis['1d'])
2670-
assert_array_equal(arr, ndtest(3))
2692+
assert_array_equal(arr, self.io_1d)
26712693

26722694
# missing rows + fill_value argument
26732695
arr = read_excel(inputpath('test.xlsx'), 'missing_values', fill_value=42)
26742696
expected = self.io_missing_values.copy()
26752697
expected[isnan(expected)] = 42
26762698
assert_array_equal(arr, expected)
26772699

2678-
# invalid keyword argument
2700+
#################
2701+
# narrow format #
2702+
#################
2703+
arr = read_excel(inputpath('test_narrow.xlsx'), '1d', wide=False)
2704+
assert_array_equal(arr, self.io_1d)
2705+
2706+
arr = read_excel(inputpath('test_narrow.xlsx'), '2d', wide=False)
2707+
assert_array_equal(arr, self.io_2d)
2708+
2709+
arr = read_excel(inputpath('test_narrow.xlsx'), '3d', wide=False)
2710+
assert_array_equal(arr, self.io_3d)
2711+
2712+
# missing rows + fill_value argument
2713+
arr = read_excel(inputpath('test_narrow.xlsx'), 'missing_values', fill_value=42, wide=False)
2714+
expected = self.io_narrow_missing_values.copy()
2715+
expected[isnan(expected)] = 42
2716+
assert_array_equal(arr, expected)
2717+
2718+
# unsorted values
2719+
arr = read_excel(inputpath('test_narrow.xlsx'), 'unsorted', wide=False)
2720+
assert_array_equal(arr, self.io_unsorted)
2721+
2722+
##############################
2723+
# invalid keyword argument #
2724+
##############################
2725+
26792726
with self.assertRaisesRegexp(TypeError, "'dtype' is an invalid keyword argument for this function when using "
26802727
"the xlwings backend"):
26812728
read_excel(inputpath('test.xlsx'), engine='xlwings', dtype=float)
26822729

2730+
#################
2731+
# blank cells #
2732+
#################
2733+
26832734
# Excel sheet with blank cells on right/bottom border of the array to read
26842735
fpath = inputpath('test_blank_cells.xlsx')
26852736
good = read_excel(fpath, 'good')
@@ -2722,14 +2773,41 @@ def test_read_excel_pandas(self):
27222773
axis = Axis('dim=1d,2d,3d,5d')
27232774

27242775
arr = read_excel(inputpath('test.xlsx'), axis['1d'], engine='xlrd')
2725-
assert_array_equal(arr, ndtest(3))
2776+
assert_array_equal(arr, self.io_1d)
27262777

27272778
# missing rows + fill_value argument
27282779
arr = read_excel(inputpath('test.xlsx'), 'missing_values', fill_value=42, engine='xlrd')
27292780
expected = self.io_missing_values.copy()
27302781
expected[isnan(expected)] = 42
27312782
assert_array_equal(arr, expected)
27322783

2784+
#################
2785+
# narrow format #
2786+
#################
2787+
arr = read_excel(inputpath('test_narrow.xlsx'), '1d', wide=False, engine='xlrd')
2788+
assert_array_equal(arr, self.io_1d)
2789+
2790+
arr = read_excel(inputpath('test_narrow.xlsx'), '2d', wide=False, engine='xlrd')
2791+
assert_array_equal(arr, self.io_2d)
2792+
2793+
arr = read_excel(inputpath('test_narrow.xlsx'), '3d', wide=False, engine='xlrd')
2794+
assert_array_equal(arr, self.io_3d)
2795+
2796+
# missing rows + fill_value argument
2797+
arr = read_excel(inputpath('test_narrow.xlsx'), 'missing_values',
2798+
fill_value=42, wide=False, engine='xlrd')
2799+
expected = self.io_narrow_missing_values
2800+
expected[isnan(expected)] = 42
2801+
assert_array_equal(arr, expected)
2802+
2803+
# unsorted values
2804+
arr = read_excel(inputpath('test_narrow.xlsx'), 'unsorted', wide=False, engine='xlrd')
2805+
assert_array_equal(arr, self.io_unsorted)
2806+
2807+
#################
2808+
# blank cells #
2809+
#################
2810+
27332811
# Excel sheet with blank cells on right/bottom border of the array to read
27342812
fpath = inputpath('test_blank_cells.xlsx')
27352813
good1 = read_excel(fpath, 'good', engine='xlrd')

0 commit comments

Comments
 (0)