1
1
import warnings
2
+ import sys
3
+ import os
2
4
3
5
import numpy as np
4
6
import pandas as pd
5
7
try :
6
8
import xlwings as xw
7
9
except ImportError :
8
10
xw = None
11
+ try :
12
+ import xlsxwriter
13
+ except ImportError :
14
+ xlsxwriter = None
9
15
10
16
from larray .core .array import Array , asarray
11
17
from larray .core .constants import nan
22
28
__all__ = ['read_excel' ]
23
29
24
30
25
- # We use "# doctest: +SKIP" for all tests because they work only if xlrd (an *optional* dependency) is installed
26
31
@deprecate_kwarg ('nb_index' , 'nb_axes' , arg_converter = lambda x : x + 1 )
27
32
@deprecate_kwarg ('sheetname' , 'sheet' )
33
+ # We use "# doctest: +SKIP" for all tests because they work only if openpyxl (an *optional* dependency) is installed
28
34
def read_excel (filepath , sheet = 0 , nb_axes = None , index_col = None , fill_value = nan , na = nan ,
29
35
sort_rows = False , sort_columns = False , wide = True , engine = None , range = slice (None ), ** kwargs ):
30
36
r"""
@@ -56,9 +62,11 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
56
62
Whether or not to assume the array is stored in "wide" format.
57
63
If False, the array is assumed to be stored in "narrow" format: one column per axis plus one value column.
58
64
Defaults to True.
59
- engine : {'xlrd', 'xlwings'}, optional
60
- Engine to use to read the Excel file. If None (default), it will use 'xlwings' by default if the module is
61
- installed and relies on Pandas default reader otherwise.
65
+ engine : {'xlwings', 'openpyxl', 'xlrd'}, optional
66
+ Engine to use to read the Excel file.
67
+ The 'xlrd' engine must be used to read Excel files with the old '.xls' extension.
68
+ Either 'xlwings' or 'openpyxl' can be used to read Excel files with the standard '.xlsx' extension.
69
+ Defaults to 'xlwings' if the module is installed, 'openpyxl' otherwise.
62
70
range : str, optional
63
71
Range to load the array from (only supported for the 'xlwings' engine). Defaults to slice(None) which loads
64
72
the whole sheet, ignoring blank cells in the bottom right corner.
@@ -75,7 +83,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
75
83
Read array from first sheet
76
84
77
85
>>> # The data below is derived from a subset of the demo_pjan table from Eurostat
78
- >>> read_excel(fname) # doctest: +SKIP
86
+ >>> read_excel(fname) # doctest: +SKIP
79
87
country gender\time 2013 2014 2015
80
88
Belgium Male 5472856 5493792 5524068
81
89
Belgium Female 5665118 5687048 5713206
@@ -87,7 +95,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
87
95
Read array from a specific sheet
88
96
89
97
>>> # The data below is derived from a subset of the demo_fasec table from Eurostat
90
- >>> read_excel(fname, 'births') # doctest: +SKIP
98
+ >>> read_excel(fname, 'births') # doctest: +SKIP
91
99
country gender\time 2013 2014 2015
92
100
Belgium Male 64371 64173 62561
93
101
Belgium Female 61235 60841 59713
@@ -110,7 +118,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
110
118
By default, cells associated with missing label combinations are filled with NaN. In that case, an int array
111
119
is converted to a float array.
112
120
113
- >>> read_excel(fname, sheet='population_missing_values') # doctest: +SKIP
121
+ >>> read_excel(fname, sheet='population_missing_values') # doctest: +SKIP
114
122
country gender\time 2013 2014 2015
115
123
Belgium Male 5472856.0 5493792.0 5524068.0
116
124
Belgium Female 5665118.0 5687048.0 5713206.0
@@ -121,7 +129,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
121
129
122
130
Using the ``fill_value`` argument, you can choose another value to use to fill missing cells.
123
131
124
- >>> read_excel(fname, sheet='population_missing_values', fill_value=0) # doctest: +SKIP
132
+ >>> read_excel(fname, sheet='population_missing_values', fill_value=0) # doctest: +SKIP
125
133
country gender\time 2013 2014 2015
126
134
Belgium Male 5472856 5493792 5524068
127
135
Belgium Female 5665118 5687048 5713206
@@ -143,19 +151,19 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
143
151
Germany Female 41142770 41210540 41362080
144
152
145
153
>>> # read the array stored in the sheet 'population_missing_axis_name' as is
146
- >>> arr = read_excel(fname, sheet='population_missing_axis_name') # doctest: +SKIP
154
+ >>> arr = read_excel(fname, sheet='population_missing_axis_name') # doctest: +SKIP
147
155
>>> # we expected a 3 x 2 x 3 array with data of type int
148
156
>>> # but we got a 6 x 4 array with data of type object
149
- >>> arr.info # doctest: +SKIP
157
+ >>> arr.info # doctest: +SKIP
150
158
6 x 4
151
159
country [6]: 'Belgium' 'Belgium' 'France' 'France' 'Germany' 'Germany'
152
160
{1} [4]: 'gender' '2013' '2014' '2015'
153
161
dtype: object
154
162
memory used: 192 bytes
155
163
>>> # using argument 'nb_axes', you can force the number of axes of the output array
156
- >>> arr = read_excel(fname, sheet='population_missing_axis_name', nb_axes=3) # doctest: +SKIP
164
+ >>> arr = read_excel(fname, sheet='population_missing_axis_name', nb_axes=3) # doctest: +SKIP
157
165
>>> # as expected, we have a 3 x 2 x 3 array with data of type int
158
- >>> arr.info # doctest: +SKIP
166
+ >>> arr.info # doctest: +SKIP
159
167
3 x 2 x 3
160
168
country [3]: 'Belgium' 'France' 'Germany'
161
169
gender [2]: 'Male' 'Female'
@@ -176,14 +184,14 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
176
184
France 2015 66458153
177
185
178
186
>>> # to read arrays stored in 'narrow' format, you must pass wide=False to read_excel
179
- >>> read_excel(fname, 'population_narrow_format', wide=False) # doctest: +SKIP
187
+ >>> read_excel(fname, 'population_narrow_format', wide=False) # doctest: +SKIP
180
188
country\time 2013 2014 2015
181
189
Belgium 11137974 11180840 11237274
182
190
France 65600350 66165980 66458153
183
191
184
192
Extract array from a given range (xlwings only)
185
193
186
- >>> read_excel(fname, 'population_births_deaths', range='A9:E15') # doctest: +SKIP
194
+ >>> read_excel(fname, 'population_births_deaths', range='A9:E15') # doctest: +SKIP
187
195
country gender\time 2013 2014 2015
188
196
Belgium Male 64371 64173 62561
189
197
Belgium Female 61235 60841 59713
@@ -200,7 +208,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
200
208
sheet = _translate_sheet_name (sheet )
201
209
202
210
if engine is None :
203
- engine = 'xlwings' if xw is not None else None
211
+ engine = 'xlwings' if xw is not None else 'openpyxl'
204
212
205
213
index_col = _get_index_col (nb_axes , index_col , wide )
206
214
@@ -228,10 +236,13 @@ def __init__(self, fname, overwrite_file=False):
228
236
super (PandasExcelHandler , self ).__init__ (fname , overwrite_file )
229
237
230
238
def _open_for_read (self ):
231
- self .handle = pd .ExcelFile (self .fname )
239
+ engine = 'openpyxl' if sys .version_info < (3 , 7 ) else None
240
+ self .handle = pd .ExcelFile (self .fname , engine = engine )
232
241
233
242
def _open_for_write (self ):
234
- self .handle = pd .ExcelWriter (self .fname )
243
+ _ , ext = os .path .splitext (self .fname )
244
+ engine = 'xlsxwriter' if ext == '.xlsx' and xlsxwriter is not None else None
245
+ self .handle = pd .ExcelWriter (self .fname , engine = engine )
235
246
236
247
def list_items (self ):
237
248
sheet_names = self .handle .sheet_names
@@ -251,7 +262,7 @@ def _read_item(self, key, type, *args, **kwargs):
251
262
raise TypeError ()
252
263
253
264
def _dump_item (self , key , value , * args , ** kwargs ):
254
- kwargs ['engine' ] = 'xlsxwriter'
265
+ kwargs ['engine' ] = self . handle . engine
255
266
if isinstance (value , Array ):
256
267
value .to_excel (self .handle , key , * args , ** kwargs )
257
268
else :
@@ -260,15 +271,15 @@ def _dump_item(self, key, value, *args, **kwargs):
260
271
def _read_metadata (self ):
261
272
sheet_meta = '__metadata__'
262
273
if sheet_meta in self .handle .sheet_names :
263
- meta = read_excel (self .handle , sheet_meta , engine = 'xlrd' , wide = False )
274
+ meta = read_excel (self .handle , sheet_meta , engine = self . handle . engine , wide = False )
264
275
return Metadata .from_array (meta )
265
276
else :
266
277
return Metadata ()
267
278
268
279
def _dump_metadata (self , metadata ):
269
280
if len (metadata ) > 0 :
270
281
metadata = asarray (metadata )
271
- metadata .to_excel (self .handle , '__metadata__' , engine = 'xlsxwriter' , wide = False , value_name = '' )
282
+ metadata .to_excel (self .handle , '__metadata__' , engine = self . handle . engine , wide = False , value_name = '' )
272
283
273
284
def save (self ):
274
285
pass
0 commit comments