From 3f1e63af96b61fba3f1aa258e69247ac183e9653 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Thu, 9 Jan 2020 14:38:58 +0100 Subject: [PATCH 1/4] bump version to 0.33-dev --- condarecipe/larray/meta.yaml | 4 +- doc/source/changes.rst | 14 ++++++ doc/source/changes/version_0_33.rst.inc | 58 +++++++++++++++++++++++++ larray/__init__.py | 2 +- setup.py | 2 +- 5 files changed, 76 insertions(+), 4 deletions(-) create mode 100644 doc/source/changes/version_0_33.rst.inc diff --git a/condarecipe/larray/meta.yaml b/condarecipe/larray/meta.yaml index 150cdd70a..7153efa46 100644 --- a/condarecipe/larray/meta.yaml +++ b/condarecipe/larray/meta.yaml @@ -1,9 +1,9 @@ package: name: larray - version: 0.32.1 + version: 0.33-dev source: - git_tag: 0.32.1 + git_tag: 0.33-dev git_url: https://github.com/larray-project/larray.git # git_tag: master # git_url: file://c:/Users/gdm/devel/larray/.git diff --git a/doc/source/changes.rst b/doc/source/changes.rst index e531b1f51..f031c7827 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -1,6 +1,20 @@ Change log ########## +Version 0.33 +============ + +In development. + +CORE +---- +.. include:: ./changes/version_0_33.rst.inc + +EDITOR +------ +.. include:: ./changes/editor/version_0_33.rst.inc + + Version 0.32.1 ============== diff --git a/doc/source/changes/version_0_33.rst.inc b/doc/source/changes/version_0_33.rst.inc new file mode 100644 index 000000000..a23f5c467 --- /dev/null +++ b/doc/source/changes/version_0_33.rst.inc @@ -0,0 +1,58 @@ +.. py:currentmodule:: larray + + +Syntax changes +^^^^^^^^^^^^^^ + +* renamed ``Array.old_method_name()`` to :py:obj:`Array.new_method_name()` (closes :issue:`1`). + +* renamed ``old_argument_name`` argument of :py:obj:`Array.method_name()` to ``new_argument_name``. + + +Backward incompatible changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* other backward incompatible changes + + +New features +^^^^^^^^^^^^ + +* added a feature (see the :ref:`miscellaneous section ` for details). It works on :ref:`api-axis` and + :ref:`api-group` objects. + + Here is an example of the new feature: + + >>> arr = ndtest((2, 3)) + >>> arr + a\b b0 b1 b2 + a0 0 1 2 + a1 3 4 5 + + And it can also be used like this: + + >>> arr = ndtest("a=a0..a2") + >>> arr + a a0 a1 a2 + 0 1 2 + +* added another feature in the editor (closes :editor_issue:`1`). + + .. note:: + + - It works for foo bar ! + - It does not work for foo baz ! + + +.. _misc: + +Miscellaneous improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* improved something. + + +Fixes +^^^^^ + +* fixed something (closes :issue:`1`). 
diff --git a/larray/__init__.py b/larray/__init__.py
index 3310c1824..3006e6bc8 100644
--- a/larray/__init__.py
+++ b/larray/__init__.py
@@ -1,6 +1,6 @@
 from __future__ import absolute_import, division, print_function
 
-__version__ = '0.32.1'
+__version__ = '0.33-dev'
 
 from larray.core.axis import Axis, AxisCollection, X
diff --git a/setup.py b/setup.py
index 6160da31b..db97a1522 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@ def readlocal(fname):
 
 
 DISTNAME = 'larray'
-VERSION = '0.32.1'
+VERSION = '0.33-dev'
 AUTHOR = 'Gaetan de Menten, Geert Bryon, Johan Duyck, Alix Damman'
 AUTHOR_EMAIL = 'gdementen@gmail.com'
 DESCRIPTION = "N-D labeled arrays in Python"

From 354e297da073b9c0a32c8a519425de383f31903a Mon Sep 17 00:00:00 2001
From: Alix Damman
Date: Wed, 8 Jan 2020 11:03:55 +0100
Subject: [PATCH 2/4] fixed test_init_session_xlsx()

---
 larray/tests/test_session.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py
index 55fbb0a99..06fb7bc78 100644
--- a/larray/tests/test_session.py
+++ b/larray/tests/test_session.py
@@ -67,7 +67,8 @@ def test_init_session(meta):
 @needs_xlwings
 def test_init_session_xlsx():
     s = Session(inputpath('demography_eurostat.xlsx'))
-    assert s.names == ['births', 'deaths', 'immigration', 'pop', 'pop_benelux']
+    assert s.names == ['births', 'deaths', 'immigration', 'population',
+                       'population_5_countries', 'population_benelux']
 
 
 @needs_pytables

From 194c92a536fcb07de553475e375f15e371d17bb0 Mon Sep 17 00:00:00 2001
From: Alix Damman
Date: Wed, 8 Jan 2020 10:40:22 +0100
Subject: [PATCH 3/4] fix #842: include scalars when dumping or loading a
 Session object (HDF5 + pickle formats)

---
 doc/source/changes/version_0_33.rst.inc |   3 +-
 larray/core/session.py                  | 257 ++++++++++++++++--------
 larray/inout/common.py                  |  15 ++
 larray/inout/hdf.py                     |  57 ++++--
 larray/inout/pickle.py                  |  13 +-
 larray/tests/test_session.py            |  18 +-
 6 files changed, 248 insertions(+), 115 deletions(-)

diff --git a/doc/source/changes/version_0_33.rst.inc b/doc/source/changes/version_0_33.rst.inc
index a23f5c467..4d5f5bad4 100644
--- a/doc/source/changes/version_0_33.rst.inc
+++ b/doc/source/changes/version_0_33.rst.inc
@@ -49,7 +49,8 @@ New features
 Miscellaneous improvements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-* improved something.
+* scalar objects (i.e. of type int, float, bool, string, date, time or datetime) belonging to a session
+  are now also saved and loaded when using the HDF5 or pickle format (closes :issue:`842`).
 
 
 Fixes
diff --git a/larray/core/session.py b/larray/core/session.py
index a65306841..9c40f2028 100644
--- a/larray/core/session.py
+++ b/larray/core/session.py
@@ -46,6 +46,8 @@ class Session(object):
 
     Examples
     --------
+    >>> # scalars
+    >>> i, s = 5, 'string'
     >>> # axes
     >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2")
     >>> # groups
     >>> a01 = a['a0,a1'] >> 'a01'
     >>> # arrays
     >>> arr1, arr2 = ndtest((a, b)), ndtest(a)
 
     create a Session by passing a list of pairs (name, object)
 
-    >>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)])
+    >>> ses = Session([('i', i), ('s', s), ('a', a), ('b', b), ('a01', a01),
+    ...
('arr1', arr1), ('arr2', arr2)]) create a Session using keyword arguments (but you lose order on Python < 3.6) - >>> s = Session(a=a, b=b, a01=a01, arr1=arr1, arr2=arr2) + >>> ses = Session(i=i, s=s, a=a, b=b, a01=a01, arr1=arr1, arr2=arr2) create a Session by passing a dictionary (but you lose order on Python < 3.6) - >>> s = Session({'a': a, 'b': b, 'a01': a01, 'arr1': arr1, 'arr2': arr2}) + >>> ses = Session({'i': i, 's': s, 'a': a, 'b': b, 'a01': a01, 'arr1': arr1, 'arr2': arr2}) load Session from file - >>> s = Session('my_session.h5') # doctest: +SKIP + >>> ses = Session('my_session.h5') # doctest: +SKIP create a session with metadata >>> # Python <= 3.5 - >>> s = Session([('arr1', arr1), ('arr2', arr2)], meta=[('title', 'my title'), ('author', 'John Smith')]) - >>> s.meta + >>> ses = Session([('arr1', arr1), ('arr2', arr2)], meta=[('title', 'my title'), ('author', 'John Smith')]) + >>> ses.meta title: my title author: John Smith >>> # Python 3.6+ - >>> s = Session(arr1=arr1, arr2=arr2, meta=Metadata(title='my title', author='John Smith')) # doctest: +SKIP - >>> s.meta + >>> ses = Session(arr1=arr1, arr2=arr2, meta=Metadata(title='my title', author='John Smith')) # doctest: +SKIP + >>> ses.meta title: my title author: John Smith """ @@ -345,8 +348,8 @@ def __setstate__(self, d): def load(self, fname, names=None, engine='auto', display=False, **kwargs): r""" - Load Array objects from a file, or several .csv files (all formats). - Load also Axis and Group objects from a file (HDF and pickle formats). + Load objects from a file, or several .csv files. + The Excel and CSV formats can only contain objects of Array type (plus metadata). WARNING: never load a file using the pickle engine (.pkl or .pickle) from an untrusted source, as it can lead to arbitrary code execution. @@ -369,46 +372,57 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs): -------- In one module: + >>> # scalars + >>> i, s = 5, 'string' # doctest: +SKIP >>> # axes - >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP + >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP >>> # groups - >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP + >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP >>> # arrays - >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP - >>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP + >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP + >>> ses = Session([('i', i), ('s', s), ('a', a), ('b', b), ('a01', a01), + ... 
('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP >>> # metadata - >>> s.meta.title = 'my title' # doctest: +SKIP - >>> s.meta.author = 'John Smith' # doctest: +SKIP + >>> ses.meta.title = 'my title' # doctest: +SKIP + >>> ses.meta.author = 'John Smith' # doctest: +SKIP >>> # save the session in an HDF5 file - >>> s.save('input.h5') # doctest: +SKIP + >>> ses.save('input.h5') # doctest: +SKIP In another module: load the whole session >>> # the load method is automatically called when passing >>> # the path of file to the Session constructor - >>> s = Session('input.h5') # doctest: +SKIP - >>> s # doctest: +SKIP - Session(a, b, a01, arr1, arr2) - >>> s.meta # doctest: +SKIP + >>> ses = Session('input.h5') # doctest: +SKIP + >>> ses # doctest: +SKIP + Session(a, a01, arr1, arr2, b, i, s) + >>> ses.meta # doctest: +SKIP title: my title author: John Smith Load only some objects - >>> s = Session() # doctest: +SKIP - >>> s.load('input.h5', ['a', 'b', 'arr1', 'arr2']) # doctest: +SKIP - >>> a, b, arr1, arr2 = s['a', 'b', 'arr1', 'arr2'] # doctest: +SKIP - >>> # only if you know the order of arrays stored in session - >>> a, b, a01, arr1, arr2 = s.values() # doctest: +SKIP + >>> ses = Session() + >>> ses.load('input.h5', names=['s', 'a', 'b', 'arr1', 'arr2'], display=True) # doctest: +SKIP + opening input.h5 + loading Axis object a ... done + loading Array object arr1 ... done + loading Array object arr2 ... done + loading Axis object b ... done + loading str object s ... done Using .csv files (assuming the same session as above) - >>> s.save('data') # doctest: +SKIP - >>> s = Session() # doctest: +SKIP - >>> # load all .csv files starting with "output" in the data directory - >>> s.load('data') # doctest: +SKIP - >>> # or only arrays (i.e. all CSV files starting with 'arr') - >>> s.load('data/arr*.csv') # doctest: +SKIP + >>> ses.save('data') # doctest: +SKIP + >>> ses = Session() # doctest: +SKIP + >>> # load all .csv files from the 'data' directory + >>> ses.load('data', display=True) # doctest: +SKIP + opening data + loading Array object arr1 ... done + loading Array object arr2 ... done + >>> # or only arrays containing the character '1' in their names + >>> ses.load('data/*1.csv', display=True) # doctest: +SKIP + opening data/*1.csv + loading Array object arr1 ... done """ if display: print("opening", fname) @@ -433,8 +447,8 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs): def save(self, fname, names=None, engine='auto', overwrite=True, display=False, **kwargs): r""" - Dumps Array objects from the current session to a file (all formats). - Dumps also Axis and Group objects from the current session to a file (HDF and pickle format). + Dumps objects from the current session to a file, or several .csv files. + The Excel and CSV formats only dump objects of Array type (plus metadata). Parameters ---------- @@ -442,7 +456,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False, Path of the file for the dump. If objects are saved in CSV files, the path corresponds to a directory. names : list of str or None, optional - List of names of Array/Axis/Group objects to dump. + List of names of objects to dump. If `fname` is None, list of paths to CSV files. Defaults to all objects present in the Session. 
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional @@ -455,31 +469,48 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False, Examples -------- + >>> # scalars + >>> i, s = 5, 'string' # doctest: +SKIP >>> # axes - >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP + >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP >>> # groups - >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP + >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP >>> # arrays - >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP - >>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP + >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP + >>> ses = Session([('i', i), ('s', s), ('a', a), ('b', b), ('a01', a01), + ... ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP >>> # metadata - >>> s.meta.title = 'my title' # doctest: +SKIP - >>> s.meta.author = 'John Smith' # doctest: +SKIP + >>> ses.meta.title = 'my title' # doctest: +SKIP + >>> ses.meta.author = 'John Smith' # doctest: +SKIP Save all objects - >>> s.save('output.h5') # doctest: +SKIP + >>> ses.save('output.h5', display=True) # doctest: +SKIP + dumping i ... done + dumping s ... done + dumping a ... done + dumping b ... done + dumping a01 ... done + dumping arr1 ... done + dumping arr2 ... done Save only some objects - >>> s.save('output.h5', ['a', 'b', 'arr1']) # doctest: +SKIP + >>> ses.save('output.h5', names=['s', 'a', 'b', 'arr1', 'arr2'], display=True) # doctest: +SKIP + dumping s ... done + dumping a ... done + dumping b ... done + dumping arr1 ... done + dumping arr2 ... done Update file - >>> arr1, arr4 = ndtest((3, 3)), ndtest((2, 3)) # doctest: +SKIP - >>> s2 = Session([('arr1', arr1), ('arr4', arr4)]) # doctest: +SKIP + >>> arr1, arr4 = ndtest((3, 3)), ndtest((2, 3)) # doctest: +SKIP + >>> ses2 = Session([('arr1', arr1), ('arr4', arr4)]) # doctest: +SKIP >>> # replace arr1 and add arr4 in file output.h5 - >>> s2.save('output.h5', overwrite=False) # doctest: +SKIP + >>> ses2.save('output.h5', overwrite=False, display=True) # doctest: +SKIP + dumping arr1 ... done + dumping arr4 ... done """ if engine == 'auto': _, ext = os.path.splitext(fname) @@ -564,7 +595,7 @@ def to_globals(self, names=None, depth=0, warn=True, inplace=False): def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs): r""" - Dumps Array, Axis and Group objects from the current session to a file using pickle. + Dumps objects from the current session to a file using pickle. WARNING: never load a pickle file (.pkl or .pickle) from an untrusted source, as it can lead to arbitrary code execution. @@ -574,7 +605,7 @@ def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs): fname : str Path for the dump. names : list of str or None, optional - Names of Array/Axis/Group objects to dump. + Names of objects to dump. Defaults to all objects present in the Session. overwrite: bool, optional Whether or not to overwrite an existing file, if any. 
@@ -584,24 +615,39 @@ def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs): Examples -------- + >>> # scalars + >>> i, s = 5, 'string' # doctest: +SKIP >>> # axes - >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP + >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP >>> # groups - >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP + >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP >>> # arrays - >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP - >>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP + >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP + >>> ses = Session([('i', i), ('s', s), ('a', a), ('b', b), ('a01', a01), + ... ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP >>> # metadata - >>> s.meta.title = 'my title' # doctest: +SKIP - >>> s.meta.author = 'John Smith' # doctest: +SKIP + >>> ses.meta.title = 'my title' # doctest: +SKIP + >>> ses.meta.author = 'John Smith' # doctest: +SKIP - Save all arrays + Save all objects - >>> s.to_pickle('output.pkl') # doctest: +SKIP + >>> ses.to_pickle('output.pkl', display=True) # doctest: +SKIP + dumping i ... done + dumping s ... done + dumping a ... done + dumping b ... done + dumping a01 ... done + dumping arr1 ... done + dumping arr2 ... done Save only some objects - >>> s.to_pickle('output.pkl', ['a', 'b', 'arr1']) # doctest: +SKIP + >>> ses.to_pickle('output.pkl', names=['s', 'a', 'b', 'arr1', 'arr2'], display=True) # doctest: +SKIP + dumping s ... done + dumping a ... done + dumping b ... done + dumping arr1 ... done + dumping arr2 ... done """ self.save(fname, names, ext_default_engine['pkl'], overwrite, display, **kwargs) @@ -609,14 +655,14 @@ def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs): def to_hdf(self, fname, names=None, overwrite=True, display=False, **kwargs): r""" - Dumps Array, Axis and Group objects from the current session to an HDF file. + Dumps objects from the current session to an HDF file. Parameters ---------- fname : str Path of the file for the dump. names : list of str or None, optional - Names of Array/Axis/Group objects to dump. + Names of objects to dump. Defaults to all objects present in the Session. overwrite: bool, optional Whether or not to overwrite an existing file, if any. @@ -626,24 +672,39 @@ def to_hdf(self, fname, names=None, overwrite=True, display=False, **kwargs): Examples -------- + >>> # scalars + >>> i, s = 5, 'string' # doctest: +SKIP >>> # axes - >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP + >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP >>> # groups - >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP + >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP >>> # arrays - >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP - >>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP + >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP + >>> ses = Session([('i', i), ('s', s), ('a', a), ('b', b), ('a01', a01), + ... ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP >>> # metadata - >>> s.meta.title = 'my title' # doctest: +SKIP - >>> s.meta.author = 'John Smith' # doctest: +SKIP + >>> ses.meta.title = 'my title' # doctest: +SKIP + >>> ses.meta.author = 'John Smith' # doctest: +SKIP - Save all arrays + Save all objects - >>> s.to_hdf('output.h5') # doctest: +SKIP + >>> ses.to_hdf('output.h5', display=True) # doctest: +SKIP + dumping i ... done + dumping s ... 
done + dumping a ... done + dumping b ... done + dumping a01 ... done + dumping arr1 ... done + dumping arr2 ... done Save only some objects - >>> s.to_hdf('output.h5', ['a', 'b', 'arr1']) # doctest: +SKIP + >>> ses.to_hdf('output.h5', names=['s', 'a', 'b', 'arr1', 'arr2'], display=True) # doctest: +SKIP + dumping s ... done + dumping a ... done + dumping b ... done + dumping arr1 ... done + dumping arr2 ... done """ self.save(fname, names, ext_default_engine['hdf'], overwrite, display, **kwargs) @@ -672,24 +733,35 @@ def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs): Examples -------- + >>> # scalars + >>> i, s = 5, 'string' # doctest: +SKIP >>> # axes - >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP + >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP >>> # groups - >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP + >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP >>> # arrays - >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP - >>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP + >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP + >>> ses = Session([('i', i), ('s', s), ('a', a), ('b', b), ('a01', a01), + ... ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP >>> # metadata - >>> s.meta.title = 'my title' # doctest: +SKIP - >>> s.meta.author = 'John Smith' # doctest: +SKIP + >>> ses.meta.title = 'my title' # doctest: +SKIP + >>> ses.meta.author = 'John Smith' # doctest: +SKIP - Save all arrays + Save all arrays (and arrays only) - >>> s.to_excel('output.xlsx') # doctest: +SKIP + >>> ses.to_excel('output.xlsx', display=True) # doctest: +SKIP + dumping i ... Cannot dump i. int is not a supported type + dumping s ... Cannot dump s. str is not a supported type + dumping a ... Cannot dump a. Axis is not a supported type + dumping b ... Cannot dump b. Axis is not a supported type + dumping a01 ... Cannot dump a01. LGroup is not a supported type + dumping arr1 ... done + dumping arr2 ... done - Save only some objects + Save only some arrays - >>> s.to_excel('output.xlsx', ['a', 'b', 'arr1']) # doctest: +SKIP + >>> ses.to_excel('output.xlsx', names=['arr1'], display=True) # doctest: +SKIP + dumping arr1 ... done """ self.save(fname, names, ext_default_engine['xlsx'], overwrite, display, **kwargs) @@ -716,24 +788,35 @@ def to_csv(self, fname, names=None, display=False, **kwargs): Examples -------- + >>> # scalars + >>> i, s = 5, 'string' # doctest: +SKIP >>> # axes - >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP + >>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP >>> # groups - >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP + >>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP >>> # arrays - >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP - >>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP + >>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP + >>> ses = Session([('i', i), ('s', s), ('a', a), ('b', b), ('a01', a01), + ... ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP >>> # metadata - >>> s.meta.title = 'my title' # doctest: +SKIP - >>> s.meta.author = 'John Smith' # doctest: +SKIP + >>> ses.meta.title = 'my title' # doctest: +SKIP + >>> ses.meta.author = 'John Smith' # doctest: +SKIP - Save all arrays + Save all arrays (and arrays only) - >>> s.to_csv('./Output') # doctest: +SKIP + >>> ses.to_csv('output', display=True) # doctest: +SKIP + dumping i ... Cannot dump i. 
int is not a supported type
+        dumping s ... Cannot dump s. str is not a supported type
+        dumping a ... Cannot dump a. Axis is not a supported type
+        dumping b ... Cannot dump b. Axis is not a supported type
+        dumping a01 ... Cannot dump a01. LGroup is not a supported type
+        dumping arr1 ... done
+        dumping arr2 ... done
 
         Save only some arrays
 
-        >>> s.to_csv('./Output', ['a', 'b', 'arr1'])  # doctest: +SKIP
+        >>> ses.to_csv('output', names=['arr1'], display=True)  # doctest: +SKIP
+        dumping arr1 ... done
         """
         self.save(fname, names, ext_default_engine['csv'], display=display, **kwargs)
diff --git a/larray/inout/common.py b/larray/inout/common.py
index eb9056c8d..e05fa5c17 100644
--- a/larray/inout/common.py
+++ b/larray/inout/common.py
@@ -1,11 +1,26 @@
 from __future__ import absolute_import, print_function
 
 import os
+from datetime import date, time, datetime
 from collections import OrderedDict
 
+from larray.util.compat import bytes, unicode
+from larray.core.axis import Axis
+from larray.core.group import Group
 from larray.core.array import Array
 
 
+# all formats
+_supported_larray_types = (Axis, Group, Array)
+
+# only for HDF5 and pickle formats
+# TODO: also support list, tuple and dict?
+# replace unicode with str once Python 2.7 is no longer supported
+_supported_scalars_types = (int, float, bool, bytes, unicode, date, time, datetime)
+_supported_types = _supported_larray_types + _supported_scalars_types
+_supported_typenames = {cls.__name__ for cls in _supported_types}
+
+
 def _get_index_col(nb_axes=None, index_col=None, wide=True):
     if not wide:
         if nb_axes is not None or index_col is not None:
diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py
index b0eae1d9f..b2c64315a 100644
--- a/larray/inout/hdf.py
+++ b/larray/inout/hdf.py
@@ -3,6 +3,7 @@
 import warnings
 
 import numpy as np
+import pandas as pd
 from pandas import HDFStore
 
 from larray.core.array import Array
@@ -12,21 +13,27 @@
 from larray.core.metadata import Metadata
 from larray.util.misc import LHDFStore
 from larray.inout.session import register_file_handler
-from larray.inout.common import FileHandler
+from larray.inout.common import FileHandler, _supported_typenames, _supported_scalars_types
 from larray.inout.pandas import df_asarray
 from larray.example import get_example_filepath
 
 
+# for backward compatibility (larray < 0.29) but any object read from an hdf file should have
+# an attribute 'type'
+def _get_type_from_attrs(attrs):
+    return attrs.type if 'type' in attrs else 'Array'
+
+
 def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, sort_columns=False,
              name=None, **kwargs):
-    r"""Reads an axis or group or array named key from a HDF5 file in filepath (path+name)
+    r"""Reads a scalar, axis, group or array named `key` from an HDF5 file at `filepath` (path + name)
 
     Parameters
     ----------
     filepath_or_buffer : str or pandas.HDFStore
         Path and name where the HDF5 file is stored or a HDFStore object.
     key : str or Group
-        Name of the array.
+        Name of the scalar, axis, group or array to read.
     fill_value : scalar or Array, optional
         Value used to fill cells corresponding to label combinations which are not present in the input.
         Defaults to NaN.
@@ -70,11 +77,14 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s key = _translate_group_key_hdf(key) res = None with LHDFStore(filepath_or_buffer) as store: - pd_obj = store.get(key) + try: + pd_obj = store.get(key) + except KeyError: + filepath = filepath_or_buffer if isinstance(filepath_or_buffer, HDFStore) else store.filename + raise KeyError('No item with name {} has been found in file {}'.format(key, filepath)) attrs = store.get_storer(key).attrs writer = attrs.writer if 'writer' in attrs else None - # for backward compatibility but any object read from an hdf file should have an attribute 'type' - _type = attrs.type if 'type' in attrs else 'Array' + _type = _get_type_from_attrs(attrs) _meta = attrs.metadata if 'metadata' in attrs else None if _type == 'Array': # cartesian product is not necessary if the array was written by LArray @@ -110,6 +120,10 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s key = np.char.decode(key, 'utf-8') axis = read_hdf(filepath_or_buffer, attrs['axis_key']) res = LGroup(key=key, name=name, axis=axis) + elif _type in _supported_typenames: + res = pd_obj.values + assert len(res) == 1 + res = res[0] return res @@ -126,36 +140,37 @@ def _open_for_write(self): def list_items(self): keys = [key.strip('/') for key in self.handle.keys()] + items = [(key, _get_type_from_attrs(self.handle.get_storer(key).attrs)) for key in keys if '/' not in key] + # ---- for backward compatibility (LArray < 0.33) ---- # axes - items = [(key.split('/')[-1], 'Axis') for key in keys if '__axes__' in key] + items += [(key.split('/')[-1], 'Axis_Backward_Comp') for key in keys if '__axes__' in key] # groups - items += [(key.split('/')[-1], 'Group') for key in keys if '__groups__' in key] - # arrays - items += [(key, 'Array') for key in keys if '/' not in key] + items += [(key.split('/')[-1], 'Group_Backward_Comp') for key in keys if '__groups__' in key] return items - def _read_item(self, key, type, *args, **kwargs): - if type == 'Array': + def _read_item(self, key, typename, *args, **kwargs): + if typename in _supported_typenames: hdf_key = '/' + key - elif type == 'Axis': + # ---- for backward compatibility (LArray < 0.33) ---- + elif typename == 'Axis_Backward_Comp': hdf_key = '__axes__/' + key - elif type == 'Group': + elif typename == 'Group_Backward_Comp': hdf_key = '__groups__/' + key else: raise TypeError() return read_hdf(self.handle, hdf_key, *args, **kwargs) def _dump_item(self, key, value, *args, **kwargs): - if isinstance(value, Array): - hdf_key = '/' + key - value.to_hdf(self.handle, hdf_key, *args, **kwargs) - elif isinstance(value, Axis): - hdf_key = '__axes__/' + key + hdf_key = '/' + key + if isinstance(value, (Array, Axis)): value.to_hdf(self.handle, hdf_key, *args, **kwargs) elif isinstance(value, Group): - hdf_key = '__groups__/' + key - hdf_axis_key = '__axes__/' + value.axis.name + hdf_axis_key = '/' + value.axis.name value.to_hdf(self.handle, hdf_key, hdf_axis_key, *args, **kwargs) + elif isinstance(value, _supported_scalars_types): + s = pd.Series(data=value) + self.handle.put(hdf_key, s) + self.handle.get_storer(hdf_key).attrs.type = type(value).__name__ else: raise TypeError() diff --git a/larray/inout/pickle.py b/larray/inout/pickle.py index 971738217..3f39c72a3 100644 --- a/larray/inout/pickle.py +++ b/larray/inout/pickle.py @@ -9,7 +9,7 @@ from larray.core.metadata import Metadata from larray.util.compat import pickle from larray.inout.session import register_file_handler -from 
larray.inout.common import FileHandler +from larray.inout.common import FileHandler, _supported_types, _supported_typenames, _supported_scalars_types @register_file_handler('pickle', ['pkl', 'pickle']) @@ -25,22 +25,25 @@ def _open_for_write(self): self.data = OrderedDict() def list_items(self): + # scalar + items = [(key, type(value).__name__) for key, value in self.data.items() + if isinstance(value, _supported_scalars_types)] # axes - items = [(key, 'Axis') for key, value in self.data.items() if isinstance(value, Axis)] + items += [(key, 'Axis') for key, value in self.data.items() if isinstance(value, Axis)] # groups items += [(key, 'Group') for key, value in self.data.items() if isinstance(value, Group)] # arrays items += [(key, 'Array') for key, value in self.data.items() if isinstance(value, Array)] return items - def _read_item(self, key, type, *args, **kwargs): - if type in {'Array', 'Axis', 'Group'}: + def _read_item(self, key, typename, *args, **kwargs): + if typename in _supported_typenames: return self.data[key] else: raise TypeError() def _dump_item(self, key, value, *args, **kwargs): - if isinstance(value, (Array, Axis, Group)): + if isinstance(value, _supported_types): self.data[key] = value else: raise TypeError() diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py index 06fb7bc78..9d9f22410 100644 --- a/larray/tests/test_session.py +++ b/larray/tests/test_session.py @@ -2,6 +2,7 @@ import os import shutil +from datetime import date, time, datetime import numpy as np import pandas as pd @@ -9,6 +10,7 @@ from larray.tests.common import (assert_array_nan_equal, inputpath, tmp_path, meta, needs_xlwings, needs_pytables, needs_xlrd) +from larray.inout.common import _supported_scalars_types from larray import (Session, Axis, Array, Group, isnan, zeros_like, ndtest, ones_like, ones, full, local_arrays, global_arrays, arrays) from larray.util.compat import pickle, PY2 @@ -178,7 +180,7 @@ def test_names(session): def _test_io(fpath, session, meta, engine): is_excel_or_csv = 'excel' in engine or 'csv' in engine - kind = Array if is_excel_or_csv else (Axis, Group, Array) + kind = Array if is_excel_or_csv else (Axis, Group, Array) + _supported_scalars_types session = session.filter(kind=kind) session.meta = meta @@ -226,8 +228,21 @@ def _test_io(fpath, session, meta, engine): assert s.meta == meta +def _add_scalars_to_session(s): + # 's' for scalar + s['s_int'] = 5 + s['s_float'] = 5.5 + s['s_bool'] = True + s['s_str'] = 'string' + s['s_date'] = date(2020, 1, 10) + s['s_time'] = time(11, 23, 54) + s['s_datetime'] = datetime(2020, 1, 10, 11, 23, 54) + return s + + @needs_pytables def test_h5_io(tmpdir, session, meta): + session = _add_scalars_to_session(session) fpath = tmp_path(tmpdir, 'test_session.h5') _test_io(fpath, session, meta, engine='pandas_hdf') @@ -276,6 +291,7 @@ def test_csv_io(tmpdir, session, meta): def test_pickle_io(tmpdir, session, meta): + session = _add_scalars_to_session(session) fpath = tmp_path(tmpdir, 'test_session.pkl') _test_io(fpath, session, meta, engine='pickle') From d8a1561132e9586bbb661a6171eb782305600a0d Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Wed, 15 Jan 2020 15:46:57 +0100 Subject: [PATCH 4/4] .travis.yml: workaround for Python 2.7 to install tqdm explicitly before updating conda --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index 3e3e20017..72e6a9cdf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,6 +27,10 @@ before_install: - hash -r - conda config --add channels 
conda-forge - conda config --set always_yes yes --set changeps1 no + # workaround for conda >= 4.8 + - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then + pip install tqdm; + fi - conda update -q conda # Useful for debugging any issues with conda
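
Taken together, PATCH 3 means that the scalars belonging to a session now survive a
round trip through an HDF5 or pickle file, just like arrays, axes and groups. Below is
a minimal sketch of the new behaviour (assuming a larray 0.33-dev install with these
patches applied and pytables available; the file name 'example_session.h5' is
arbitrary):

    # round-trip scalars through an HDF5 file using Session.save() / Session()
    from datetime import date
    from larray import Session, ndtest

    ses = Session([('i', 5), ('s', 'string'), ('d', date(2020, 1, 10)),
                   ('arr1', ndtest((2, 3)))])
    ses.save('example_session.h5')        # scalars are dumped alongside the array

    ses2 = Session('example_session.h5')  # loading restores them with their values
    assert ses2['i'] == 5
    assert ses2['s'] == 'string'
    assert ses2['d'] == date(2020, 1, 10)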
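
Internally, the new scalar branch of _dump_item() in larray/inout/hdf.py wraps each
scalar in a length-1 pandas Series and records the type name in the storer attributes,
which read_hdf() later uses to unwrap the single value. A sketch of that storage scheme
in plain pandas (the file name 'scalars.h5' and the key '/i' are illustrative):

    import pandas as pd

    with pd.HDFStore('scalars.h5') as store:
        # dump: wrap the scalar in a length-1 Series and tag its type name
        store.put('/i', pd.Series(data=5))
        store.get_storer('/i').attrs.type = type(5).__name__  # 'int'
        # load: the 'type' attribute tells the loader to unwrap the single value
        values = store.get('/i').values
        assert len(values) == 1 and values[0] == 5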