larray-project · gdementen · Jun 14, 2017 · Jun 13, 2017 · Jun 13, 2017 · Jun 13, 2017
diff --git a/doc/source/changes/version_0_24.rst.inc b/doc/source/changes/version_0_24.rst.inc
@@ -18,6 +18,26 @@
      a0   0   1
      a1   2   3
 
+* added new boolean argument 'overwrite' to Session.save, Session.to_hdf, Session.to_excel and Session.to_pickle
+  methods (closes :issue:`293`). If overwrite=True, the file is removed and replaced by a new one if it already exists.
+  This is the default behavior. If overwrite=False, its content is updated:
+
+    >>> arr1, arr2, arr3 = ndtest((2, 2)), ndtest(4), ndtest((3, 2))
+    >>> s = Session([('arr1', arr1), ('arr2', arr2), ('arr3', arr3)])
+
+    >>> # save arr1, arr2 and arr3 in file output.h5
+    >>> s.save('output.h5')
+
+    >>> # replace arr1 and create arr4 + put them in a second session
+    >>> arr1, arr4 = ndtest((3, 3)), ndtest((2, 3))
+    >>> s2 = Session([('arr1', arr1), ('arr4', arr4)])
+
+    >>> # replace arr1 and add arr4 in file output.h5
+    >>> s2.save('output.h5', overwrite=False)
+
+    >>> # erase content of 'output.h5' and save only arrays contained in the second session
+    >>> s2.save('output.h5')
+
 
 Miscellaneous improvements
 --------------------------
@@ -63,3 +83,11 @@ Fixes
 * fixed getting float data instead of int when converting an Excel Sheet or Range to an larray or numpy array.
 
 * fixed some warning messages to point to the correct line in user code.
+
+* fixed crash of Session.save method when the session contains a 0D array.
+  0D arrays are now skipped when saving a session (closes :issue:`291`).
+
+* fixed Session.save and Session.to_excel failing to create a new Excel file
+  (it only worked if the file already existed). Closes :issue:`313`.
+
+* fixed Session.load(file, engine='pandas_excel'): axes were considered as anonymous.
diff --git a/larray/core/session.py b/larray/core/session.py
@@ -194,7 +194,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
         names : list of str, optional
             List of arrays to load. If `fname` is None, list of paths to CSV files.
             Defaults to all valid objects present in the file/directory.
-        engine : str, optional
+        engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
             Load using `engine`. Defaults to 'auto' (use default engine for
             the format guessed from the file extension).
         display : bool, optional
@@ -234,7 +234,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
         for k, v in arrays.items():
             self[k] = v
 
-    def save(self, fname, names=None, engine='auto', display=False, **kwargs):
+    def save(self, fname, names=None, engine='auto', overwrite=True, display=False, **kwargs):
         """
         Dumps all array objects from the current session to a file.
 
@@ -245,12 +245,14 @@ def save(self, fname, names=None, engine='auto', display=False, **kwargs):
         names : list of str or None, optional
             List of names of objects to dump. If `fname` is None, list of paths to CSV files.
             Defaults to all objects present in the Session.
-        engine : str, optional
+        engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
             Dump using `engine`. Defaults to 'auto' (use default engine for
             the format guessed from the file extension).
+        overwrite : bool, optional
+            Whether or not to overwrite an existing file, if any. Ignored for CSV files. 
+            If False, file is updated. Defaults to True.
         display : bool, optional
-            Whether or not to display which file is being worked on. Defaults
-            to False.
+            Whether or not to display which file is being worked on. Defaults to False.
 
         Examples
         --------
@@ -264,11 +266,20 @@ def save(self, fname, names=None, engine='auto', display=False, **kwargs):
         Save only some arrays
 
         >>> s.save('output.h5', ['arr1', 'arr3'])  # doctest: +SKIP
+
+        Update file
+
+        >>> arr1, arr4 = ndtest((3, 3)), ndtest((2, 3))     # doctest: +SKIP
+        >>> s2 = Session([('arr1', arr1), ('arr4', arr4)])  # doctest: +SKIP
+        >>> # replace arr1 and add arr4 in file output.h5
+        >>> s2.save('output.h5', overwrite=False)           # doctest: +SKIP
         """
         if engine == 'auto':
             _, ext = os.path.splitext(fname)
             ext = ext.strip('.') if '.' in ext else 'csv'
             engine = ext_default_engine[ext]
+        if overwrite and engine != ext_default_engine['csv'] and os.path.isfile(fname):
+            os.remove(fname)
         handler_cls = handler_classes[engine]
         handler = handler_cls(fname)
         items = self.filter(kind=LArray).items()
@@ -326,7 +337,7 @@ def to_globals(self, names=None, depth=0, warn=True):
         for k, v in items:
             d[k] = v
 
-    def to_pickle(self, fname, names=None, *args, **kwargs):
+    def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs):
         """
         Dumps all array objects from the current session to a file using pickle.
 
@@ -340,6 +351,11 @@ def to_pickle(self, fname, names=None, *args, **kwargs):
         names : list of str or None, optional
             List of names of objects to dump. Defaults to all objects
             present in the Session.
+        overwrite : bool, optional
+            Whether or not to overwrite an existing file, if any. 
+            If False, file is updated. Defaults to True.
+        display : bool, optional
+            Whether or not to display which file is being worked on. Defaults to False.
 
         Examples
         --------
@@ -354,13 +370,13 @@ def to_pickle(self, fname, names=None, *args, **kwargs):
 
         >>> s.to_pickle('output.pkl', ['arr1', 'arr3'])  # doctest: +SKIP
         """
-        self.save(fname, names, ext_default_engine['pkl'], *args, **kwargs)
+        self.save(fname, names, ext_default_engine['pkl'], overwrite, display, **kwargs)
 
     def dump(self, fname, names=None, engine='auto', display=False, **kwargs):
         """Deprecated alias of `save`: emits a DeprecationWarning, then forwards its arguments to `save`.
         `display` is forwarded by keyword; any extra keyword arguments are passed through unchanged."""
         warnings.warn("Method dump is deprecated. Use method save instead.", DeprecationWarning, stacklevel=2)
         # display must be passed by keyword: the 4th positional parameter of save is `overwrite`,
         # so passing it positionally would silently toggle overwriting instead of displaying.
         self.save(fname, names, engine, display=display, **kwargs)
 
-    def to_hdf(self, fname, names=None, *args, **kwargs):
+    def to_hdf(self, fname, names=None, overwrite=True, display=False, **kwargs):
         """
         Dumps all array objects from the current session to an HDF file.
 
@@ -371,6 +387,11 @@ def to_hdf(self, fname, names=None, *args, **kwargs):
         names : list of str or None, optional
             List of names of objects to dump. Defaults to all objects
             present in the Session.
+        overwrite : bool, optional
+            Whether or not to overwrite an existing file, if any. 
+            If False, file is updated. Defaults to True.
+        display : bool, optional
+            Whether or not to display which file is being worked on. Defaults to False.
 
         Examples
         --------
@@ -385,13 +406,13 @@ def to_hdf(self, fname, names=None, *args, **kwargs):
 
         >>> s.to_hdf('output.h5', ['arr1', 'arr3'])  # doctest: +SKIP
         """
-        self.save(fname, names, ext_default_engine['hdf'], *args, **kwargs)
+        self.save(fname, names, ext_default_engine['hdf'], overwrite, display, **kwargs)
 
     def dump_hdf(self, fname, names=None, *args, **kwargs):
         """Deprecated alias of `to_hdf`: emits a DeprecationWarning, then forwards its arguments to `to_hdf`.
         At most one extra positional argument is accepted and it is interpreted as `display`."""
         warnings.warn("Method dump_hdf is deprecated. Use method to_hdf instead.", DeprecationWarning, stacklevel=2)
         # Historically the only extra positional argument reaching save() through this method was
         # `display`. to_hdf now takes `overwrite` in that position, so forward it by keyword.
         if len(args) > 1:
             raise TypeError("dump_hdf() accepts at most one extra positional argument (display), "
                             "got {}".format(len(args)))
         if args:
             self.to_hdf(fname, names, display=args[0], **kwargs)
         else:
             self.to_hdf(fname, names, **kwargs)
 
-    def to_excel(self, fname, names=None, *args, **kwargs):
+    def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs):
         """
         Dumps all array objects from the current session to an Excel file.
 
@@ -402,6 +423,11 @@ def to_excel(self, fname, names=None, *args, **kwargs):
         names : list of str or None, optional
             List of names of objects to dump. Defaults to all objects
             present in the Session.
+        overwrite : bool, optional
+            Whether or not to overwrite an existing file, if any. 
+            If False, file is updated. Defaults to True.
+        display : bool, optional
+            Whether or not to display which file is being worked on. Defaults to False.
 
         Examples
         --------
@@ -416,13 +442,13 @@ def to_excel(self, fname, names=None, *args, **kwargs):
 
         >>> s.to_excel('output.xlsx', ['arr1', 'arr3'])  # doctest: +SKIP
         """
-        self.save(fname, names, ext_default_engine['xlsx'], *args, **kwargs)
+        self.save(fname, names, ext_default_engine['xlsx'], overwrite, display, **kwargs)
 
     def dump_excel(self, fname, names=None, *args, **kwargs):
         """Deprecated alias of `to_excel`: emits a DeprecationWarning, then forwards its arguments to `to_excel`.
         At most one extra positional argument is accepted and it is interpreted as `display`."""
         warnings.warn("Method dump_excel is deprecated. Use method to_excel instead.", DeprecationWarning, stacklevel=2)
         # Historically the only extra positional argument reaching save() through this method was
         # `display`. to_excel now takes `overwrite` in that position, so forward it by keyword.
         if len(args) > 1:
             raise TypeError("dump_excel() accepts at most one extra positional argument (display), "
                             "got {}".format(len(args)))
         if args:
             self.to_excel(fname, names, display=args[0], **kwargs)
         else:
             self.to_excel(fname, names, **kwargs)
 
-    def to_csv(self, fname, names=None, *args, **kwargs):
+    def to_csv(self, fname, names=None, display=False, **kwargs):
         """
         Dumps all array objects from the current session to CSV files.
 
@@ -433,6 +459,8 @@ def to_csv(self, fname, names=None, *args, **kwargs):
         names : list of str or None, optional
             List of names of objects to dump. Defaults to all objects
             present in the Session.
+        display : bool, optional
+            Whether or not to display which file is being worked on. Defaults to False.
 
         Examples
         --------
@@ -447,7 +475,7 @@ def to_csv(self, fname, names=None, *args, **kwargs):
 
         >>> s.to_csv('./Output', ['arr1', 'arr3'])  # doctest: +SKIP
         """
-        self.save(fname, names, ext_default_engine['csv'], *args, **kwargs)
+        self.save(fname, names, ext_default_engine['csv'], display=display, **kwargs)
 
     def dump_csv(self, fname, names=None, *args, **kwargs):
         warnings.warn("Method dump_csv is deprecated. Use method to_csv instead.", DeprecationWarning, stacklevel=2)

diff --git a/larray/io/session.py b/larray/io/session.py
@@ -4,6 +4,7 @@
 from collections import OrderedDict
 from pandas import ExcelWriter, ExcelFile, HDFStore
 
+from larray.core.abc import ABCLArray
 from larray.util.misc import pickle
 from larray.io.excel import open_excel
 from larray.io.array import df_aslarray, read_csv, read_hdf
@@ -114,6 +115,10 @@ def dump_arrays(self, key_values, *args, **kwargs):
         display = kwargs.pop('display', False)
         self._open_for_write()
         for key, value in key_values:
+            if isinstance(value, ABCLArray) and value.ndim == 0:
+                if display:
+                    print('Cannot dump {}. Dumping 0D arrays is currently not supported.'.format(key))
+                continue
             if display:
                 print("dumping", key, "...", end=' ')
             self._dump(key, value, *args, **kwargs)
@@ -164,7 +169,7 @@ def list(self):
 
     def _read_array(self, key, *args, **kwargs):
         df = self.handle.parse(key, *args, **kwargs)
-        return df_aslarray(df)
+        return df_aslarray(df, raw=True)
 
     def _dump(self, key, value, *args, **kwargs):
         kwargs['engine'] = 'xlsxwriter'
@@ -182,7 +187,8 @@ def _open_for_read(self):
         self.handle = open_excel(self.fname)
 
     def _open_for_write(self):
-        self.handle = open_excel(self.fname)
+        overwrite_file = not os.path.isfile(self.fname)
+        self.handle = open_excel(self.fname, overwrite_file=overwrite_file)
 
     def list(self):
         """Return the sheet names reported by the currently opened handle (``self.handle.sheet_names()``)."""
         return self.handle.sheet_names()

diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py
@@ -1,5 +1,6 @@
 from __future__ import absolute_import, division, print_function
 
+import os
 from unittest import TestCase
 
 import numpy as np
@@ -31,6 +32,7 @@ def setUp(self):
         self.c = 'c'
         self.d = {}
         self.e = ndrange([(2, 'a0'), (3, 'a1')])
+        self.e2 = ndrange(('a=a0..a2', 'b=b0..b2'))
         self.f = ndrange([(3, 'a0'), (2, 'a1')])
         self.g = ndrange([(2, 'a0'), (4, 'a1')])
         self.session = Session([
@@ -134,24 +136,37 @@ def test_names(self):
 
     def test_h5_io(self):
         fpath = abspath('test_session.h5')
-
         self.session.save(fpath)
+
         s = Session()
         s.load(fpath)
         # HDF does *not* keep ordering (ie, keys are always sorted)
         self.assertEqual(list(s.keys()), ['e', 'f', 'g'])
 
+        # update an array (overwrite=False)
+        Session(e=self.e2).save(fpath, overwrite=False)
+        s.load(fpath)
+        self.assertEqual(list(s.keys()), ['e', 'f', 'g'])
+        assert_array_nan_equal(s['e'], self.e2)
+
         s = Session()
         s.load(fpath, ['e', 'f'])
         self.assertEqual(list(s.keys()), ['e', 'f'])
 
     def test_xlsx_pandas_io(self):
         fpath = abspath('test_session.xlsx')
         self.session.save(fpath, engine='pandas_excel')
+
         s = Session()
         s.load(fpath, engine='pandas_excel')
         self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
 
+        # update an array (overwrite=False)
+        Session(e=self.e2).save(fpath, engine='pandas_excel', overwrite=False)
+        s.load(fpath, engine='pandas_excel')
+        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
+        assert_array_nan_equal(s['e'], self.e2)
+
         fpath = abspath('test_session_ef.xlsx')
         self.session.save(fpath, ['e', 'f'], engine='pandas_excel')
         s = Session()
@@ -161,12 +176,20 @@ def test_xlsx_pandas_io(self):
     @pytest.mark.skipif(xw is None, reason="xlwings is not available")
     def test_xlsx_xlwings_io(self):
         fpath = abspath('test_session_xw.xlsx')
+        # test save when Excel file does not exist
         self.session.save(fpath, engine='xlwings_excel')
+
         s = Session()
         s.load(fpath, engine='xlwings_excel')
         # ordering is only kept if the file did not exist previously (otherwise the ordering is left intact)
         self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
 
+        # update an array (overwrite=False)
+        Session(e=self.e2).save(fpath, engine='xlwings_excel', overwrite=False)
+        s.load(fpath, engine='xlwings_excel')
+        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
+        assert_array_nan_equal(s['e'], self.e2)
+
         fpath = abspath('test_session_ef_xw.xlsx')
         self.session.save(fpath, ['e', 'f'], engine='xlwings_excel')
         s = Session()
@@ -184,12 +207,18 @@ def test_csv_io(self):
 
     def test_pickle_io(self):
         fpath = abspath('test_session.pkl')
-
         self.session.save(fpath)
+
         s = Session()
         s.load(fpath, engine='pickle')
         self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
 
+        # update an array (overwrite=False)
+        Session(e=self.e2).save(fpath, overwrite=False)
+        s.load(fpath, engine='pickle')
+        self.assertEqual(list(s.keys()), ['e', 'g', 'f'])
+        assert_array_nan_equal(s['e'], self.e2)
+
     def test_to_globals(self):
         with pytest.warns(RuntimeWarning) as caught_warnings:
             self.session.to_globals()