From 2a32a395dc89c9eb17baadbd9fd56c2d14408ccf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 16:59:47 +0200
Subject: [PATCH 01/44] fixed arr1d @ arr1d returning a 0D LArray instead of a
 scalar

---
 larray/core/array.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index d74826374..c1ca45c3c 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -5081,7 +5081,10 @@ def __matmul__(self, other):
             res_axes += [axes[-2]]
         if other.ndim > 1:
             res_axes += [other_axes[-1].copy()]
-        return LArray(res_data, res_axes)
+        if res_axes:
+            return LArray(res_data, res_axes)
+        else:
+            return res_data
 
     def __rmatmul__(self, other):
         if isinstance(other, np.ndarray):

From ca76975aeb9671af2cc1bd2347849ad73a675a3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 15:52:58 +0200
Subject: [PATCH 02/44] fixed Axis(values, name=np.str_)

---
 larray/core/axis.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/larray/core/axis.py b/larray/core/axis.py
index be97a1224..0021b29b7 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -92,8 +92,11 @@ def __init__(self, labels, name=None):
 
         # make sure we do not have np.str_ as it causes problems down the
         # line with xlwings. Cannot use isinstance to check that though.
-        is_python_str = type(name) is unicode or type(name) is bytes
-        assert name is None or isinstance(name, int) or is_python_str, type(name)
+        name_is_python_str = type(name) is unicode or type(name) is bytes
+        if isinstance(name, str) and not name_is_python_str:
+            name = str(name)
+        if name is not None and not isinstance(name, (int, str)):
+            raise TypeError("Axis name should be None, int or str but is: %s (%s)" % (name, type(name).__name__))
         self.name = name
         self._labels = None
         self.__mapping = None

From b39c87b4634d3c0deba452dc166fac003a3ee893 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 21 Nov 2018 09:53:34 +0100
Subject: [PATCH 03/44] WIP: fixed loading pd.DataFrame with non-string index
 names (needs test & changelog)

---
 larray/inout/pandas.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py
index 017c400a4..c40801977 100644
--- a/larray/inout/pandas.py
+++ b/larray/inout/pandas.py
@@ -211,7 +211,8 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
     a1   b0   4   5
     a1   b1   6   7
     """
-    axes_names = [decode(name, 'utf8') for name in df.index.names]
+    axes_names = [decode(name, 'utf8') if isinstance(name, basestring) else name
+                  for name in df.index.names]
 
     # handle 2 or more dimensions with the last axis name given using \
     if unfold_last_axis_name:
@@ -303,7 +304,8 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header
             raise ValueError('sort_rows=True is not valid for 1D arrays. Please use sort_columns instead.')
         return from_series(series, sort_rows=sort_columns)
     else:
-        axes_names = [decode(name, 'utf8') for name in df.index.names]
+        axes_names = [decode(name, 'utf8') if isinstance(name, basestring) else name
+                      for name in df.index.names]
         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
         return from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
                           unfold_last_axis_name=unfold_last_axis_name, **kwargs)

From c7ddd6a3a47d9d1f473b7d1074c602880accc101 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 14 Dec 2018 08:12:31 +0100
Subject: [PATCH 04/44] WIP: fixed LArray.broadcast_with with out= (needs test)

---
 larray/core/array.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/larray/core/array.py b/larray/core/array.py
index c1ca45c3c..04012789a 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -5509,6 +5509,8 @@ def expand(self, target_axes=None, out=None, readonly=False):
                 return LArray(np.broadcast_to(broadcasted, target_axes.shape), target_axes)
             else:
                 out = empty(target_axes, dtype=self.dtype)
+        else:
+            broadcasted = self
         out[:] = broadcasted
         return out
 

From afc7ebb4cedfec776c17aca59701830d4d915cb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 14 Nov 2018 10:48:21 +0100
Subject: [PATCH 05/44] fixed many warning messages when running the test suite
 (invalid escape)

used raw strings in a lot of places, especially in regexes
---
 larray/core/array.py       | 94 +++++++++++++++++++-------------------
 larray/core/axis.py        |  2 +-
 larray/core/group.py       |  8 ++--
 larray/tests/test_array.py | 26 +++++------
 4 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 04012789a..e260c6201 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -401,7 +401,7 @@ def __setitem__(self, key, value):
 
 
 def get_axis(obj, i):
-    """
+    r"""
     Returns an axis according to its position.
 
     Parameters
@@ -497,7 +497,7 @@ def _doc_agg_method(func, by=False, long_name='', action_verb='perform', extra_a
 
     doc_args = "".join(_arg_agg[arg] for arg in extra_args)
     doc_kwargs = "".join(_kwarg_agg[kw]['doc'] for kw in kwargs)
-    doc_varargs = """
+    doc_varargs = r"""
         \*axes_and_groups : None or int or str or Axis or Group or any combination of those
             {specific}
             The default (no axis or group) is to {action_verb} the {long_name} over all the dimensions of the input
@@ -1193,7 +1193,7 @@ def describe(self, *args, **kwargs):
                      [self.percentile(p, *args) for p in percentiles], Axis(labels, 'statistic'))
 
     def describe_by(self, *args, **kwargs):
-        """
+        r"""
         Descriptive summary statistics, excluding NaN values, along axes or for groups.
 
         By default, it includes the number of non-NaN values, the mean, standard deviation, minimum, maximum and
@@ -1465,7 +1465,7 @@ def get_labels(self_axis):
             return res
 
     def align(self, other, join='outer', fill_value=nan, axes=None):
-        """Align two arrays on their axes with the specified join method.
+        r"""Align two arrays on their axes with the specified join method.
 
         In other words, it ensure all common axes are compatible. Those arrays can then be used in binary operations.
 
@@ -1500,14 +1500,14 @@ def align(self, other, join='outer', fill_value=nan, axes=None):
         --------
         >>> arr1 = ndtest((2, 3))
         >>> arr1
-        a\\b  b0  b1  b2
+        a\b  b0  b1  b2
          a0   0   1   2
          a1   3   4   5
         >>> arr2 = -ndtest((3, 2))
         >>> # reorder array to make the test more interesting
         >>> arr2 = arr2[['b1', 'b0']]
         >>> arr2
-        a\\b  b1  b0
+        a\b  b1  b0
          a0  -1   0
          a1  -3  -2
          a2  -5  -4
@@ -1516,12 +1516,12 @@ def align(self, other, join='outer', fill_value=nan, axes=None):
 
         >>> aligned1, aligned2 = arr1.align(arr2)
         >>> aligned1
-        a\\b   b0   b1   b2
+        a\b   b0   b1   b2
          a0  0.0  1.0  2.0
          a1  3.0  4.0  5.0
          a2  nan  nan  nan
         >>> aligned2
-        a\\b    b0    b1   b2
+        a\b    b0    b1   b2
          a0   0.0  -1.0  nan
          a1  -2.0  -3.0  nan
          a2  -4.0  -5.0  nan
@@ -1529,7 +1529,7 @@ def align(self, other, join='outer', fill_value=nan, axes=None):
         After aligning all common axes, one can then do operations between the two arrays
 
         >>> aligned1 + aligned2
-        a\\b   b0   b1   b2
+        a\b   b0   b1   b2
          a0  0.0  0.0  nan
          a1  1.0  1.0  nan
          a2  nan  nan  nan
@@ -1538,30 +1538,30 @@ def align(self, other, join='outer', fill_value=nan, axes=None):
 
         >>> aligned1, aligned2 = arr1.align(arr2, join='inner')
         >>> aligned1
-        a\\b   b0   b1
+        a\b   b0   b1
          a0  0.0  1.0
          a1  3.0  4.0
         >>> aligned2
-        a\\b    b0    b1
+        a\b    b0    b1
          a0   0.0  -1.0
          a1  -2.0  -3.0
         >>> aligned1, aligned2 = arr1.align(arr2, join='left')
         >>> aligned1
-        a\\b   b0   b1   b2
+        a\b   b0   b1   b2
          a0  0.0  1.0  2.0
          a1  3.0  4.0  5.0
         >>> aligned2
-        a\\b    b0    b1   b2
+        a\b    b0    b1   b2
          a0   0.0  -1.0  nan
          a1  -2.0  -3.0  nan
         >>> aligned1, aligned2 = arr1.align(arr2, join='right')
         >>> aligned1
-        a\\b   b1   b0
+        a\b   b1   b0
          a0  1.0  0.0
          a1  4.0  3.0
          a2  nan  nan
         >>> aligned2
-        a\\b    b1    b0
+        a\b    b1    b0
          a0  -1.0   0.0
          a1  -3.0  -2.0
          a2  -5.0  -4.0
@@ -1570,17 +1570,17 @@ def align(self, other, join='outer', fill_value=nan, axes=None):
 
         >>> aligned1, aligned2 = arr1.align(arr2, fill_value=0)
         >>> aligned1
-        a\\b  b0  b1  b2
+        a\b  b0  b1  b2
          a0   0   1   2
          a1   3   4   5
          a2   0   0   0
         >>> aligned2
-        a\\b  b0  b1  b2
+        a\b  b0  b1  b2
          a0   0  -1   0
          a1  -2  -3   0
          a2  -4  -5   0
         >>> aligned1 + aligned2
-        a\\b  b0  b1  b2
+        a\b  b0  b1  b2
          a0   0   0   2
          a1   1   1   5
          a2  -4  -5   0
@@ -1589,11 +1589,11 @@ def align(self, other, join='outer', fill_value=nan, axes=None):
 
         >>> arr3 = ndtest((3, 2, 2))
         >>> arr1
-        a\\b  b0  b1  b2
+        a\b  b0  b1  b2
          a0   0   1   2
          a1   3   4   5
         >>> arr3
-         a  b\\c  c0  c1
+         a  b\c  c0  c1
         a0   b0   0   1
         a0   b1   2   3
         a1   b0   4   5
@@ -1602,7 +1602,7 @@ def align(self, other, join='outer', fill_value=nan, axes=None):
         a2   b1  10  11
         >>> aligned1, aligned2 = arr1.align(arr3, join='inner')
         >>> aligned1
-        a\\b   b0   b1
+        a\b   b0   b1
          a0  0.0  1.0
          a1  3.0  4.0
         >>> aligned2
@@ -1612,7 +1612,7 @@ def align(self, other, join='outer', fill_value=nan, axes=None):
         a1   b0  4.0  5.0
         a1   b1  6.0  7.0
         >>> aligned1 + aligned2
-         a  b\\c    c0    c1
+         a  b\c    c0    c1
         a0   b0   0.0   1.0
         a0   b1   3.0   4.0
         a1   b0   7.0   8.0
@@ -1622,11 +1622,11 @@ def align(self, other, join='outer', fill_value=nan, axes=None):
 
         >>> aligned1, aligned2 = arr1.align(arr2, axes='b')
         >>> aligned1
-        a\\b   b0   b1   b2
+        a\b   b0   b1   b2
          a0  0.0  1.0  2.0
          a1  3.0  4.0  5.0
         >>> aligned2
-        a\\b    b0    b1   b2
+        a\b    b0    b1   b2
          a0   0.0  -1.0  nan
          a1  -2.0  -3.0  nan
          a2  -4.0  -5.0  nan
@@ -5515,7 +5515,7 @@ def expand(self, target_axes=None, out=None, readonly=False):
         return out
 
     def append(self, axis, value, label=None):
-        """Adds an array to self along an axis.
+        r"""Adds an array to self along an axis.
 
         The two arrays must have compatible axes.
 
@@ -5537,15 +5537,15 @@ def append(self, axis, value, label=None):
         --------
         >>> a = ones('nat=BE,FO;sex=M,F')
         >>> a
-        nat\\sex    M    F
+        nat\sex    M    F
              BE  1.0  1.0
              FO  1.0  1.0
         >>> a.append('sex', a.sum('sex'), 'M+F')
-        nat\\sex    M    F  M+F
+        nat\sex    M    F  M+F
              BE  1.0  1.0  2.0
              FO  1.0  1.0  2.0
         >>> a.append('nat', 2, 'Other')
-        nat\\sex    M    F
+        nat\sex    M    F
              BE  1.0  1.0
              FO  1.0  1.0
           Other  2.0  2.0
@@ -5554,7 +5554,7 @@ def append(self, axis, value, label=None):
         type  type1  type2
                 0.0    0.0
         >>> a.append('nat', b, 'Other')
-          nat  sex\\type  type1  type2
+          nat  sex\type  type1  type2
            BE         M    1.0    1.0
            BE         F    1.0    1.0
            FO         M    1.0    1.0
@@ -5566,7 +5566,7 @@ def append(self, axis, value, label=None):
         return self.insert(value, before=IGroup(len(axis), axis=axis), label=label)
 
     def prepend(self, axis, value, label=None):
-        """Adds an array before self along an axis.
+        r"""Adds an array before self along an axis.
 
         The two arrays must have compatible axes.
 
@@ -5592,11 +5592,11 @@ def prepend(self, axis, value, label=None):
              BE  1.0  1.0
              FO  1.0  1.0
         >>> a.prepend('sex', a.sum('sex'), 'M+F')
-        nat\\sex  M+F    M    F
+        nat\sex  M+F    M    F
              BE  2.0  1.0  1.0
              FO  2.0  1.0  1.0
         >>> a.prepend('nat', 2, 'Other')
-        nat\\sex    M    F
+        nat\sex    M    F
           Other  2.0  2.0
              BE  1.0  1.0
              FO  1.0  1.0
@@ -5605,7 +5605,7 @@ def prepend(self, axis, value, label=None):
         type  type1  type2
                 0.0    0.0
         >>> a.prepend('sex', b, 'Other')
-        nat  sex\\type  type1  type2
+        nat  sex\type  type1  type2
          BE     Other    0.0    0.0
          BE         M    1.0    1.0
          BE         F    1.0    1.0
@@ -6318,7 +6318,7 @@ def to_clipboard(self, *args, **kwargs):
 
     @property
     def plot(self):
-        """Plots the data of the array into a graph (window pop-up).
+        r"""Plots the data of the array into a graph (window pop-up).
 
         The graph can be tweaked to achieve the desired formatting and can be saved to a .png file.
 
@@ -7137,7 +7137,7 @@ def larray_nan_equal(a1, a2):
 
 
 def aslarray(a, meta=None):
-    """
+    r"""
     Converts input as LArray if possible.
 
     Parameters
@@ -7201,7 +7201,7 @@ def wrapper(*args, **kwargs):
 
 @_check_axes_argument
 def zeros(axes, title=None, dtype=float, order='C', meta=None):
-    """Returns an array with the specified axes and filled with zeros.
+    r"""Returns an array with the specified axes and filled with zeros.
 
     Parameters
     ----------
@@ -7516,7 +7516,7 @@ def full_like(array, fill_value, title=None, dtype=None, order='K', meta=None):
     meta = _handle_deprecated_argument_title(meta, title)
     # cannot use full() because order == 'K' is not understood
     # cannot use np.full_like() because it would not handle LArray fill_value
-    res = empty_like(array, dtype, meta=meta)
+    res = empty_like(array, dtype=dtype, meta=meta)
     res[:] = fill_value
     return res
 
@@ -8122,7 +8122,7 @@ def eye(rows, columns=None, k=0, title=None, dtype=None, meta=None):
 
 
 def stack(elements=None, axis=None, title=None, meta=None, **kwargs):
-    """
+    r"""
     Combines several arrays or sessions along an axis.
 
     Parameters
@@ -8163,41 +8163,41 @@ def stack(elements=None, axis=None, title=None, meta=None, **kwargs):
     In the case the axis to create has already been defined in a variable (Axis or Group)
 
     >>> stack({'BE': arr1, 'FO': arr2}, nat)
-    sex\\nat   BE   FO
+    sex\nat   BE   FO
           M  1.0  0.0
           F  1.0  0.0
     >>> all_nat = Axis('nat=BE,DE,FR,NL,UK')
     >>> stack({'BE': arr1, 'DE': arr2}, all_nat[:'DE'])
-    sex\\nat   BE   DE
+    sex\nat   BE   DE
           M  1.0  0.0
           F  1.0  0.0
 
     Otherwise (when one wants to create an axis from scratch), any of these syntaxes works:
 
     >>> stack([arr1, arr2], 'nat=BE,FO')
-    sex\\nat   BE   FO
+    sex\nat   BE   FO
           M  1.0  0.0
           F  1.0  0.0
     >>> stack({'BE': arr1, 'FO': arr2}, 'nat=BE,FO')
-    sex\\nat   BE   FO
+    sex\nat   BE   FO
           M  1.0  0.0
           F  1.0  0.0
     >>> stack([('BE', arr1), ('FO', arr2)], 'nat=BE,FO')
-    sex\\nat   BE   FO
+    sex\nat   BE   FO
           M  1.0  0.0
           F  1.0  0.0
 
     When stacking arrays with different axes, the result has the union of all axes present:
 
     >>> stack({'BE': arr1, 'FO': 0}, nat)
-    sex\\nat   BE   FO
+    sex\nat   BE   FO
           M  1.0  0.0
           F  1.0  0.0
 
     Creating an axis without name nor labels can be done using:
 
     >>> stack((arr1, arr2))
-    sex\\{1}*    0    1
+    sex\{1}*    0    1
            M  1.0  0.0
            F  1.0  0.0
 
@@ -8205,7 +8205,7 @@ def stack(elements=None, axis=None, title=None, meta=None, **kwargs):
     arguments can be an attractive alternative.
 
     >>> stack(FO=arr2, BE=arr1, axis=nat)
-    sex\\nat   BE   FO
+    sex\nat   BE   FO
           M  1.0  0.0
           F  1.0  0.0
 
@@ -8214,7 +8214,7 @@ def stack(elements=None, axis=None, title=None, meta=None, **kwargs):
 
     >>> # use this only on Python 3.6 and later
     >>> stack(BE=arr1, FO=arr2, axis='nat')   # doctest: +SKIP
-    sex\\nat   BE   FO
+    sex\nat   BE   FO
           M  1.0  0.0
           F  1.0  0.0
 
diff --git a/larray/core/axis.py b/larray/core/axis.py
index 0021b29b7..7b636a00c 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -2648,7 +2648,7 @@ def names(self):
 
     @property
     def display_names(self):
-        """
+        r"""
         Returns the list of (display) names of the axes.
 
         Returns
diff --git a/larray/core/group.py b/larray/core/group.py
index 8c95d07ed..2e6d703e1 100644
--- a/larray/core/group.py
+++ b/larray/core/group.py
@@ -196,7 +196,7 @@ def generalized_range(start, stop, step=1):
         return irange(start, stop, step)
 
 
-_range_str_pattern = re.compile('(?P<start>[^\s.]+)?\s*\.\.\s*(?P<stop>[^\s.]+)?(\s+step\s+(?P<step>\d+))?')
+_range_str_pattern = re.compile(r'(?P<start>[^\s.]+)?\s*\.\.\s*(?P<stop>[^\s.]+)?(\s+step\s+(?P<step>\d+))?')
 
 
 def _range_str_to_range(s, stack_depth=1):
@@ -435,7 +435,7 @@ def _to_ticks(s, parse_single_int=False):
     return np.asarray(ticks)
 
 
-_axis_name_pattern = re.compile('\s*(([A-Za-z0-9]\w*)(\.i)?\s*\[)?(.*)')
+_axis_name_pattern = re.compile(r'\s*(([A-Za-z0-9]\w*)(\.i)?\s*\[)?(.*)')
 
 
 def _seq_str_to_seq(s, stack_depth=1, parse_single_int=False):
@@ -645,7 +645,7 @@ def _to_keys(value, stack_depth=1):
 
 
 # forbidden characters in sheet names
-_sheet_name_pattern = re.compile('[\\\/?*\[\]:]')
+_sheet_name_pattern = re.compile(r'[\\/?*\[\]:]')
 
 
 def _translate_sheet_name(sheet_name):
@@ -659,7 +659,7 @@ def _translate_sheet_name(sheet_name):
 
 
 # forbidden characters for dataset names in HDF files
-_key_hdf_pattern = re.compile('[\\\/]')
+_key_hdf_pattern = re.compile(r'[\\/]')
 
 
 def _translate_group_key_hdf(key):
diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py
index 652d4637e..c54497c09 100644
--- a/larray/tests/test_array.py
+++ b/larray/tests/test_array.py
@@ -290,12 +290,12 @@ def test_str(small_array, array):
          0    1    2"""
     # two dimensions
     assert str(small_array.filter(lipro=lipro3)) == """\
-sex\lipro  P01  P02  P03
+sex\\lipro  P01  P02  P03
         M    0    1    2
         F   15   16   17"""
     # four dimensions (too many rows)
     assert str(array.filter(lipro=lipro3)) == """\
-age  geo  sex\lipro       P01       P02       P03
+age  geo  sex\\lipro       P01       P02       P03
   0  A11          M       0.0       1.0       2.0
   0  A11          F      15.0      16.0      17.0
   0  A12          M      30.0      31.0      32.0
@@ -477,21 +477,21 @@ def test_getitem_guess_axis(array):
         array[[1, 2], 999]
 
     # key with invalid label list (ie list of labels not found on any axis)
-    with pytest.raises(ValueError, message="\[998, 999\] is not a valid label for any axis"):
+    with pytest.raises(ValueError, message=r"\[998, 999\] is not a valid label for any axis"):
         array[[1, 2], [998, 999]]
 
     # key with partial invalid list (ie list containing a label not found
     # on any axis)
     # FIXME: the message should be the same as for 999, 4 (ie it should NOT mention age).
-    with pytest.raises(ValueError, message="age\[3, 999\] is not a valid label for any axis"):
+    with pytest.raises(ValueError, message=r"age\[3, 999\] is not a valid label for any axis"):
         array[[1, 2], [3, 999]]
 
-    with pytest.raises(ValueError, message="\[999, 4\] is not a valid label for any axis"):
+    with pytest.raises(ValueError, message=r"\[999, 4\] is not a valid label for any axis"):
         array[[1, 2], [999, 4]]
 
     # ambiguous key
     arr = ndtest("a=l0,l1;b=l1,l2")
-    with pytest.raises(ValueError, message="l1 is ambiguous \(valid in a, b\)"):
+    with pytest.raises(ValueError, message=r"l1 is ambiguous \(valid in a, b\)"):
         arr['l1']
 
     # ambiguous key disambiguated via string
@@ -2176,7 +2176,7 @@ def test_sum_with_groups_from_other_axis(small_array):
     # use a group (from another axis) which is incompatible with the axis of
     # the same name in the array
     lipro4 = Axis('lipro=P01,P03,P16')
-    with pytest.raises(ValueError, message="lipro\['P01', 'P16'\] is not a valid label for any axis"):
+    with pytest.raises(ValueError, message=r"lipro\['P01', 'P16'\] is not a valid label for any axis"):
         small_array.sum(lipro4['P01,P16'])
 
 
@@ -2901,7 +2901,7 @@ def test_hdf_roundtrip(tmpdir, meta):
     group = a3.c['c0,c2'] >> 'even'
     a3[group].to_hdf(fpath, group)
     # group with name containing special characters (replaced by _)
-    group = a3.c['c0,c2'] >> ':name?with*special/\[characters]'
+    group = a3.c['c0,c2'] >> r':name?with*special/\[characters]'
     a3[group].to_hdf(fpath, group)
 
     # passing group as key to read_hdf
@@ -3773,7 +3773,7 @@ def test_to_excel_xlsxwriter(tmpdir):
     group = a3.c['c0,c2'] >> 'even'
     a3[group].to_excel(fpath, group, engine='xlsxwriter')
     # group with name containing special characters (replaced by _)
-    group = a3.c['c0,c2'] >> ':name?with*special/\[char]'
+    group = a3.c['c0,c2'] >> r':name?with*special/\[char]'
     a3[group].to_excel(fpath, group, engine='xlsxwriter')
 
 
@@ -3863,7 +3863,7 @@ def test_to_excel_xlwings(tmpdir):
     group = a3.c['c0,c2'] >> 'even'
     a3[group].to_excel(fpath, group, engine='xlwings')
     # group with name containing special characters (replaced by _)
-    group = a3.c['c0,c2'] >> ':name?with*special/\[char]'
+    group = a3.c['c0,c2'] >> r':name?with*special/\[char]'
     a3[group].to_excel(fpath, group, engine='xlwings')
     # checks sheet names
     sheet_names = sorted(open_excel(fpath).sheet_names())
@@ -4259,7 +4259,7 @@ def test_matmul():
     # different axes
     a1 = ndtest('a=a0..a1;b=b0..b2')
     a2 = ndtest('b=b0..b2;c=c0..c3')
-    res = from_lists([['a\c', 'c0', 'c1', 'c2', 'c3'],
+    res = from_lists([[r'a\c', 'c0', 'c1', 'c2', 'c3'],
                       ['a0', 20, 23, 26, 29],
                       ['a1', 56, 68, 80, 92]])
     assert_array_equal(a1.__matmul__(a2), res)
@@ -4509,7 +4509,7 @@ def test_split_axes():
     assert_array_equal(res.transpose('a', 'b', 'c', 'd'), arr)
 
     # regex
-    res = combined.split_axes('b_d', names=['b', 'd'], regex='(\w+)_(\w+)')
+    res = combined.split_axes('b_d', names=['b', 'd'], regex=r'(\w+)_(\w+)')
     assert res.axes.names == ['a', 'b', 'd', 'c']
     assert res.shape == (2, 3, 5, 4)
     assert_array_equal(res.transpose('a', 'b', 'c', 'd'), arr)
@@ -4563,7 +4563,7 @@ def test_split_axes():
 
     # using regex
     arr = ndtest('ab=a0b0..a1b2; c=c0..c3; d=d0..d3; ef=e0f0..e2f1')
-    res = arr.split_axes({'ab': ('a', 'b'), 'ef': ('e', 'f')}, regex='(\w{2})(\w{2})')
+    res = arr.split_axes({'ab': ('a', 'b'), 'ef': ('e', 'f')}, regex=r'(\w{2})(\w{2})')
     assert res.axes.names == ['a', 'b', 'c', 'd', 'e', 'f']
     assert res.size == arr.size
     assert res.shape == (2, 3, 4, 4, 3, 2)

From 0922d64565cb230f6cd42d6d477612a8e93fc5f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Thu, 21 Feb 2019 15:43:58 +0100
Subject: [PATCH 06/44] WIP: fixed creating an LSet from an IGroup with a
 scalar key (needs test & changelog)

---
 larray/core/group.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/larray/core/group.py b/larray/core/group.py
index 2e6d703e1..15ce0aa65 100644
--- a/larray/core/group.py
+++ b/larray/core/group.py
@@ -668,6 +668,7 @@ def _translate_group_key_hdf(key):
     return key
 
 
+# TODO: kill this function
 def union(*args):
     # TODO: add support for LGroup and lists
     """
@@ -1613,7 +1614,7 @@ class LSet(LGroup):
 
     def __init__(self, key, name=None, axis=None):
         key = _to_key(key)
-        if isinstance(key, LGroup):
+        if isinstance(key, Group):
             if name is None:
                 name = key.name
             if axis is None:

From 32505d263e220cac6dcf099e74872c27600743e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Thu, 4 Oct 2018 14:15:37 +0200
Subject: [PATCH 07/44] avoid a few DeprecationWarnings in tests

by using Axis/Group.matching(regex=pattern) instead of matching(pattern), and Axis.containing(substring) in the Axis.index doctest
---
 larray/core/axis.py        | 2 +-
 larray/tests/test_axis.py  | 2 +-
 larray/tests/test_group.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/larray/core/axis.py b/larray/core/axis.py
index 7b636a00c..46ab1d79d 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -801,7 +801,7 @@ def index(self, key, bool_passthrough=True):
         >>> people = Axis(['John Doe', 'Bruce Wayne', 'Bruce Willis', 'Waldo', 'Arthur Dent', 'Harvey Dent'], 'people')
         >>> people.index('Waldo')
         3
-        >>> people.index(people.matching('Bruce'))
+        >>> people.index(people.containing('Bruce'))
         array([1, 2])
         """
         mapping = self._mapping
diff --git a/larray/tests/test_axis.py b/larray/tests/test_axis.py
index 53fcb854c..95af5351d 100644
--- a/larray/tests/test_axis.py
+++ b/larray/tests/test_axis.py
@@ -291,7 +291,7 @@ def test_init_from_group():
 
 def test_matching():
     sutcode = Axis(['A23', 'A2301', 'A25', 'A2501'], 'sutcode')
-    assert sutcode.matching('^...$') == LGroup(['A23', 'A25'])
+    assert sutcode.matching(regex='^...$') == LGroup(['A23', 'A25'])
     assert sutcode.startingwith('A23') == LGroup(['A23', 'A2301'])
     assert sutcode.endingwith('01') == LGroup(['A2301', 'A2501'])
 
diff --git a/larray/tests/test_group.py b/larray/tests/test_group.py
index f24dbc4ce..6cb9ff6c5 100644
--- a/larray/tests/test_group.py
+++ b/larray/tests/test_group.py
@@ -86,7 +86,7 @@ def test_init_lgroup(lgroups):
     group = age['1:5'] >> group2
     assert group.name == group2.name
     axis = Axis('axis=a,a0..a3,b,b0..b3,c,c0..c3')
-    for code in axis.matching('^.$'):
+    for code in axis.matching(regex='^.$'):
         group = axis.startingwith(code) >> code
         assert group.equals(axis.startingwith(code) >> str(code))
 

From 13790f1b240f41d477bf82a21ac89c5d2efa10b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 17:42:57 +0200
Subject: [PATCH 08/44] added TODO/XXX/FIXME/misc comments

---
 larray/core/abstractbases.py |  1 +
 larray/core/array.py         | 10 ++++++++--
 larray/core/axis.py          |  4 ++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/larray/core/abstractbases.py b/larray/core/abstractbases.py
index 0546bb6a6..d5d766673 100644
--- a/larray/core/abstractbases.py
+++ b/larray/core/abstractbases.py
@@ -5,6 +5,7 @@
 
 # define abstract base classes to enable isinstance type checking on our objects
 # idea taken from https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/generic.py
+# FIXME: __metaclass__ is ignored in Python 3
 class ABCAxis(object):
     __metaclass__ = ABCMeta
 
diff --git a/larray/core/array.py b/larray/core/array.py
index e260c6201..53e4d8195 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -17,7 +17,7 @@
 
 # * Axis.sequence? geo.seq('A31', 'A38') (equivalent to geo['A31..A38'])
 
-# * re-implement row_totals/col_totals? or what do we do with them?
+# ? re-implement row_totals/col_totals? or what do we do with them?
 
 # * time specific API so that we know if we go for a subclass or not
 
@@ -396,7 +396,7 @@ def __getitem__(self, key):
 
     def __setitem__(self, key, value):
         # we still need to prepare the key instead of letting numpy handle everything so that
-        # existing (integer)LArray keys are handled correctly (broadcasted using axes names).
+        # existing (integer)LArray keys are broadcasted correctly (using axes names).
         self.array.__setitem__(self._prepare_key(key, wildcard=True), value, translate_key=False)
 
 
@@ -1272,6 +1272,9 @@ def __bool__(self):
     # Python 2
     __nonzero__ = __bool__
 
+    # TODO: this should be a thin wrapper around a method in AxisCollection
+    # TODO: either support a list (of axes names) as first argument here (and set_labels)
+    #       or don't support that in set_axes
     def rename(self, renames=None, to=None, inplace=False, **kwargs):
         """Renames axes of the array.
 
@@ -8252,6 +8255,7 @@ def stack(elements=None, axis=None, title=None, meta=None, **kwargs):
         axis = Axis(axis)
     if elements is None:
         if not isinstance(axis, Axis) and sys.version_info[:2] < (3, 6):
+            # XXX: this should probably be a warning, not an error
             raise TypeError("axis argument should provide label order when using keyword arguments on Python < 3.6")
         elements = kwargs.items()
     elif kwargs:
@@ -8267,6 +8271,8 @@ def stack(elements=None, axis=None, title=None, meta=None, **kwargs):
         axis = elements.axes[axis]
         values = [elements[k] for k in axis]
     elif isinstance(elements, dict):
+        # TODO: support having no Axis object for Python3.7 (without error or warning)
+        # XXX: we probably want to support this with a warning on Python < 3.7
         assert isinstance(axis, Axis)
         values = [elements[v] for v in axis.labels]
     elif isinstance(elements, Iterable):
diff --git a/larray/core/axis.py b/larray/core/axis.py
index 46ab1d79d..a6c7b0058 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -104,6 +104,7 @@ def __init__(self, labels, name=None):
         self.__sorted_values = None
         self._length = None
         self._iswildcard = False
+        # set _labels, _length and _iswildcard via the property
         self.labels = labels
 
     @property
@@ -2059,6 +2060,9 @@ def copy(self):
         """
         return self[:]
 
+    # XXX: what's the point in supporting a list of Axis or AxisCollection in axes_to_replace?
+    #      it is used in LArray.set_axes but if it is only there, shouldn't the support for that be
+    #      moved there?
     def replace(self, axes_to_replace=None, new_axis=None, inplace=False, **kwargs):
         """Replace one, several or all axes of the collection.
 

From 319a124ea68910e072fb00064c4ba9ad337f6939 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 15:38:34 +0200
Subject: [PATCH 09/44] better docstrings in various functions & methods

---
 larray/core/array.py    | 45 +++++++++++++++++++++++++++--------------
 larray/core/metadata.py |  4 +---
 2 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 53e4d8195..4ee884cf9 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -250,7 +250,7 @@ def concat(arrays, axis=0, dtype=None):
     arrays : tuple of LArray
         Arrays to concatenate.
     axis : axis reference (int, str or Axis), optional
-        Axis along which to concatenate. Defaults to the first axis.
+        Axis along which to concatenate. All arrays must have that axis. Defaults to the first axis.
     dtype : dtype, optional
         Result data type. Defaults to the "closest" type which can hold all arrays types without loss of information.
 
@@ -383,7 +383,7 @@ def __setitem__(self, key, value):
 # TODO: rename to LArrayIndexPointsIndexer or something like that
 class LArrayPositionalPointsIndexer(object):
     """
-    the closer to numpy indexing we get, but not 100% the same.
+    the closest to numpy indexing we get, but not 100% the same.
     """
     def __init__(self, array):
         self.array = array
@@ -5444,7 +5444,7 @@ def divnot0(self, other):
     # XXX: rename/change to "add_axes" ?
     # TODO: add a flag copy=True to force a new array.
     def expand(self, target_axes=None, out=None, readonly=False):
-        """Expands array to target_axes.
+        r"""Expands array to target_axes.
 
         Target axes will be added to array if not present.
         In most cases this function is not needed because LArray can do operations with arrays having different
@@ -5452,7 +5452,7 @@ def expand(self, target_axes=None, out=None, readonly=False):
 
         Parameters
         ----------
-        target_axes : list of Axis or AxisCollection, optional
+        target_axes : string, list of Axis or AxisCollection, optional
             Self can contain axes not present in `target_axes`.
             The result axes will be: [self.axes not in target_axes] + target_axes
         out : LArray, optional
@@ -5471,22 +5471,37 @@ def expand(self, target_axes=None, out=None, readonly=False):
         >>> b = Axis('b=b1,b2')
         >>> arr = ndtest([a, b])
         >>> arr
-        a\\b  b1  b2
+        a\b  b1  b2
          a1   0   1
          a2   2   3
+
+        Adding one or several axes will append the new axes at the end
+
         >>> c = Axis('c=c1,c2')
+        >>> arr.expand(c)
+         a  b\c  c1  c2
+        a1   b1   0   0
+        a1   b2   1   1
+        a2   b1   2   2
+        a2   b2   3   3
+
+        If you want the new axes to be inserted in a particular order, you have to give that order
+
         >>> arr.expand([a, c, b])
-         a  c\\b  b1  b2
+         a  c\b  b1  b2
+        a1   c1   0   1
+        a1   c2   0   1
+        a2   c1   2   3
+        a2   c2   2   3
+
+        But it is enough to list only the added axes and the axes after them:
+
+        >>> arr.expand([c, b])
+         a  c\b  b1  b2
         a1   c1   0   1
         a1   c2   0   1
         a2   c1   2   3
         a2   c2   2   3
-        >>> arr.expand([b, c])
-         a  b\\c  c1  c2
-        a1   b1   0   0
-        a1   b2   1   1
-        a2   b1   2   2
-        a2   b2   3   3
         """
         if target_axes is None and out is None or target_axes is not None and out is not None:
             raise ValueError("either target_axes or out must be defined (not both)")
@@ -6700,8 +6715,8 @@ def shift(self, axis, n=1):
         ----------
         axis : int, str or Axis
             Axis for which we want to perform the shift.
-        n : int
-            Number of cells to shift.
+        n : int, optional
+            Number of cells to shift. Defaults to 1.
 
         Returns
         -------
@@ -8148,7 +8163,7 @@ def stack(elements=None, axis=None, title=None, meta=None, **kwargs):
     Returns
     -------
     LArray
-        A single array combining arrays.
+        A single array combining arrays. The new (stacked) axes will be the last axes of the new array.
 
     Examples
     --------
diff --git a/larray/core/metadata.py b/larray/core/metadata.py
index 1a7387f53..d208c0f31 100644
--- a/larray/core/metadata.py
+++ b/larray/core/metadata.py
@@ -89,7 +89,6 @@ def __repr__(self):
 
 else:
     class AttributeDict(OrderedDict):
-
         def __getattr__(self, key):
             try:
                 return self[key]
@@ -124,7 +123,7 @@ class Metadata(AttributeDict):
     >>> # Python 2 or <= 3.5
     >>> arr = ndtest((3, 3), meta=[('title', 'the title'), ('author', 'John Smith')])
     >>> # Python 3.6+
-    >>> arr = ndtest((3, 3), meta=Metadata(title = 'the title', author = 'John Smith'))  # doctest: +SKIP
+    >>> arr = ndtest((3, 3), meta=Metadata(title='the title', author='John Smith'))  # doctest: +SKIP
 
     Add metadata after array initialization
 
@@ -143,7 +142,6 @@ class Metadata(AttributeDict):
 
     >>> del arr.meta.creation_date
     """
-
     # TODO: use LArray.from_dict once ready (issue 581)
     def __larray__(self):
         from larray.core.array import LArray

From 018867611fc4156aa53f0768f23eaa8781889317 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 17 Oct 2018 15:58:46 +0200
Subject: [PATCH 10/44] document LArray.as_table(light=True)

---
 larray/core/array.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/larray/core/array.py b/larray/core/array.py
index 4ee884cf9..763a60642 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -2303,6 +2303,9 @@ def as_table(self, maxlines=None, edgeitems=5, light=False, wide=True, value_nam
             only the first and last `edgeitems` lines are displayed.
             Only active if `maxlines` is not None.
             Equals to 5 by default.
+        light : bool, optional
+            Whether or not to hide repeated labels. In other words, only show a label if it is different from the
+            previous one. Defaults to False.
         wide : boolean, optional
             Whether or not to write arrays in "wide" format. If True, arrays are exported with the last axis
             represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one

From fc7f32da7b5d66fb110529a86a708fe6f9b82c81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 17:43:41 +0200
Subject: [PATCH 11/44] updated design notes

---
 design.txt | 383 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 230 insertions(+), 153 deletions(-)

diff --git a/design.txt b/design.txt
index 3bd221c9d..07d407f99 100644
--- a/design.txt
+++ b/design.txt
@@ -1,6 +1,4 @@
-
-a(sex, age)
-age_limit(sex)
+assuming the following arrays: a(sex, age) and age_limit(sex)
 
 step 1:
 
@@ -10,10 +8,10 @@ b = a * (age > age_limit)
 
 step 2:
 
-a[x.age > age_limit]
-# this is also possible ("x.age > age_limit" return an Expr, expr is evaluated
+a[X.age > age_limit]
+# this is also possible ("X.age > age_limit" returns an Expr, expr is evaluated
 # during the binop (axes ref replace by real axe)
-b = a * (x.age > age_limit)
+b = a * (X.age > age_limit)
 
 ==============
 in general:
@@ -55,31 +53,28 @@ in general:
 
 # API for ND groups (my example is mixing label with positional):
 
-# union (bands): x.axis1[5:10] | x.axis2.i[3:4]
-# intersection/cross/default: x.axis1[5:10] & x.axis2.i[3:4]
-# points: x.axis1[5:10] ^ x.axis2.i[1:6]
-# ----> this prevents symetric difference. this is little used but...
-# ----> Points(x.axis[5:10], x.axis2.i[1:6])
+# union (bands): X.axis1[5:10] | X.axis2.i[3:4]
+# intersection/cross/default: X.axis1[5:10] & X.axis2.i[3:4]
+# points:
+# * X.axis1[5:10] ^ X.axis2.i[1:6] --> this prevents symmetric difference. this is little used but...
+# * Points(X.axis1[5:10], X.axis2.i[1:6])
+# * X.axis1[5:10].combine(X.axis2.i[1:6])
 
 # this is very nice and would have orderedset-like semantics
 
 # it does not seem to conflict with the axis methods (even though that might be
 # confusing):
 
-# x.axis1 | x.axis2 would have a very different meaning than
-# x.axis1[:] | x.axis2[:]
+# X.axis1 | X.axis2 would have a very different meaning than
+# X.axis1[:] | X.axis2[:]
 
 # Note that cross sections is the default and it is useless to introduce
 # another API **except to give a name**, so the & syntax is useless unless
 # we allow naming groups after the fact
 
-# => NDGroup((x.axis1[5:10], x.axis2.i[2.5]), 'exports')
-# => Group((x.axis1[5:10], x.axis2.i[2.5]), 'exports')
-# => (x.axis1[5:10] & x.axis2.i[2.5]).named('exports')
-
-# generalizing "named" and suppressing .group seems like a good idea!
-# => x.axis1.group([5, 7, 10], name='brussels')
-# => x.axis1[5, 7, 10].named('brussels')
+# => NDGroup((X.axis1[5:10], X.axis2.i[2.5]), 'exports')
+# => Group((X.axis1[5:10], X.axis2.i[2.5]), 'exports')
+# => (X.axis1[5:10] & X.axis2.i[2.5]).named('exports')
 
 # http://xarray.pydata.org/en/stable/indexing.html#pointwise-indexing
 # http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.lookup.html#pandas.DataFrame.lookup
@@ -92,14 +87,14 @@ in general:
 
 # I wonder if, for axes subscripting, I could not allow tuples as sequences,
 # which would make it a bit nicer:
-# x.axis1[5, 7, 10].named('brussels')
+# X.axis1[5, 7, 10].named('brussels')
 # instead of
-# x.axis1[[5, 7, 10]].named('brussels')
+# X.axis1[[5, 7, 10]].named('brussels')
 # since axes are always 1D, this is not a direct problem. However the
 # question is whether this would lead to an inconsistent API/confuse users
 # because they would still have to write the brackets when no axis is present
 # a[[5, 7, 9]]
-# a[x.axis1[5, 7, 9]]
+# a[X.axis1[5, 7, 9]]
 # in practice, this syntax is little used anyway
 
 # options
@@ -210,7 +205,7 @@ ou alors on utilise une méthode spécifique pour split (split ou groups ou
 multi):
 
 G.split[2, 5] == G[2], G[5]
-G.clength.split[2, 5:10, 20] == G.clength[2], G.clength[5:10], G.clength[5]
+G.clength.split[2, 5:10, 20] == G.clength[2], G.clength[5:10], G.clength[20]
 G.clength.split[2, 5] == G.clength[2], G.clength[5]
 
 
@@ -347,14 +342,14 @@ G.clength.split[2, 5] == G.clength[2], G.clength[5]
 
 # m = {G[2:7, 'M']: 1, G[2:7, 'F']: 2, G[5:10, 'M']: 3, G[5:10, 'F']: 4}
 # breaks if combination of axes
-# a.set(x.age[m])
+# a.set(X.age[m])
 
 2) multiple range in same [] means "and"
 =========================================
-   => set op if same axis, ND group otherwise
+   => and set op if same axis, ND group otherwise
 
-   G.age[5, 7, 9] == G.age[5] & G.age[7] & G.age[9] => BREAKS !
-   => must use G.age[[5, 7, 9]]
+   G.age[5, 7, 9] == G.age[5] & G.age[7] & G.age[9] => EMPTY group !
+   => MUST use double brackets: G.age[[5, 7, 9]]
 
    G.age[:20, 10:30] == G.age[:20] & G.age[10:30] == G.age[20:30]
    G[2:7, 'M', ['P01', 'P05']] == G[2:7] & G['M'] & G['P01', 'PO5']
@@ -394,16 +389,50 @@ G.clength.split[2, 5] == G.clength[2], G.clength[5]
    or whether slice or scalar
 ===========================================================================
 
+7) multiple range in same [] are only allowed for same axis (and means "or")
+============================================================================
+   => set op if same axis, different axis not allowed
+   => the definition of a Group is: a list of labels of one axis
+      implies more or less that we must have a different object for ND Groups
+   => implies more or less that we will not support
+          array['5, 7, 11, P01,P05, M']
+          array[5, 7, 11, 'P01, P05, M']
+      this is fine though:
+          array['5, 7, 11; P01, P05; M']
+          array[[5, 7, 11], ['P01', 'P05'], 'M']
+          array[[5, 7, 11], 'P01, P05', 'M']
+      and maybe this too:
+          array[[5, 7, 11], 'P01, P05; M']
+
+   G.age[5, 7, 11] == G.age[[5, 7, 11]] == G.age[5] | G.age[7] | G.age[11]
+   G.age[5, 7:9, 11] == G.age[5, 7, 8, 9, 11]
+   G.age[:20, 10:30] == G.age[:20] | G.age[10:30] == G.age[:30]
+   G[5, 7:9, 'M', ['P01', 'P05']]       --> fails because it tries to find a single axis containing all of those
+   G[5, 7:9] & G['M'] & G['P01', 'P05'] --> works (returns NDGroup)
+   G['5, 7:9; P01,P05; M']              --> returns NDGroup (same as above)
+   G[[5, 7:9], ['P01', 'P05'], 'M']     --> fails 7:9 is sadly an invalid syntax
+   G[[5, 7, 8, 9], 'P01, P05', 'M']     --> works too
+   == age[5, 7, 8, 9] & sex['M'] & lipro['P01', 'P05']
+   == NDGroup([[5, 7, 8, 9], 'M', ['P01', 'P05']], axes=['age', 'sex', 'lipro'])
+   OR
+   == NDGroup({'age': [5, 7, 8, 9],
+               'sex': 'M',
+               'lipro': ['P01', 'P05']})
+
+   '5,7:9; M; P01,P05'
+   'age[5,7:9]; sex[M]; lipro[P01,P05]'
+
 
 # use cases
 
 # 1) simple get/set
 
+a['2:7; M; P01,P02']
 a[2:7, 'M', ['P01', 'P02']]
 
 # 2) boolean selection
 
-a[(x.age < 10) | (x.clength > 5)]
+a[(X.age < 10) | (X.clength > 5)]
 
 # 3) simple with ambiguous values
 
@@ -413,16 +442,19 @@ a[G.age[2:7], G.clength[5, 7, 9], 'M', ['P01', 'P02']]
 
 a[G.age[2:4] ^ G.clength[5, 7, 9], 'M', ['P01', 'P02']]
 a[G[2, 9, 3] ^ G['M', 'F', 'M'], ['P01', 'P02']]
+# set "diagonal" to 0
+countries = ...
+use[src[countries] ^ dst[countries]] = 0
+
 
 # 4b) lookup (this is a form of point-selection), wh potentially repeated values
 person_age = [5, 1, 2, 1, 6, 5]
 person_gender = ['M', 'F', 'F', 'M', 'M', 'F']
 person_workstate = [1, 3, 1, 2, 1, 3]
-income = mean_income[person_age, person_gender]  # <-- no ! does cross product
+income = mean_income[person_age, person_gender]  # <-- FAILS ! (it does a cross product)
 income = mean_income[G[person_age] ^ G[person_gender]]
-income = mean_income[G.points[person_age, person_gender]] # <-- disallow having
-                                                          # an axis named
-                                                          # "points"
+income = mean_income[G[person_age].combine(G[person_gender])]
+income = mean_income[G.points[person_age, person_gender]] # <-- disallow having an axis named "points"
 income = mean_income.points[person_age, person_gender]
 # if ambiguous
 income = mean_income.points[G.age[person_age], person_gender]
@@ -453,7 +485,7 @@ income = extra_income[LK[person_gender] & (LK[workstate] == 1)]
 #    .points by default.
 # A: yes, that's an option but would not solve the "set" problem.
 
-# => I NEED a way to set the axis on an LKey. maybe x.abc[LK] should not
+# => I NEED a way to set the axis on an LKey. maybe X.abc[LK] should not
 # return an LSet? but an LKey with an axis.
 
 
@@ -520,8 +552,8 @@ act3 = table(['sub', '40+',   '-39',    '40+'],
 
 # 6) multi slices, aggregate (one group per slice)
 
-# groups = (x.clength[1:15], x.clength[16:25], x.clength[26:30],
-#           x.clength[31:35], x.clength[36:40], x.clength[41:50])
+# groups = (X.clength[1:15], X.clength[16:25], X.clength[26:30],
+#           X.clength[31:35], X.clength[36:40], X.clength[41:50])
 # agg = arr.sum(groups)
 
 # groups = G.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]
@@ -532,46 +564,46 @@ act3 = table(['sub', '40+',   '-39',    '40+'],
 # 7) multi slices, assign one value per slice
 
 # multip_mat_min = zeros([clength, year])
-# multip_mat_min[x.clength[1:15], x.year[first_year_p:2024]] = 7 / 7
-# multip_mat_min[x.clength[16:25], x.year[first_year_p:2024]] = 20 / 20
-# multip_mat_min[x.clength[26:30], x.year[first_year_p:2024]] = 27 / 27
-# multip_mat_min[x.clength[31:35], x.year[first_year_p:2024]] = 32 / 32
-# multip_mat_min[x.clength[36:40], x.year[first_year_p:2024]] = 37 / 37
-# multip_mat_min[x.clength[41:50], x.year[first_year_p:2024]] = 42 / 42
-# multip_mat_min[x.clength[1:15], x.year[2025:2029]] = 8 / 7
-# multip_mat_min[x.clength[16:25], x.year[2025:2029]] = 21 / 20
-# multip_mat_min[x.clength[26:30], x.year[2025:2029]] = 28 / 27
-# multip_mat_min[x.clength[31:35], x.year[2025:2029]] = 33 / 32
-# multip_mat_min[x.clength[36:40], x.year[2025:2029]] = 38 / 37
-# multip_mat_min[x.clength[41:50], x.year[2025:2029]] = 43 / 42
-# multip_mat_min[x.clength[1:15], x.year[2030:]] = 9 / 7
-# multip_mat_min[x.clength[16:25], x.year[2030:]] = 22 / 20
-# multip_mat_min[x.clength[26:30], x.year[2030:]] = 29 / 27
-# multip_mat_min[x.clength[31:35], x.year[2030:]] = 34 / 32
-# multip_mat_min[x.clength[36:40], x.year[2030:]] = 39 / 37
-# multip_mat_min[x.clength[41:50], x.year[2030:]] = 44 / 42
+# multip_mat_min[X.clength[1:15], X.year[first_year_p:2024]] = 7 / 7
+# multip_mat_min[X.clength[16:25], X.year[first_year_p:2024]] = 20 / 20
+# multip_mat_min[X.clength[26:30], X.year[first_year_p:2024]] = 27 / 27
+# multip_mat_min[X.clength[31:35], X.year[first_year_p:2024]] = 32 / 32
+# multip_mat_min[X.clength[36:40], X.year[first_year_p:2024]] = 37 / 37
+# multip_mat_min[X.clength[41:50], X.year[first_year_p:2024]] = 42 / 42
+# multip_mat_min[X.clength[1:15], X.year[2025:2029]] = 8 / 7
+# multip_mat_min[X.clength[16:25], X.year[2025:2029]] = 21 / 20
+# multip_mat_min[X.clength[26:30], X.year[2025:2029]] = 28 / 27
+# multip_mat_min[X.clength[31:35], X.year[2025:2029]] = 33 / 32
+# multip_mat_min[X.clength[36:40], X.year[2025:2029]] = 38 / 37
+# multip_mat_min[X.clength[41:50], X.year[2025:2029]] = 43 / 42
+# multip_mat_min[X.clength[1:15], X.year[2030:]] = 9 / 7
+# multip_mat_min[X.clength[16:25], X.year[2030:]] = 22 / 20
+# multip_mat_min[X.clength[26:30], X.year[2030:]] = 29 / 27
+# multip_mat_min[X.clength[31:35], X.year[2030:]] = 34 / 32
+# multip_mat_min[X.clength[36:40], X.year[2030:]] = 39 / 37
+# multip_mat_min[X.clength[41:50], X.year[2030:]] = 44 / 42
 #
 # # already possible
 # m = zeros(clength)
-# m[x.clength[1:15]] = 7
-# m[x.clength[16:25]] = 20
-# m[x.clength[26:30]] = 27
-# m[x.clength[31:35]] = 32
-# m[x.clength[36:40]] = 37
-# m[x.clength[41:50]] = 42
+# m[X.clength[1:15]] = 7
+# m[X.clength[16:25]] = 20
+# m[X.clength[26:30]] = 27
+# m[X.clength[31:35]] = 32
+# m[X.clength[36:40]] = 37
+# m[X.clength[41:50]] = 42
 # multip_mat_min = zeros([clength, year])
-# multip_mat_min[x.year[:2024]] = m / m
-# multip_mat_min[x.year[2025:2029]] = (m + 1) / m
-# multip_mat_min[x.year[2030:]] = (m + 2) / m
+# multip_mat_min[X.year[:2024]] = m / m
+# multip_mat_min[X.year[2025:2029]] = (m + 1) / m
+# multip_mat_min[X.year[2030:]] = (m + 2) / m
 
 # >>> very nice for this case but it does not scale very well with number of
 #     values to set. On the other hand, splitting it in case it does not fit
 #     on a line is not TOO horrible (just a bit horrible ;-))
 # m = zeros(clength)
-# m[x.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]] = \
+# m[X.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]] = \
 #            [   7,    20,    27,    32,    37,     42]
 # multip_mat_min = zeros([clength, year])
-# multip_mat_min[x.year[:2024,   2025:2029,       2030:]] = \
+# multip_mat_min[X.year[:2024,   2025:2029,       2030:]] = \
 #                      [m / m, (m + 1) / m, (m + 2) / m]
 
 # m = zeros(clength)
@@ -613,12 +645,12 @@ act3 = table(['sub', '40+',   '-39',    '40+'],
 # a[G.age[5, 7, 9]]
 # a[G.geo[5, 7, 9].named('brussels')]
 
-# a[x.age[G[5, 7, 9]]]
-# a[x.age[G[5, 7, 9].named('brussels')]]
+# a[X.age[G[5, 7, 9]]]
+# a[X.age[G[5, 7, 9].named('brussels')]]
 
 # a[G.get('strange axis')[5, 7, 9].named('Brussels')]
 
-# a[x.age[5, 7, 9]]
+# a[X.age[5, 7, 9]]
 
 # positional groups *without axis* (G.i, P[], or I[]) does not make much sense,
 # because it will matches all axes, but might be useful as an intermediate
@@ -673,29 +705,29 @@ act3 = table(['sub', '40+',   '-39',    '40+'],
 # we also need the best possible syntax to handle, "arbitrary" resampling
 
 # pure_min_w1_comp_agg = zeros(result_axes)
-# pure_min_w1_comp_agg[x.LBMosesXLS[1]] = pure_min_w1_comp.sum(x.clength[1:15])
-# pure_min_w1_comp_agg[x.LBMosesXLS[2]] = pure_min_w1_comp.sum(x.clength[16:25])
-# pure_min_w1_comp_agg[x.LBMosesXLS[3]] = pure_min_w1_comp.sum(x.clength[26:30])
-# pure_min_w1_comp_agg[x.LBMosesXLS[4]] = pure_min_w1_comp.sum(x.clength[31:35])
-# pure_min_w1_comp_agg[x.LBMosesXLS[5]] = pure_min_w1_comp.sum(x.clength[36:40])
-# pure_min_w1_comp_agg[x.LBMosesXLS[6]] = pure_min_w1_comp.sum(x.clength[41:50])
+# pure_min_w1_comp_agg[X.LBMosesXLS[1]] = pure_min_w1_comp.sum(X.clength[1:15])
+# pure_min_w1_comp_agg[X.LBMosesXLS[2]] = pure_min_w1_comp.sum(X.clength[16:25])
+# pure_min_w1_comp_agg[X.LBMosesXLS[3]] = pure_min_w1_comp.sum(X.clength[26:30])
+# pure_min_w1_comp_agg[X.LBMosesXLS[4]] = pure_min_w1_comp.sum(X.clength[31:35])
+# pure_min_w1_comp_agg[X.LBMosesXLS[5]] = pure_min_w1_comp.sum(X.clength[36:40])
+# pure_min_w1_comp_agg[X.LBMosesXLS[6]] = pure_min_w1_comp.sum(X.clength[41:50])
 #
-# clength_groups = (x.clength[1:15], x.clength[16:25], x.clength[26:30],
-#                   x.clength[31:35], x.clength[36:40], x.clength[41:50])
+# clength_groups = (X.clength[1:15], X.clength[16:25], X.clength[26:30],
+#                   X.clength[31:35], X.clength[36:40], X.clength[41:50])
 # pure_min_w1_comp_agg2 = pure_min_w1_comp.sum(clength_groups).rename(
-#     x.clength, x.LBMosesXLS)
+#     X.clength, X.LBMosesXLS)
 
 # clength_groups = (L[1:15], L[16:25], L[26:30],
 #                   L[31:35], L[36:40], L[41:50])
 # pure_min_w1_comp_agg2 = pure_min_w1_comp.sum(clength_groups).rename(
-#     x.clength, x.LBMosesXLS)
+#     X.clength, X.LBMosesXLS)
 #
-# clength_groups = x.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]
+# clength_groups = X.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]
 # pure_min_w1_comp_agg2 = pure_min_w1_comp.sum(clength_groups)
 #
 # clength_groups = G[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]
 # pure_min_w1_comp_agg2 = pure_min_w1_comp.sum(clength_groups) \
-#                                         .replace(x.clength, LBMosesXLS)
+#                                         .replace(X.clength, LBMosesXLS)
 
 # XXX: what if I want to sum on all the slices (as if it was a single slice)
 # clength_groups = G[1:15] | G[16:25] | G[26:30] | G[31:35] | G[36:40] | G[41:50]
@@ -715,40 +747,40 @@ act3 = table(['sub', '40+',   '-39',    '40+'],
 # ])
 #
 # multip_mat_min = zeros([clength, year])
-# multip_mat_min[x.clength[1:15], x.year[first_year_p:2024]] = 7 / 7
-# multip_mat_min[x.clength[16:25], x.year[first_year_p:2024]] = 20 / 20
-# multip_mat_min[x.clength[26:30], x.year[first_year_p:2024]] = 27 / 27
-# multip_mat_min[x.clength[31:35], x.year[first_year_p:2024]] = 32 / 32
-# multip_mat_min[x.clength[36:40], x.year[first_year_p:2024]] = 37 / 37
-# multip_mat_min[x.clength[41:50], x.year[first_year_p:2024]] = 42 / 42
-# multip_mat_min[x.clength[1:15], x.year[2025:2029]] = 8 / 7
-# multip_mat_min[x.clength[16:25], x.year[2025:2029]] = 21 / 20
-# multip_mat_min[x.clength[26:30], x.year[2025:2029]] = 28 / 27
-# multip_mat_min[x.clength[31:35], x.year[2025:2029]] = 33 / 32
-# multip_mat_min[x.clength[36:40], x.year[2025:2029]] = 38 / 37
-# multip_mat_min[x.clength[41:50], x.year[2025:2029]] = 43 / 42
-# multip_mat_min[x.clength[1:15], x.year[2030:]] = 9 / 7
-# multip_mat_min[x.clength[16:25], x.year[2030:]] = 22 / 20
-# multip_mat_min[x.clength[26:30], x.year[2030:]] = 29 / 27
-# multip_mat_min[x.clength[31:35], x.year[2030:]] = 34 / 32
-# multip_mat_min[x.clength[36:40], x.year[2030:]] = 39 / 37
-# multip_mat_min[x.clength[41:50], x.year[2030:]] = 44 / 42
+# multip_mat_min[X.clength[1:15], X.year[first_year_p:2024]] = 7 / 7
+# multip_mat_min[X.clength[16:25], X.year[first_year_p:2024]] = 20 / 20
+# multip_mat_min[X.clength[26:30], X.year[first_year_p:2024]] = 27 / 27
+# multip_mat_min[X.clength[31:35], X.year[first_year_p:2024]] = 32 / 32
+# multip_mat_min[X.clength[36:40], X.year[first_year_p:2024]] = 37 / 37
+# multip_mat_min[X.clength[41:50], X.year[first_year_p:2024]] = 42 / 42
+# multip_mat_min[X.clength[1:15], X.year[2025:2029]] = 8 / 7
+# multip_mat_min[X.clength[16:25], X.year[2025:2029]] = 21 / 20
+# multip_mat_min[X.clength[26:30], X.year[2025:2029]] = 28 / 27
+# multip_mat_min[X.clength[31:35], X.year[2025:2029]] = 33 / 32
+# multip_mat_min[X.clength[36:40], X.year[2025:2029]] = 38 / 37
+# multip_mat_min[X.clength[41:50], X.year[2025:2029]] = 43 / 42
+# multip_mat_min[X.clength[1:15], X.year[2030:]] = 9 / 7
+# multip_mat_min[X.clength[16:25], X.year[2030:]] = 22 / 20
+# multip_mat_min[X.clength[26:30], X.year[2030:]] = 29 / 27
+# multip_mat_min[X.clength[31:35], X.year[2030:]] = 34 / 32
+# multip_mat_min[X.clength[36:40], X.year[2030:]] = 39 / 37
+# multip_mat_min[X.clength[41:50], X.year[2030:]] = 44 / 42
 #
 # # already possible
 # m = zeros(clength)
-# m[x.clength[1:15]] = 7
-# m[x.clength[16:25]] = 20
-# m[x.clength[26:30]] = 27
-# m[x.clength[31:35]] = 32
-# m[x.clength[36:40]] = 37
-# m[x.clength[41:50]] = 42
+# m[X.clength[1:15]] = 7
+# m[X.clength[16:25]] = 20
+# m[X.clength[26:30]] = 27
+# m[X.clength[31:35]] = 32
+# m[X.clength[36:40]] = 37
+# m[X.clength[41:50]] = 42
 # multip_mat_min = zeros([clength, year])
-# multip_mat_min[x.year[:2024]] = m / m
-# multip_mat_min[x.year[2025:2029]] = (m + 1) / m
-# multip_mat_min[x.year[2030:]] = (m + 2) / m
+# multip_mat_min[X.year[:2024]] = m / m
+# multip_mat_min[X.year[2025:2029]] = (m + 1) / m
+# multip_mat_min[X.year[2030:]] = (m + 2) / m
 
 # TODO: it would be nice to be able to say:
-# m[x.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]] = [7, 20, 27, 32, 37, 42]
+# m[X.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]] = [7, 20, 27, 32, 37, 42]
 # but I am unsure it is possible/unambiguous
 
 # this kind of pattern is not supported by numpy
@@ -773,13 +805,12 @@ act3 = table(['sub', '40+',   '-39',    '40+'],
 # (and if not, whether or not we can come up with a syntax that is both nice
 #  and not ambiguous)
 
-# m[x.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]] = \
-#            [7, 20, 27, 32, 37, 42]
-# multip_mat_min[x.year[:2024, 2025:2029, 2030:]] = [m / m, (m + 1) / m,
+# m[X.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]] = [7, 20, 27, 32, 37, 42]
+# multip_mat_min[X.year[:2024, 2025:2029, 2030:]] = [m / m, (m + 1) / m,
 #                                                   (m + 2) / m]
 
 # for the multi-value case to work I would probably have to make
-# m[x.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]]
+# m[X.clength[1:15, 16:25, 26:30, 31:35, 36:40, 41:50]]
 # return multiple arrays (as a tuple of arrays or an array of arrays)
 # with pandas/MI support, we could just return an array with
 # a (second) clength axis
@@ -798,18 +829,19 @@ act3 = table(['sub', '40+',   '-39',    '40+'],
 # aggregate)
 
 # ideally s.sum() would first sum each array then sum those sums
-# and s.sum(x.age) would sum each array along age
-# and s.sum(x.arrays) would try to add arrays together (and fail in
+# and s.sum(X.age) would sum each array along age
+# and s.sum(X.arrays) would try to add arrays together (and fail in
 # some/most cases)
 # the problem is that one important use case is not covered:
-# aggregating along all dimensions of the arrays but NOT on x.arrays
+# aggregating along all dimensions of the arrays but NOT on X.arrays
+# but see below for solutions
 
 # Q: s.elements.sum() (or s.arrays.sum()) vs s.sum() solve this?
 # A1: s.arrays.sum() would dispatch to each array and return a new Session
 #     s.sum() would try to do s.arrays.sum().sum()
 #     seems doable...
 
-# A2: s.sum_by(x.arrays) (like Pandas default aggregate) would solve
+# A2: s.sum_by(X.arrays) (like Pandas default aggregate) would solve
 #     the issue even more nicely, but this is a bit more work (is it?) and
 #     can be safely added later.
 
@@ -821,7 +853,7 @@ act3 = table(['sub', '40+',   '-39',    '40+'],
 # A:
 
 # Q: what happens when you do s1 + s2 ?
-# A: same than s1.arrays + s2.arrays
+# A: same as [a1 + a2 for a1, a2 in zip(s1, s2)]
 #    if we view s1 as a big array with an extra dimension, it would give
 #    that result (modulo union of names until we are Pandas based)
 
@@ -835,7 +867,7 @@ act3 = table(['sub', '40+',   '-39',    '40+'],
 #    (s1 == s2).all()
 
 # Q: what if I want to know which arrays are equal and which are not?
-# A: (s1 == s2).all_by(x.arrays)
+# A: (s1 == s2).all_by(X.arrays)
 
 # boolean ops
 # ===========
@@ -908,20 +940,20 @@ act3 = table(['sub', '40+',   '-39',    '40+'],
     >>> a.sum('10:19 > 10_19 ; 20:29 > 20_29 ; year=#-1')
     >>> a.sum('(10:19 > 10_19 ; 20:29 > 20_29) & year=#-1')
     >>> teens = G['age=10:19 >> teens']
-    >>> teens = x.age[10:19].named('teens')
+    >>> teens = X.age[10:19].named('teens')
     >>> twenties = G['age=20:29']
     >>> a.sum('({teens}, {twenties})')
     >>> a.sum((teens, twenties))
     # will we ever want to support this?
     >>> a.sum('age > clength')
     >>> a.sum('age > {ext}')
-    >>> a.sum(x.age > ext)
+    >>> a.sum(X.age > ext)
     >>> a.sum('age > 10')
     LGroup(['a', 'b', 'c'], name='abc')
 
 
-expend_flow[x.cat_from['married_women'], x.cat_to['retirement_survival_women'], y] = \
-        flow[x.cat_from['married_women'], x.cat_to['retirement_survival_women'], y] * \
+expend_flow[X.cat_from['married_women'], X.cat_to['retirement_survival_women'], y] = \
+        flow[X.cat_from['married_women'], X.cat_to['retirement_survival_women'], y] * \
         pension_age_diff_lag['married_men', y] * 1.1 * (45 / average_clength_survival['married_men', y])
 
 expend_flow['cat_from[married_women], cat_to[retirement_survival_women]', y] = \
@@ -957,45 +989,45 @@ expend_flow['cat_from[married_women], cat_to[retirement_survival_women]', y] = \
 # ================ set operation on groups ===============
 # ========================================================
 
-we want + and - ops on groups to be both set operation
-or arithmetic operation depending on the case.
+PROBLEM: we want __sub__ op on groups to be either a set operation or an arithmetic operation depending on the case.
 
 
 for y in time[start_year + 1:]:
     res = a[y + 1]
 
-for c in sutcode.matches('^...$') + sutcode.matches('^..$') - 'ND':
-    g = sutcode.startswith(c) - c
+for c in sutcode.matching('^...$') + sutcode.matching('^..$') - 'ND':
+    g = sutcode.startingwith(c) - c
 
-# option 1
-# ========
 
-op on evaluated key by default (whatever it is -- scalar or ndarray)
+# option 1 (current)
+# ==================
+
+execute __op__ on key.eval() by default (whatever it is -- scalar or ndarray)
 set ops must use specific methods
 
 for y in time[start_year + 1:]:
     res = a[y + 1]
 
-for c in sutcode.matches('^...$').union(sutcode.matches('^..$')).setdiff('ND'):
-    g = sutcode.startswith(c).setdiff(c)
-for c in sutcode.matches('^...$').union(sutcode.matches('^..$')).difference('ND'):
-    g = sutcode.startswith(c).difference(c)
+for c in sutcode.matching('^...$').union(sutcode.matching('^..$')).difference('ND'):
+    g = sutcode.startingwith(c).difference(c)
 
-# option 2
-# ========
 
-op on evaluated key by default (whatever it is -- scalar or ndarray)
-convert LGroup to LSet using method a specific method
+# option 2 (current too)
+# ======================
+
+execute __op__ on key.eval() by default (whatever it is -- scalar or ndarray)
+convert LGroup to LSet using a specific method
 
 for y in time[start_year + 1:]:
     res = a[y + 1]
 
 # the second .set() is optional
-for c in sutcode.matches('^...$').set() | sutcode.matches('^..$').set() - 'ND':
-    g = sutcode.startswith(c).set() - c
+for c in sutcode.matching('^...$').set() | sutcode.matching('^..$').set() - 'ND':
+    g = sutcode.startingwith(c).set() - c
 
-# option 3 (current)
-# ==================
+
+# option 3 (before)
+# =================
 
 set op on evaluated key by default
 need to use .labels on the axis or .eval() on the group to do arithmetic ops
@@ -1005,31 +1037,78 @@ for y in time.labels:
 for y in time[start_year + 1:].eval():
     res = a[y + 1]
 
-for c in sutcode.matches('^...$') | sutcode.matches('^..$') - 'ND':
-    g = sutcode.startswith(c) - c
+for c in sutcode.matching('^...$') | sutcode.matching('^..$') - 'ND':
+    g = sutcode.startingwith(c) - c
+
 
 # option 4
 # ========
 
-set op if sequence, arithmetic if scalar. This looks good in our example and is usually what people want
-but this is not the path of least surprise !
+set op if "current" (left object) is a sequence, arithmetic if scalar.
+This looks good in our example and is usually what people want but this is not the path of least surprise !
+
+for y in time[start_year + 1:]:
+    # expected result (arithmetic)
+    res = a[y + 1]
+
+# expected result (set op)
+for c in sutcode.matching('^...$') | sutcode.matching('^..$') - 'ND':
+    # expected result (set op)
+    g = sutcode.startingwith(c) - c
+
+# UNEXPECTED result (set op)
+for age in age[1:] + 1:
+
+
+# option 5
+# ========
+
+set op if string type (scalar or sequence), arithmetic if *numeric* (scalar or sequence).
+This also looks good in our example and is usually what people want but this can lead to surprises too !
 
 for y in time[start_year + 1:]:
+    # expected result (arithmetic)
     res = a[y + 1]
 
-for c in sutcode.matches('^...$') | sutcode.matches('^..$') - 'ND':
-    g = sutcode.startswith(c) - c
+# expected result (set op)
+for c in sutcode.matching('^...$') | sutcode.matching('^..$') - 'ND':
+    # expected result (set op)
+    g = sutcode.startingwith(c) - c
 
 
-# an example of unexpected result would be:
-age[1:] + 1
+# expected result (arith op)
+for age in age[1:] + 1:
+    ...
 
+# unexpected result (arith op)
+codes.in_([1, 2, 5]) - bad_code
+
+
+# option 6 (variant of option 4)
+# ==============================
+
+iter(Axis) and iter(Group) return a Label, not a Group.
+now we could make axis[a_single_label] return either a Label or a Group, but it would probably be cleaner to kill
+"scalar" groups altogether, so we would have:
+* axis[a_single_label] returns a Label
+* axis[[a_single_label]] returns a Group with a single element. This is not the same as (and never was) a scalar Group.
+In either case, on Group: "setish" ops (ie allow duplicates on the LHS)
+on Label: op on .eval()
 
 for y in time[start_year + 1:]:
+    # expected result (arithmetic via Label)
     res = a[y + 1]
 
-for c in sutcode.matches('^...$') | sutcode.matches('^..$') - 'ND':
-    g = sutcode.startswith(c) - c
+# expected result (set op)
+for c in sutcode.matching('^...$') | sutcode.matching('^..$') - 'ND':
+    # expected result (set op)
+    g = sutcode.startingwith(c) - c
+
+# somewhat UNEXPECTED result (fails: no + defined on Group)
+for age in age[1:] + 1:
+# somewhat UNEXPECTED result (set op)
+for age in age[1:] - 1:
+
 
 ==========================================
 ==========================================
@@ -1054,5 +1133,3 @@ subset = pop.q('M, sum(10:20 >> yada1, 20:30 >> yada2'))
 # if using a function (like .q) we could also "rename" axes on the fly. the above would create an aggregated axis
 # named "age" but the code below would create "toto" instead
 subset = pop.q('M', toto=age.sum[10:20, 20:30])
-
-

From ae72b451bad14983146b4861df8a3c239d8d367c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Thu, 27 Sep 2018 11:31:24 +0200
Subject: [PATCH 12/44] moved ratio and rationot0 to the Aggregation function
 section of the API

---
 doc/source/api.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index 9a42ac373..8cc8a3c72 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -357,6 +357,8 @@ Aggregation Functions
    LArray.ptp
    LArray.with_total
    LArray.percent
+   LArray.ratio
+   LArray.rationot0
    LArray.growth_rate
    LArray.describe
    LArray.describe_by
@@ -437,8 +439,6 @@ Miscellaneous
 .. autosummary::
    :toctree: _generated/
 
-   LArray.ratio
-   LArray.rationot0
    LArray.divnot0
    LArray.clip
    LArray.shift

From b81a6c4c9bacfe5b629570676e5372c6b5c747f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 23 Nov 2018 14:54:27 +0100
Subject: [PATCH 13/44] better error message

---
 larray/core/axis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/larray/core/axis.py b/larray/core/axis.py
index a6c7b0058..5c5644280 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -182,7 +182,7 @@ def labels(self):
     @labels.setter
     def labels(self, labels):
         if labels is None:
-            raise TypeError("labels should be a sequence or a single int")
+            raise TypeError("labels should be a sequence or a single int, not None")
         if isinstance(labels, (int, long, np.integer)):
             length = labels
             labels = np.arange(length)

From 4c7c9b261191799abfa9f54e820e1f3fb3f9598d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Thu, 4 Oct 2018 09:31:37 +0200
Subject: [PATCH 14/44] small code cleanups in various functions

incidentally it also makes .reshape support string axes collection...
not that it matters much since it should be a private method these days anyway
---
 larray/core/array.py   | 17 ++++++++++-------
 larray/inout/pandas.py |  8 ++++----
 larray/random.py       |  4 ++--
 larray/util/misc.py    |  2 +-
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 763a60642..78a0e0c02 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -1332,10 +1332,10 @@ def rename(self, renames=None, to=None, inplace=False, **kwargs):
             items = []
         items += kwargs.items()
         renames = {self.axes[k]: v for k, v in items}
-        axes = [a.rename(renames[a]) if a in renames else a
-                for a in self.axes]
+        axes = AxisCollection([a.rename(renames[a]) if a in renames else a
+                               for a in self.axes])
         if inplace:
-            self.axes = AxisCollection(axes)
+            self.axes = axes
             return self
         else:
             return LArray(self.data, axes)
@@ -1834,8 +1834,7 @@ def sort_key(axis):
                 key = key[::-1]
             return axis.i[key]
 
-        res = self[tuple(sort_key(axis) for axis in axes)]
-        return res
+        return self[tuple(sort_key(axis) for axis in axes)]
 
     sort_axis = renamed_to(sort_axes, 'sort_axis')
 
@@ -2077,6 +2076,7 @@ def set(self, value, **kwargs):
         """
         self.__setitem__(kwargs, value)
 
+    # TODO: this should be a private method
     def reshape(self, target_axes):
         """
         Given a list of new axes, changes the shape of the array.
@@ -2121,9 +2121,12 @@ def reshape(self, target_axes):
         #            -> 3, 8 WRONG (non adjacent dimensions)
         #            -> 8, 3 WRONG
         #    4, 3, 2 -> 2, 2, 3, 2 is potentially ok (splitting dim)
-        data = np.asarray(self).reshape([len(axis) for axis in target_axes])
+        if not isinstance(target_axes, AxisCollection):
+            target_axes = AxisCollection(target_axes)
+        data = np.asarray(self).reshape(target_axes.shape)
         return LArray(data, target_axes)
 
+    # TODO: this should be a private method
     def reshape_like(self, target):
         """
         Same as reshape but with an array as input.
@@ -8405,7 +8408,7 @@ def raw_broadcastable(values, min_axes=None):
     """
     same as make_numpy_broadcastable but returns numpy arrays
     """
-    arrays, res_axes = make_numpy_broadcastable(values, min_axes)
+    arrays, res_axes = make_numpy_broadcastable(values, min_axes=min_axes)
     raw = [a.data if isinstance(a, LArray) else a
            for a in arrays]
     return raw, res_axes
diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py
index c40801977..2a5dc9e8a 100644
--- a/larray/inout/pandas.py
+++ b/larray/inout/pandas.py
@@ -7,7 +7,7 @@
 import pandas as pd
 
 from larray.core.array import LArray
-from larray.core.axis import Axis
+from larray.core.axis import Axis, AxisCollection
 from larray.core.group import LGroup
 from larray.core.constants import nan
 from larray.util.misc import basestring, decode, unique
@@ -67,7 +67,7 @@ def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan
     columns = sorted(df.columns) if sort_columns else list(df.columns)
     # the prodlen test is meant to avoid the more expensive array_equal test
     prodlen = np.prod([len(axis_labels) for axis_labels in labels])
-    if prodlen == len(df) and columns == list(df.columns) and np.array_equal(df.index.values, new_index.values):
+    if prodlen == len(df) and columns == list(df.columns) and np.array_equal(idx.values, new_index.values):
         return df, labels
     return df.reindex(index=new_index, columns=columns, fill_value=fill_value, **kwargs), labels
 
@@ -233,8 +233,8 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
     axes_names = [str(name) if name is not None else name
                   for name in axes_names]
 
-    axes = [Axis(labels, name) for labels, name in zip(axes_labels, axes_names)]
-    data = df.values.reshape([len(axis) for axis in axes])
+    axes = AxisCollection([Axis(labels, name) for labels, name in zip(axes_labels, axes_names)])
+    data = df.values.reshape(axes.shape)
     return LArray(data, axes, meta=meta)
 
 
diff --git a/larray/random.py b/larray/random.py
index 9437c40ab..08a066e38 100644
--- a/larray/random.py
+++ b/larray/random.py
@@ -26,7 +26,7 @@
 import numpy as np
 
 from larray.core.axis import Axis, AxisCollection
-from larray.core.array import LArray, aslarray, stack, ndtest
+from larray.core.array import LArray, aslarray
 from larray.core.array import raw_broadcastable
 import larray as la
 
@@ -35,7 +35,7 @@
 
 
 def generic_random(np_func, args, min_axes, meta):
-    args, res_axes = raw_broadcastable(args, min_axes)
+    args, res_axes = raw_broadcastable(args, min_axes=min_axes)
     res_data = np_func(*args, size=res_axes.shape)
     return LArray(res_data, res_axes, meta=meta)
 
diff --git a/larray/util/misc.py b/larray/util/misc.py
index 0acf18ecc..e26fc5c16 100644
--- a/larray/util/misc.py
+++ b/larray/util/misc.py
@@ -63,7 +63,7 @@ def is_interactive_interpreter():
 
 def csv_open(filename, mode='r'):
     assert 'b' not in mode and 't' not in mode
-    if sys.version < '3':
+    if PY2:
         return open(filename, mode + 'b')
     else:
         return open(filename, mode, newline='', encoding='utf8')

From ae905aec4568bc32480e2cf109c31fbdd9f52d5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Tue, 2 Oct 2018 11:23:09 +0200
Subject: [PATCH 15/44] made IGroupMaker (axis.i) a valid Sequence

(added __len__ and check that key < len(axis) in IGroupMaker)
---
 larray/core/group.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/larray/core/group.py b/larray/core/group.py
index 15ce0aa65..fbd2b5228 100644
--- a/larray/core/group.py
+++ b/larray/core/group.py
@@ -712,8 +712,13 @@ def __init__(self, axis):
         self.axis = axis
 
     def __getitem__(self, key):
+        if isinstance(key, (int, np.integer)) and not isinstance(self.axis, ABCAxisReference) and key >= len(self.axis):
+            raise IndexError("{} is out of range for axis of length {}".format(key, len(self.axis)))
         return IGroup(key, None, self.axis)
 
+    def __len__(self):
+        return len(self.axis)
+
 
 # We need a separate class for LGroup and cannot simply create a new Axis with a subset of values/ticks/labels:
 # the subset of ticks/labels of the LGroup need to correspond to its *Axis* indices

From f1f1c51d2832efbc1e496d493daebd0e3f17fe4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 23 Nov 2018 11:20:19 +0100
Subject: [PATCH 16/44] WIP: added dtype argument to stack and LArray (need to
 split commit & add changelog)

---
 larray/core/array.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 78a0e0c02..9913c078f 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -577,6 +577,8 @@ class LArray(ABCLArray):
     meta : list of pairs or dict or OrderedDict or Metadata, optional
         Metadata (title, description, author, creation_date, ...) associated with the array.
         Keys must be strings. Values must be of type string, int, float, date, time or datetime.
+    dtype : type, optional
+        Datatype for the array. Defaults to None (inferred from the data).
 
     Attributes
     ----------
@@ -655,8 +657,8 @@ class LArray(ABCLArray):
           F  10  11  12
     """
 
-    def __init__(self, data, axes=None, title=None, meta=None):
-        data = np.asarray(data)
+    def __init__(self, data, axes=None, title=None, meta=None, dtype=None):
+        data = np.asarray(data, dtype=dtype)
         ndim = data.ndim
         if axes is None:
             axes = AxisCollection(data.shape)
@@ -8145,7 +8147,7 @@ def eye(rows, columns=None, k=0, title=None, dtype=None, meta=None):
 #       ('DE', 'M'): 4, ('DE', 'F'): 5})
 
 
-def stack(elements=None, axis=None, title=None, meta=None, **kwargs):
+def stack(elements=None, axis=None, title=None, meta=None, dtype=None, **kwargs):
     r"""
     Combines several arrays or sessions along an axis.
 
@@ -8165,6 +8167,8 @@ def stack(elements=None, axis=None, title=None, meta=None, **kwargs):
     meta : list of pairs or dict or OrderedDict or Metadata, optional
         Metadata (title, description, author, creation_date, ...) associated with the array.
         Keys must be strings. Values must be of type string, int, float, date, time or datetime.
+    dtype : type, optional
+        Output dtype. Defaults to None (inspect all output values to infer it automatically).
 
     Returns
     -------
@@ -8342,7 +8346,9 @@ def stack(elements=None, axis=None, title=None, meta=None, **kwargs):
                   for v in values]
         result_axes = AxisCollection.union(*[get_axes(v) for v in values])
         result_axes.append(axis)
-        result = empty(result_axes, dtype=common_type(values), meta=meta)
+        if dtype is None:
+            dtype = common_type(values)
+        result = empty(result_axes, dtype=dtype, meta=meta)
         for k, v in zip(axis, values):
             result[k] = v
         return result

From 895241501d231843aa8c6586961e0cb06922ec02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Mon, 26 Nov 2018 16:47:11 +0100
Subject: [PATCH 17/44] broadcast ufuncs kwargs

---
 larray/core/array.py  | 12 ++++++++++++
 larray/core/ufuncs.py | 12 +++++-------
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 9913c078f..cbb2fb472 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -8420,6 +8420,18 @@ def raw_broadcastable(values, min_axes=None):
     return raw, res_axes
 
 
+def make_args_broadcastable(args, kwargs=None, min_axes=None):
+    """
+    Make args and kwargs (NumPy) broadcastable between them.
+    """
+    values = (args + tuple(kwargs.values())) if kwargs is not None else args
+    first_kw = len(args)
+    raw_bcast_values, res_axes = raw_broadcastable(values, min_axes=min_axes)
+    raw_bcast_args = raw_bcast_values[:first_kw]
+    raw_bcast_kwargs = dict(zip(kwargs.keys(), raw_bcast_values[first_kw:]))
+    return raw_bcast_args, raw_bcast_kwargs, res_axes
+
+
 _default_float_error_handler = float_error_handler_factory(3)
 
 
diff --git a/larray/core/ufuncs.py b/larray/core/ufuncs.py
index 1f1d019d6..d59b30af9 100644
--- a/larray/core/ufuncs.py
+++ b/larray/core/ufuncs.py
@@ -3,15 +3,13 @@
 
 import numpy as np
 
-from larray.core.array import LArray, raw_broadcastable
+from larray.core.array import LArray, make_args_broadcastable
 
 
 def broadcastify(func):
     # intentionally not using functools.wraps, because it does not work for wrapping a function from another module
     def wrapper(*args, **kwargs):
-        # TODO: normalize args/kwargs like in LIAM2 so that we can also broadcast if args are given via kwargs
-        #       (eg out=)
-        raw_args, combined_axes = raw_broadcastable(args)
+        raw_bcast_args, raw_bcast_kwargs, res_axes = make_args_broadcastable(args, kwargs)
 
         # We pass only raw numpy arrays to the ufuncs even though numpy is normally meant to handle those cases itself
         # via __array_wrap__
@@ -25,9 +23,9 @@ def wrapper(*args, **kwargs):
         # It fails on "np.minimum(ndarray, LArray)" because it calls __array_wrap__(high, result) which cannot work if
         # there was broadcasting involved (high has potentially less labels than result).
         # it does this because numpy calls __array_wrap__ on the argument with the highest __array_priority__
-        res_data = func(*raw_args, **kwargs)
-        if combined_axes:
-            return LArray(res_data, combined_axes)
+        res_data = func(*raw_bcast_args, **raw_bcast_kwargs)
+        if res_axes:
+            return LArray(res_data, res_axes)
         else:
             return res_data
     # copy meaningful attributes (numpy ufuncs do not have __annotations__ nor __qualname__)

From 744709957ab80394477626d7f866362160b1f45d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 3 Oct 2018 11:12:32 +0200
Subject: [PATCH 18/44] added SequenceZip as an alternative to builtin zip

---
 larray/util/misc.py | 52 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/larray/util/misc.py b/larray/util/misc.py
index e26fc5c16..ac8058bb3 100644
--- a/larray/util/misc.py
+++ b/larray/util/misc.py
@@ -775,3 +775,55 @@ def __enter__(self):
     def __exit__(self, type_, value, traceback):
         if self.close_store:
             self.store.close()
+
+
+class SequenceZip(object):
+    """
+    Represents the "combination" of several sequences.
+
+    This is very similar to python's builtin zip but only accepts sequences and acts as a Sequence (it can be
+    indexed and has a len).
+
+    Parameters
+    ----------
+    sequences : Iterable of Sequence
+        Sequences to combine.
+
+    Examples
+    --------
+    >>> z = SequenceZip([['a', 'b', 'c'], [1, 2, 3]])
+    >>> for i in range(len(z)):
+    ...     print(z[i])
+    ('a', 1)
+    ('b', 2)
+    ('c', 3)
+    >>> for v in z:
+    ...     print(v)
+    ('a', 1)
+    ('b', 2)
+    ('c', 3)
+    >>> list(z[1:4])
+    [('b', 2), ('c', 3)]
+    """
+    def __init__(self, sequences):
+        self.sequences = sequences
+        length = len(sequences[0])
+        bad_length_seqs = [i for i, s in enumerate(sequences[1:], start=1) if len(s) != length]
+        if bad_length_seqs:
+            first_bad = bad_length_seqs[0]
+            raise ValueError("sequence {} has a length of {} which is different from the length of the "
+                             "first sequence ({})".format(first_bad, len(sequences[first_bad]), length))
+        self._length = length
+
+    def __len__(self):
+        return self._length
+
+    def __getitem__(self, key):
+        if isinstance(key, (int, np.integer)):
+            return tuple(seq[key] for seq in self.sequences)
+        else:
+            assert isinstance(key, slice), "key (%s) has invalid type (%s)" % (key, type(key))
+            return SequenceZip([seq[key] for seq in self.sequences])
+
+    def __iter__(self):
+        return zip(*self.sequences)

From 48285382210c9ce4a02a833770be5aa7893d3353 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Thu, 4 Oct 2018 09:18:41 +0200
Subject: [PATCH 19/44] added Product class (from larray_editor) to make
 product for Sequences

similar to itertools.product but can be indexed and has a length
---
 larray/util/misc.py | 64 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/larray/util/misc.py b/larray/util/misc.py
index ac8058bb3..1529c89ad 100644
--- a/larray/util/misc.py
+++ b/larray/util/misc.py
@@ -827,3 +827,67 @@ def __getitem__(self, key):
 
     def __iter__(self):
         return zip(*self.sequences)
+
+
+# TODO: remove Product from larray_editor.utils (it is almost identical)
+class Product(object):
+    """
+    Represents the `cartesian product` of several sequences.
+
+    This is very similar to itertools.product but only accepts sequences and acts as a sequence (it can be
+    indexed and has a len).
+
+    Parameters
+    ----------
+    sequences : Iterable of Sequence
+        Sequences on which to apply the cartesian product.
+
+    Examples
+    --------
+    >>> p = Product([['a', 'b', 'c'], [1, 2]])
+    >>> for i in range(len(p)):
+    ...     print(p[i])
+    ('a', 1)
+    ('a', 2)
+    ('b', 1)
+    ('b', 2)
+    ('c', 1)
+    ('c', 2)
+    >>> p[1:4]
+    [('a', 2), ('b', 1), ('b', 2)]
+    >>> p[-3:]
+    [('b', 2), ('c', 1), ('c', 2)]
+    >>> list(p)
+    [('a', 1), ('a', 2), ('b', 1), ('b', 2), ('c', 1), ('c', 2)]
+    """
+    def __init__(self, sequences):
+        self.sequences = sequences
+        assert len(sequences)
+        shape = [len(a) for a in self.sequences]
+        self._div_mod = [(int(np.prod(shape[i + 1:])), shape[i])
+                         for i in range(len(shape))]
+        self._length = np.prod(shape)
+
+    def __len__(self):
+        return self._length
+
+    def __getitem__(self, key):
+        if isinstance(key, (int, np.integer)):
+            if key >= self._length:
+                raise IndexError("index %d out of range for Product of length %d" % (key, self._length))
+            # this is similar to np.unravel_index but a tad faster for scalars
+            return tuple(array[key // div % mod]
+                         for array, (div, mod) in zip(self.sequences, self._div_mod))
+        else:
+            assert isinstance(key, slice), "key (%s) has invalid type (%s)" % (key, type(key))
+            start, stop, step = key.indices(self._length)
+            div_mod = self._div_mod
+            arrays = self.sequences
+            # XXX: we probably want to return another Product object with an updated start/stop to stay
+            #      lazy in that case too.
+            return [tuple(array[idx // div % mod]
+                          for array, (div, mod) in zip(arrays, div_mod))
+                    for idx in range(start, stop, step)]
+
+    def __iter__(self):
+        return product(*self.sequences)

From 7441a8085e53932841a5ca10bc6636fd90ea78fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Thu, 13 Dec 2018 15:32:42 +0100
Subject: [PATCH 20/44] WIP: implemented Repeater (need to split
 Product.__repr__ out of this commit)

---
 larray/util/misc.py | 75 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/larray/util/misc.py b/larray/util/misc.py
index 1529c89ad..40968e2e0 100644
--- a/larray/util/misc.py
+++ b/larray/util/misc.py
@@ -829,6 +829,78 @@ def __iter__(self):
         return zip(*self.sequences)
 
 
+class Repeater(object):
+    """
+    Returns a virtual sequence with value repeated n times.
+    The sequence is never actually created in memory.
+
+    Parameters
+    ----------
+    value : any
+        Value to repeat.
+    n : int
+        Number of times to repeat value.
+
+    Notes
+    -----
+    This is very similar to itertools.repeat except this version returns a Sequence instead of an iterator,
+    meaning it has a length and can be indexed.
+
+    Examples
+    --------
+    >>> r = Repeater('a', 3)
+    >>> list(r)
+    ['a', 'a', 'a']
+    >>> r[0]
+    'a'
+    >>> r[2]
+    'a'
+    >>> r[3]
+    Traceback (most recent call last):
+    ...
+    IndexError: index out of range
+    >>> r[-1]
+    'a'
+    >>> r[-3]
+    'a'
+    >>> r[-4]
+    Traceback (most recent call last):
+    ...
+    IndexError: index out of range
+    >>> len(r)
+    3
+    >>> list(r[1:])
+    ['a', 'a']
+    >>> list(r[:2])
+    ['a', 'a']
+    >>> list(r[10:])
+    []
+    """
+    def __init__(self, value, n):
+        self.value = value
+        self.n = n
+
+    def __len__(self):
+        return self.n
+
+    def __getitem__(self, key):
+        if isinstance(key, (int, np.integer)):
+            if key >= self.n or key < -self.n:
+                raise IndexError('index out of range')
+            return self.value
+        else:
+            assert isinstance(key, slice), "key (%s) has invalid type (%s)" % (key, type(key))
+            start, stop, step = key.indices(self.n)
+            # XXX: unsure // step is correct
+            return Repeater(self.value, (stop - start) // step)
+
+    def __iter__(self):
+        return itertools.repeat(self.value, self.n)
+
+    def __repr__(self):
+        return 'Repeater({}, {})'.format(self.value, self.n)
+
+
 # TODO: remove Product from larray_editor.utils (it is almost identical)
 class Product(object):
     """
@@ -891,3 +963,6 @@ def __getitem__(self, key):
 
     def __iter__(self):
         return product(*self.sequences)
+
+    def __repr__(self):
+        return 'Product({})'.format(self.sequences)

From cafa35d1ae641c61051e52f949e859808c57b0c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 15:43:08 +0200
Subject: [PATCH 21/44] allow creating a Session from any object having a
 .items method (e.g. another Session)

---
 larray/core/session.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/larray/core/session.py b/larray/core/session.py
index c40ebfcb7..b2b6ebe24 100644
--- a/larray/core/session.py
+++ b/larray/core/session.py
@@ -94,10 +94,12 @@ def __init__(self, *args, **kwargs):
             if isinstance(a0, str):
                 # assume a0 is a filename
                 self.load(a0)
+            elif hasattr(a0, 'items'):
+                for k, v in a0.items():
+                    self[k] = v
             else:
-                items = a0.items() if isinstance(a0, dict) else a0
                 # assume we have an iterable of tuples
-                for k, v in items:
+                for k, v in a0:
                     self[k] = v
         else:
             self.add(*args, **kwargs)

From 524ca0e57238492857097e1a22642af5ffc5a1ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 15:57:39 +0200
Subject: [PATCH 22/44] optimized iteration speed over LArray, Group and Axis

---
 larray/core/array.py | 25 +++++++++++++++----------
 larray/core/axis.py  |  2 +-
 larray/core/group.py |  3 ++-
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index cbb2fb472..7965dc9f6 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -311,20 +311,21 @@ def concat(arrays, axis=0, dtype=None):
 
 class LArrayIterator(object):
     def __init__(self, array):
-        self.array = array
-        self.index = 0
+        data_iter = iter(array.data)
+        self.nextfunc = data_iter.__next__
+        self.axes = array.axes[1:]
 
     def __iter__(self):
         return self
 
     def __next__(self):
-        array = self.array
-        if self.index == len(self.array):
-            raise StopIteration
-        # result = array.i[array.axes[0].i[self.index]]
-        result = array.i[self.index]
-        self.index += 1
-        return result
+        data = self.nextfunc()
+        axes = self.axes
+        if len(axes):
+            return LArray(data, axes)
+        else:
+            return data
+
     # Python 2
     next = __next__
 
@@ -2290,7 +2291,11 @@ def __str__(self):
     __repr__ = __str__
 
     def __iter__(self):
-        return LArrayIterator(self)
+        # fast path for 1D arrays where we return elements
+        if self.ndim <= 1:
+            return iter(self.data)
+        else:
+            return LArrayIterator(self)
 
     def __contains__(self, key):
         return any(key in axis for axis in self.axes)
diff --git a/larray/core/axis.py b/larray/core/axis.py
index 5c5644280..62dc179aa 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -721,7 +721,7 @@ def __len__(self):
         return self._length
 
     def __iter__(self):
-        return iter([self.i[i] for i in range(self._length)])
+        return iter([IGroup(i, None, self) for i in range(self._length)])
 
     def __getitem__(self, key):
         """
diff --git a/larray/core/group.py b/larray/core/group.py
index fbd2b5228..b9b86929e 100644
--- a/larray/core/group.py
+++ b/larray/core/group.py
@@ -870,7 +870,8 @@ def __len__(self):
     def __iter__(self):
         # XXX: use translate/IGroup instead, so that it works even in the presence of duplicate labels
         #      possibly, only if axis is set?
-        return iter([LGroup(v, axis=self.axis) for v in self.eval()])
+        axis = self.axis
+        return iter([LGroup(v, axis=axis) for v in self.eval()])
 
     def named(self, name):
         """Returns group with a different name.

From 6cb8a8b4b135e94424720f50b051418ce74d54fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 16:24:22 +0200
Subject: [PATCH 23/44] slightly faster AxisCollection.combine_axes

---
 larray/core/axis.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/larray/core/axis.py b/larray/core/axis.py
index 62dc179aa..06ec1c214 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -2904,9 +2904,9 @@ def combine_axes(self, axes=None, sep='_', wildcard=False, front_if_spread=False
                     # Q: if axis is a wildcard axis, should the result be a wildcard axis (and axes_labels discarded?)
                     combined_labels = _axes[0].labels
                 else:
-                    combined_labels = [sep.join(str(l) for l in p)
-                                       for p in product(*_axes.labels)]
-
+                    sepjoin = sep.join
+                    axes_labels = [np.array(l, np.str, copy=False) for l in _axes.labels]
+                    combined_labels = [sepjoin(p) for p in product(*axes_labels)]
                 combined_axis = Axis(combined_labels, combined_name)
             new_axes = new_axes - _axes
             new_axes.insert(combined_axis_pos, combined_axis)

From 8f1f886e76276fa67ec528de1ad8e11ccbee6bce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 16:05:56 +0200
Subject: [PATCH 24/44] added __slots__ to all our structures

this makes attribute access a tad faster and uses less memory (because it does not create a __dict__ for each instance)
---
 larray/core/array.py | 14 +++++++++++---
 larray/core/axis.py  | 10 +++++++---
 larray/core/group.py | 14 +++++++++++---
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 7965dc9f6..4587c081d 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -310,6 +310,8 @@ def concat(arrays, axis=0, dtype=None):
 
 
 class LArrayIterator(object):
+    __slots__ = ('nextfunc', 'axes')
+
     def __init__(self, array):
         data_iter = iter(array.data)
         self.nextfunc = data_iter.__next__
@@ -332,6 +334,7 @@ def __next__(self):
 
 # TODO: rename to LArrayIndexIndexer or something like that
 class LArrayPositionalIndexer(object):
+    __slots__ = ('array',)
     """
     numpy indexing *except* we index the cross product
     """
@@ -362,6 +365,8 @@ def __len__(self):
 
 
 class LArrayPointsIndexer(object):
+    __slots__ = ('array',)
+
     def __init__(self, array):
         self.array = array
 
@@ -383,6 +388,7 @@ def __setitem__(self, key, value):
 
 # TODO: rename to LArrayIndexPointsIndexer or something like that
 class LArrayPositionalPointsIndexer(object):
+    __slots__ = ('array',)
     """
     the closest to numpy indexing we get, but not 100% the same.
     """
@@ -657,6 +663,7 @@ class LArray(ABCLArray):
           M  10   9   8
           F  10  11  12
     """
+    __slots__ = ('data', 'axes', '_meta')
 
     def __init__(self, data, axes=None, title=None, meta=None, dtype=None):
         data = np.asarray(data, dtype=dtype)
@@ -886,14 +893,15 @@ def __getattr__(self, key):
     # needed to make *un*pickling work (because otherwise, __getattr__ is called before .axes exists, which leads to
     # an infinite recursion)
     def __getstate__(self):
-        return self.__dict__
+        return self.data, self.axes, self._meta
 
     def __setstate__(self, d):
-        self.__dict__ = d
+        self.data, self.axes, self._meta = d
 
     def __dir__(self):
         axis_names = set(axis.name for axis in self.axes if axis.name is not None)
-        return list(set(dir(self.__class__)) | set(self.__dict__.keys()) | axis_names)
+        attributes = self.__slots__
+        return list(set(dir(self.__class__)) | set(attributes) | axis_names)
 
     def _ipython_key_completions_(self):
         return list(chain(*[list(labels) for labels in self.axes.labels]))
diff --git a/larray/core/axis.py b/larray/core/axis.py
index 06ec1c214..11258e505 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -75,6 +75,8 @@ class Axis(ABCAxis):
     >>> anonymous
     Axis([0, 1, 2, 3, 4], None)
     """
+    __slots__ = ('name', '__mapping', '__sorted_keys', '__sorted_values', '_labels', '_length', '_iswildcard')
+
     # ticks instead of labels?
     def __init__(self, labels, name=None):
         if isinstance(labels, Group) and name is None:
@@ -1371,6 +1373,7 @@ def _make_axis(obj):
 # not using namedtuple because we have to know the fields in advance (it is a one-off class) and we need more
 # functionality than just a named tuple
 class AxisCollection(object):
+    __slots__ = ('_list', '_map')
     """
     Represents a collection of axes.
 
@@ -1464,10 +1467,11 @@ def __getattr__(self, key):
     # needed to make *un*pickling work (because otherwise, __getattr__ is called before _map exists, which leads to
     # an infinite recursion)
     def __getstate__(self):
-        return self.__dict__
+        return self._list
 
-    def __setstate__(self, d):
-        self.__dict__ = d
+    def __setstate__(self, state):
+        self._list = state
+        self._map = {axis.name: axis for axis in state if axis.name is not None}
 
     def __getitem__(self, key):
         if isinstance(key, Axis):
diff --git a/larray/core/group.py b/larray/core/group.py
index b9b86929e..cb3db85c3 100644
--- a/larray/core/group.py
+++ b/larray/core/group.py
@@ -707,6 +707,8 @@ class IGroupMaker(object):
     -----
     This class is used by the method `Axis.i`
     """
+    __slots__ = ('axis',)
+
     def __init__(self, axis):
         assert isinstance(axis, ABCAxis)
         self.axis = axis
@@ -725,6 +727,8 @@ def __len__(self):
 class Group(object):
     """Abstract Group.
     """
+    __slots__ = ('key', 'name', 'axis')
+
     format_string = None
 
     def __init__(self, key, name=None, axis=None):
@@ -1485,7 +1489,8 @@ def __array__(self, dtype=None):
     def __dir__(self):
         # called by dir() and tab-completion at the interactive prompt, must return a list of any valid getattr key.
         # dir() takes care of sorting but not uniqueness, so we must ensure that.
-        return list(set(dir(self.eval())) | set(self.__dict__.keys()) | set(dir(self.__class__)))
+        attributes = self.__slots__
+        return list(set(dir(self.eval())) | set(attributes) | set(dir(self.__class__)))
 
     def __getattr__(self, key):
         if key == '__array_struct__':
@@ -1496,10 +1501,10 @@ def __getattr__(self, key):
     # needed to make *un*pickling work (because otherwise, __getattr__ is called before .key exists, which leads to
     # an infinite recursion)
     def __getstate__(self):
-        return self.__dict__
+        return (self.key, self.name, self.axis)
 
     def __setstate__(self, d):
-        self.__dict__ = d
+        self.key, self.name, self.axis = d
 
     def __hash__(self):
         # to_tick & to_key are partially opposite operations but this standardize on a single notation so that they can
@@ -1557,6 +1562,7 @@ class LGroup(Group):
     >>> teens
     X.age[10:19] >> 'teens'
     """
+    __slots__ = ()
     format_string = "{axis}[{key}]"
 
     def __init__(self, key, name=None, axis=None):
@@ -1616,6 +1622,7 @@ class LSet(LGroup):
     >>> abc & letters['b:d']
     letters['b', 'c'].set()
     """
+    __slots__ = ()
     format_string = "{axis}[{key}].set()"
 
     def __init__(self, key, name=None, axis=None):
@@ -1678,6 +1685,7 @@ class IGroup(Group):
     axis : int, str, Axis, optional
         Axis for group.
     """
+    __slots__ = ()
     format_string = "{axis}.i[{key}]"
 
     def translate(self, bound=None, stop=False):

From 4c64564cbe2cfae0712addac93eca91aa7a300e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 16:41:33 +0200
Subject: [PATCH 25/44] implemented faster metadata handling

the internal ._meta attribute can be None; a Metadata() instance is only created when needed (e.g. when .meta is first accessed)
---
 larray/core/array.py | 56 +++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 4587c081d..f724cb100 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -558,15 +558,27 @@ def nan_equal(a1, a2):
 
 
 def _handle_deprecated_argument_title(meta, title):
-    if meta is None:
-        meta = Metadata()
     if title is not None:
+        if meta is None:
+            meta = Metadata()
         import warnings
         warnings.warn("title argument is deprecated. Please use meta argument instead", FutureWarning, stacklevel=3)
         meta['title'] = title
     return meta
 
 
+# make sure meta is either None or a Metadata instance
+def _handle_meta(meta, title):
+    if title is not None:
+        meta = _handle_deprecated_argument_title(meta, title)
+    if meta is None or isinstance(meta, Metadata):
+        return meta
+    if not isinstance(meta, (list, dict, OrderedDict)):
+        raise TypeError("Expected None, list of pairs, dict, OrderedDict or Metadata object "
+                        "instead of {}".format(type(meta).__name__))
+    return Metadata(meta)
+
+
 class LArray(ABCLArray):
     """
     A LArray object represents a multidimensional, homogeneous array of fixed-size items with labeled axes.
@@ -684,14 +696,14 @@ def __init__(self, data, axes=None, title=None, meta=None, dtype=None):
         self.data = data
         self.axes = axes
 
-        meta = _handle_deprecated_argument_title(meta, title)
-        self.meta = meta
+        meta = _handle_meta(meta, title)
+        self._meta = meta
 
     @property
     def title(self):
         import warnings
         warnings.warn("title attribute is deprecated. Please use meta.title instead", FutureWarning, stacklevel=2)
-        return self._meta.title if 'title' in self._meta else None
+        return self._meta.title if self._meta is not None and 'title' in self._meta else None
 
     @title.setter
     def title(self, title):
@@ -710,14 +722,13 @@ def meta(self):
         Metadata:
             Metadata of the array.
         """
+        if self._meta is None:
+            self._meta = Metadata()
         return self._meta
 
     @meta.setter
     def meta(self, meta):
-        if not isinstance(meta, (list, dict, OrderedDict, Metadata)):
-            raise TypeError("Expected list of pairs or dict or OrderedDict or Metadata object "
-                            "instead of {}".format(type(meta).__name__))
-        self._meta = meta if isinstance(meta, Metadata) else Metadata(meta)
+        self._meta = _handle_meta(meta, None)
 
     # TODO: rename to posnonzero and implement a label version of nonzero
     # TODO: implement wildcard argument to avoid producing the combined labels
@@ -7279,7 +7290,8 @@ def zeros(axes, title=None, dtype=float, order='C', meta=None):
          BE  0.0  0.0
          FO  0.0  0.0
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    # FIXME: the error message is wrong (stackdepth is wrong) because of _check_axes_argument
+    meta = _handle_meta(meta, title)
     axes = AxisCollection(axes)
     return LArray(np.zeros(axes.shape, dtype, order), axes, meta=meta)
 
@@ -7315,7 +7327,7 @@ def zeros_like(array, title=None, dtype=None, order='K', meta=None):
      a0   0   0   0
      a1   0   0   0
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
     return LArray(np.zeros_like(array, dtype, order), array.axes, meta=meta)
 
 
@@ -7351,7 +7363,7 @@ def ones(axes, title=None, dtype=float, order='C', meta=None):
          BE  1.0  1.0
          FO  1.0  1.0
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
     axes = AxisCollection(axes)
     return LArray(np.ones(axes.shape, dtype, order), axes, meta=meta)
 
@@ -7387,7 +7399,7 @@ def ones_like(array, title=None, dtype=None, order='K', meta=None):
      a0   1   1   1
      a1   1   1   1
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
     axes = array.axes
     return LArray(np.ones_like(array, dtype, order), axes, meta=meta)
 
@@ -7424,7 +7436,7 @@ def empty(axes, title=None, dtype=float, order='C', meta=None):
          BE  2.47311483356e-315  2.47498446195e-315
          FO                 0.0  6.07684618082e-31
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
     axes = AxisCollection(axes)
     return LArray(np.empty(axes.shape, dtype, order), axes, meta=meta)
 
@@ -7461,7 +7473,7 @@ def empty_like(array, title=None, dtype=None, order='K', meta=None):
      a1  1.06099789568e-313  1.48539705397e-313
      a2  1.90979621226e-313  2.33419537056e-313
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
     # cannot use empty() because order == 'K' is not understood
     return LArray(np.empty_like(array.data, dtype, order), array.axes, meta=meta)
 
@@ -7508,7 +7520,7 @@ def full(axes, fill_value, title=None, dtype=None, order='C', meta=None):
          BE  0  1
          FO  0  1
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
     if isinstance(fill_value, Axis):
         raise ValueError("If you want to pass several axes or dimension lengths to full, you must pass them as a "
                          "list (using []) or tuple (using()).")
@@ -7552,7 +7564,7 @@ def full_like(array, fill_value, title=None, dtype=None, order='K', meta=None):
      a0   5   5   5
      a1   5   5   5
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
     # cannot use full() because order == 'K' is not understood
     # cannot use np.full_like() because it would not handle LArray fill_value
     res = empty_like(array, dtype=dtype, meta=meta)
@@ -7666,7 +7678,7 @@ def sequence(axis, initial=0, inc=None, mult=1, func=None, axes=None, title=None
     year  2016  2017  2018  2019
            1.0   2.0   3.0   3.0
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
 
     if inc is None:
         inc = 1 if mult is 1 else 0
@@ -7863,7 +7875,7 @@ def ndtest(shape_or_axes, start=0, label_start=0, title=None, dtype=int, meta=No
          BE  0  1
          FO  2  3
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
     # XXX: try to come up with a syntax where start is before "end".
     # For ndim > 1, I cannot think of anything nice.
     if isinstance(shape_or_axes, int):
@@ -8014,7 +8026,7 @@ def labels_array(axes, title=None, meta=None):
     # nat\\sex     M     F
     #      BE  BE,M  BE,F
     #      FO  FO,M  FO,F
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
     axes = AxisCollection(axes)
     if len(axes) > 1:
         res_axes = axes + Axis(axes.names, 'axis')
@@ -8084,7 +8096,7 @@ def eye(rows, columns=None, k=0, title=None, dtype=None, meta=None):
             1  0.0  0.0  1.0
             2  0.0  0.0  0.0
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
     if columns is None:
         columns = rows.copy() if isinstance(rows, Axis) else rows
     axes = AxisCollection([rows, columns])
@@ -8283,7 +8295,7 @@ def stack(elements=None, axis=None, title=None, meta=None, dtype=None, **kwargs)
                M       0.0      0.5
                F       0.0      0.5
     """
-    meta = _handle_deprecated_argument_title(meta, title)
+    meta = _handle_meta(meta, title)
 
     from larray import Session
 

From 3d4684f20bab043de39689f140603abf5143445a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 26 Sep 2018 07:57:08 +0200
Subject: [PATCH 26/44] factorized a unique_multi helper function

it computes unique values across multiple iterables
---
 larray/core/array.py |  5 +----
 larray/core/axis.py  |  8 ++------
 larray/util/misc.py  | 11 +++++++++++
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index f724cb100..8b26cbbd7 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -8352,10 +8352,7 @@ def stack(elements=None, axis=None, title=None, meta=None, dtype=None, **kwargs)
         if not all(isinstance(s, Session) for s in sessions):
             raise TypeError("stack() only supports stacking Session with other Session objects")
 
-        seen = set()
-        all_keys = []
-        for s in sessions:
-            unique_list(s.keys(), all_keys, seen)
+        all_keys = unique_multi(s.keys() for s in sessions)
         res = []
         for name in all_keys:
             try:
diff --git a/larray/core/axis.py b/larray/core/axis.py
index 11258e505..849c7edb8 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -16,7 +16,7 @@
                                _range_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups)
 from larray.util.oset import *
 from larray.util.misc import (basestring, PY2, unicode, long, duplicates, array_lookup2, ReprString, index_by_id,
-                              renamed_to, common_type, LHDFStore, lazy_attribute, _isnoneslice)
+                              renamed_to, common_type, LHDFStore, lazy_attribute, _isnoneslice, unique_multi)
 
 
 class Axis(ABCAxis):
@@ -1145,11 +1145,7 @@ def union(self, other):
             other = _to_ticks(other, parse_single_int=True) if '..' in other or ',' in other else [other]
         if isinstance(other, Axis):
             other = other.labels
-        unique_labels = []
-        seen = set()
-        unique_list(self.labels, unique_labels, seen)
-        unique_list(other, unique_labels, seen)
-        return Axis(unique_labels, self.name)
+        return Axis(unique_multi((self.labels, other)), self.name)
 
     def intersection(self, other):
         """Returns axis with the (set) intersection of this axis labels and other labels.
diff --git a/larray/util/misc.py b/larray/util/misc.py
index 40968e2e0..dd2a6b7c7 100644
--- a/larray/util/misc.py
+++ b/larray/util/misc.py
@@ -234,6 +234,17 @@ def unique_list(iterable, res=None, seen=None):
     return res
 
 
+def unique_multi(iterable_of_iterables):
+    """
+    Returns a list of all unique elements across multiple iterables. Elements of earlier iterables will come first.
+    """
+    seen = set()
+    res = []
+    for iterable in iterable_of_iterables:
+        unique_list(iterable, res, seen)
+    return res
+
+
 def duplicates(iterable):
     """
     List duplicated elements once, preserving order. Remember all elements ever seen.

From 12717dd0030f603c81255185bdadae3c0939be59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 26 Sep 2018 10:56:29 +0200
Subject: [PATCH 27/44] simplified AxisCollection._flat_lookup by using
 np.unravel_index

---
 larray/core/axis.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/larray/core/axis.py b/larray/core/axis.py
index 849c7edb8..d5c588077 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -3153,14 +3153,12 @@ def _flat_lookup(self, flat_indices):
         from larray.core.array import aslarray, LArray, stack
 
         flat_indices = aslarray(flat_indices)
-        shape = self.shape
-        divisors = np.roll(np.cumprod(shape[::-1])[::-1], -1)
-        divisors[-1] = 1
-        axes_indices = [(flat_indices // div) % length for div, length in zip(divisors, shape)]
+        axes_indices = np.unravel_index(flat_indices, self.shape)
         # This could return an LArray with object dtype because axes labels can have different types (but not length)
         # TODO: this should be:
         # return stack([(axis.name, axis.i[inds]) for axis, inds in zip(axes, axes_indices)], axis='axis')
-        return stack([(axis.name, LArray(axis.labels[inds], inds.axes)) for axis, inds in zip(self, axes_indices)],
+        flat_axes = flat_indices.axes
+        return stack([(axis.name, LArray(axis.labels[inds], flat_axes)) for axis, inds in zip(self, axes_indices)],
                      axis='axis')
 
     def _adv_keys_to_combined_axis_la_keys(self, key, wildcard=False, sep='_'):

From 80564e9ef4cce168e59d03c261ae7af0d66c70fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 3 Oct 2018 17:36:33 +0200
Subject: [PATCH 28/44] simplified _adv_keys_to_combined_axis_la_keys

---
 larray/core/axis.py | 39 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/larray/core/axis.py b/larray/core/axis.py
index d5c588077..7ce0ae1d4 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -3189,25 +3189,27 @@ def _adv_keys_to_combined_axis_la_keys(self, key, wildcard=False, sep='_'):
         # TODO: use/factorize with AxisCollection.combine_axes. The problem is that it uses product(*axes_labels)
         #       while here we need zip(*axes_labels)
         ignored_types = (int, np.integer, slice, LArray)
-        adv_key_axes = [axis for axis_key, axis in zip(key, self)
-                        if not isinstance(axis_key, ignored_types)]
-        if not adv_key_axes:
+        adv_keys = [(axis_key, axis) for axis_key, axis in zip(key, self)
+                    if not isinstance(axis_key, ignored_types)]
+        if not adv_keys:
             return key
 
         # axes with a scalar key are not taken, since we want to kill them
 
         # all anonymous axes => anonymous combined axis
-        if all(axis.name is None for axis in adv_key_axes):
+        if all(axis.name is None for axis_key, axis in adv_keys):
             combined_name = None
         else:
             # using axis_id instead of name to allow combining a mix of anonymous & non anonymous axes
-            combined_name = sep.join(str(self.axis_id(axis)) for axis in adv_key_axes)
+            combined_name = sep.join(str(self.axis_id(axis)) for axis_key, axis in adv_keys)
+
+        # explicitly check that all combined keys have the same length
+        first_key, first_axis = adv_keys[0]
+        combined_axis_len = len(first_key)
+        if not all(len(axis_key) == combined_axis_len for axis_key, axis in adv_keys[1:]):
+            raise ValueError("all combined keys should have the same length")
 
         if wildcard:
-            lengths = [len(axis_key) for axis_key in key
-                       if not isinstance(axis_key, ignored_types)]
-            combined_axis_len = lengths[0]
-            assert all(l == combined_axis_len for l in lengths)
             combined_axis = Axis(combined_axis_len, combined_name)
         else:
             # TODO: the combined keys should be objects which display as:
@@ -3217,31 +3219,24 @@ def _adv_keys_to_combined_axis_la_keys(self, key, wildcard=False, sep='_'):
             # A: yes, probably. On the Pandas backend, we could/should have
             #    separate axes. On the numpy backend we cannot.
             # TODO: only convert if
-            if len(adv_key_axes) == 1:
-                # we don't convert to string when there is only a single axis
+            if len(adv_keys) == 1:
+                # we do not convert to string when there is only a single axis
                 axes_labels = [axis.labels[axis_key]
-                               for axis_key, axis in zip(key, self)
-                               if not isinstance(axis_key, ignored_types)]
+                               for axis_key, axis in adv_keys]
                 # Q: if axis is a wildcard axis, should the result be a
                 #    wildcard axis (and axes_labels discarded?)
                 combined_labels = axes_labels[0]
             else:
                 axes_labels = [axis.labels.astype(np.str, copy=False)[axis_key].tolist()
-                               for axis_key, axis in zip(key, self)
-                               if not isinstance(axis_key, ignored_types)]
+                               for axis_key, axis in adv_keys]
                 sepjoin = sep.join
                 combined_labels = [sepjoin(comb) for comb in zip(*axes_labels)]
             combined_axis = Axis(combined_labels, combined_name)
 
         # 2) transform all advanced non-LArray keys to LArray with the combined axis
         # ==========================================================================
-        def to_la_key(axis_key, combined_axis):
-            if isinstance(axis_key, (int, np.integer, slice, LArray)):
-                return axis_key
-            else:
-                assert len(axis_key) == len(combined_axis)
-                return LArray(axis_key, combined_axis)
-        return tuple(to_la_key(axis_key, combined_axis) for axis_key in key)
+        return tuple(axis_key if isinstance(axis_key, ignored_types) else LArray(axis_key, combined_axis)
+                     for axis_key in key)
 
 
 class AxisReference(ABCAxisReference, ExprNode, Axis):

From 4af9381f1e6db84b221fc0297c751121bf7b1417 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Mon, 8 Oct 2018 17:31:50 +0200
Subject: [PATCH 29/44] simplified FileHandler._read_item by returning only the
 value, not the key

(the key wasn't used anywhere)
---
 larray/inout/common.py |  3 +--
 larray/inout/csv.py    |  6 +++---
 larray/inout/excel.py  | 12 ++++++------
 larray/inout/hdf.py    |  2 +-
 larray/inout/pickle.py |  4 ++--
 5 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/larray/inout/common.py b/larray/inout/common.py
index a283571a2..b3efaf915 100644
--- a/larray/inout/common.py
+++ b/larray/inout/common.py
@@ -127,8 +127,7 @@ def read(self, keys, *args, **kwargs):
             if display:
                 print("loading", type, "object", key, "...", end=' ')
             try:
-                key, item = self._read_item(key, type, *args, **kwargs)
-                res[key] = item
+                res[key] = self._read_item(key, type, *args, **kwargs)
             except Exception:
                 if not ignore_exceptions:
                     raise
diff --git a/larray/inout/csv.py b/larray/inout/csv.py
index 07836b1ba..0f89b736b 100644
--- a/larray/inout/csv.py
+++ b/larray/inout/csv.py
@@ -342,11 +342,11 @@ def list_items(self):
 
     def _read_item(self, key, type, *args, **kwargs):
         if type == 'Array':
-            return key, read_csv(self._to_filepath(key), *args, **kwargs)
+            return read_csv(self._to_filepath(key), *args, **kwargs)
         elif type == 'Axis':
-            return key, self.axes[key]
+            return self.axes[key]
         elif type == 'Group':
-            return key, self.groups[key]
+            return self.groups[key]
         else:
             raise TypeError()
 
diff --git a/larray/inout/excel.py b/larray/inout/excel.py
index 9eecfffa8..69417bb33 100644
--- a/larray/inout/excel.py
+++ b/larray/inout/excel.py
@@ -282,11 +282,11 @@ def list_items(self):
     def _read_item(self, key, type, *args, **kwargs):
         if type == 'Array':
             df = self.handle.parse(key, *args, **kwargs)
-            return key, df_aslarray(df, raw=True)
+            return df_aslarray(df, raw=True)
         elif type == 'Axis':
-            return key, self.axes[key]
+            return self.axes[key]
         elif type == 'Group':
-            return key, self.groups[key]
+            return self.groups[key]
         else:
             raise TypeError()
 
@@ -386,11 +386,11 @@ def list_items(self):
 
     def _read_item(self, key, type, *args, **kwargs):
         if type == 'Array':
-            return key, self.handle[key].load(*args, **kwargs)
+            return self.handle[key].load(*args, **kwargs)
         elif type == 'Axis':
-            return key, self.axes[key]
+            return self.axes[key]
         elif type == 'Group':
-            return key, self.groups[key]
+            return self.groups[key]
         else:
             raise TypeError()
 
diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py
index 0ce326a9d..f5d656603 100644
--- a/larray/inout/hdf.py
+++ b/larray/inout/hdf.py
@@ -126,7 +126,7 @@ def _read_item(self, key, type, *args, **kwargs):
             kwargs['name'] = key
         else:
             raise TypeError()
-        return key, read_hdf(self.handle, hdf_key, *args, **kwargs)
+        return read_hdf(self.handle, hdf_key, *args, **kwargs)
 
     def _dump_item(self, key, value, *args, **kwargs):
         if isinstance(value, LArray):
diff --git a/larray/inout/pickle.py b/larray/inout/pickle.py
index 1c8085494..8b4c98501 100644
--- a/larray/inout/pickle.py
+++ b/larray/inout/pickle.py
@@ -34,8 +34,8 @@ def list_items(self):
         return items
 
     def _read_item(self, key, type, *args, **kwargs):
-        if type in ['Array', 'Axis', 'Group']:
-            return key, self.data[key]
+        if type in {'Array', 'Axis', 'Group'}:
+            return self.data[key]
         else:
             raise TypeError()
 

From a10358f79546b180cc38682f3a9a8a0b1e33fd9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Mon, 24 Sep 2018 12:43:27 +0200
Subject: [PATCH 30/44] implemented read_stata and LArray.to_stata

---
 doc/source/api.rst                      |  2 ++
 doc/source/changes/version_0_30.rst.inc | 18 ++++++++++++++
 larray/__init__.py                      |  3 ++-
 larray/core/array.py                    | 16 ++++++++++++
 larray/inout/stata.py                   | 33 +++++++++++++++++++++++++
 5 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 larray/inout/stata.py

diff --git a/doc/source/api.rst b/doc/source/api.rst
index 8cc8a3c72..44512920a 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -623,6 +623,7 @@ Read
    read_hdf
    read_eurostat
    read_sas
+   read_stata
 
 Write
 -----
@@ -633,6 +634,7 @@ Write
    LArray.to_csv
    LArray.to_excel
    LArray.to_hdf
+   LArray.to_stata
 
 Excel
 =====
diff --git a/doc/source/changes/version_0_30.rst.inc b/doc/source/changes/version_0_30.rst.inc
index dde69b6d3..e550fba5a 100644
--- a/doc/source/changes/version_0_30.rst.inc
+++ b/doc/source/changes/version_0_30.rst.inc
@@ -100,6 +100,24 @@ Backward incompatible changes
 New features
 ------------
 
+* implemented :py:obj:`read_stata()` and :py:obj:`LArray.to_stata()` to read arrays from and write arrays to Stata .dta
+  files.
+
+    >>> arr = ndtest((2, 3))
+    >>> arr
+    a\b  b0  b1  b2
+     a0   0   1   2
+     a1   3   4   5
+    >>> arr.to_stata('test.dta')
+    >>> read_stata('test.dta')
+    {0}\{1}   a  b0  b1  b2
+          0  a0   0   1   2
+          1  a1   3   4   5
+    >>> read_stata('test.dta', index_col='a')
+    a\{1}  b0  b1  b2
+       a0   0   1   2
+       a1   3   4   5
+
 * added :py:obj:`LArray.isin()` method to check whether each element of an array is contained in a list (or array) of
   values.
 
diff --git a/larray/__init__.py b/larray/__init__.py
index 169d07fa5..77e5494dc 100644
--- a/larray/__init__.py
+++ b/larray/__init__.py
@@ -28,6 +28,7 @@
 from larray.inout.excel import read_excel
 from larray.inout.hdf import read_hdf
 from larray.inout.sas import read_sas
+from larray.inout.stata import read_stata
 from larray.inout.xw_excel import open_excel, Workbook
 
 from larray.viewer import view, edit, compare
@@ -67,7 +68,7 @@
     'real_if_close', 'interp', 'isnan', 'isinf', 'inverse',
     # inout
     'from_lists', 'from_string', 'from_frame', 'from_series', 'read_csv', 'read_tsv',
-    'read_eurostat', 'read_excel', 'read_hdf', 'read_sas', 'open_excel', 'Workbook',
+    'read_eurostat', 'read_excel', 'read_hdf', 'read_sas', 'read_stata', 'open_excel', 'Workbook',
     # viewer
     'view', 'edit', 'compare',
     # ipfp
diff --git a/larray/core/array.py b/larray/core/array.py
index 8b26cbbd7..7f1a874ca 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -6210,6 +6210,22 @@ def to_hdf(self, filepath, key):
             store.get_storer(key).attrs.type = 'Array'
             self.meta.to_hdf(store, key)
 
+    def to_stata(self, filepath_or_buffer, **kwargs):
+        """
+        Writes array to a Stata .dta file.
+
+        Parameters
+        ----------
+        filepath_or_buffer : str or file-like object
+            Path to .dta file or a file handle.
+
+        Examples
+        --------
+        >>> arr = ndtest((2, 3))
+        >>> arr.to_stata('test.dta')          # doctest: +SKIP
+        """
+        self.to_frame().to_stata(filepath_or_buffer, **kwargs)
+
     @deprecate_kwarg('sheet_name', 'sheet')
     def to_excel(self, filepath=None, sheet=None, position='A1', overwrite_file=False, clear_sheet=False,
                  header=True, transpose=False, wide=True, value_name='value', engine=None, *args, **kwargs):
diff --git a/larray/inout/stata.py b/larray/inout/stata.py
new file mode 100644
index 000000000..9f79cfc8c
--- /dev/null
+++ b/larray/inout/stata.py
@@ -0,0 +1,33 @@
+from __future__ import absolute_import, print_function
+
+import pandas as pd
+
+from larray.inout.pandas import from_frame
+
+__all__ = ['read_stata']
+
+
+def read_stata(filepath_or_buffer, index_col=None, sort_rows=False, sort_columns=False,
+               **kwargs):
+    """
+    Reads Stata .dta file and returns an LArray with the contents
+
+    Parameters
+    ----------
+    filepath_or_buffer : str or file-like object
+        Path to .dta file or a file handle.
+    index_col : str or None, optional
+        Name of column to set as index. Defaults to None.
+    sort_rows : bool, optional
+        Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting).
+        This only makes sense in combination with index_col. Defaults to False.
+    sort_columns : bool, optional
+        Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting).
+        Defaults to False.
+
+    Returns
+    -------
+    LArray
+    """
+    df = pd.read_stata(filepath_or_buffer, index_col=index_col, **kwargs)
+    return from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns)

From 6b7d6c695f4947bc78d174d0126844319081ff3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Mon, 5 Nov 2018 15:25:42 +0100
Subject: [PATCH 31/44] implemented LArray.dump(light=True)

---
 larray/core/array.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 7f1a874ca..b2bfaa3de 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -2432,7 +2432,7 @@ def as_table(self, maxlines=None, edgeitems=5, light=False, wide=True, value_nam
                 # returns next line (labels of N-1 first axes + data)
                 yield list(tick) + dataline.tolist()
 
-    def dump(self, header=True, wide=True, value_name='value'):
+    def dump(self, header=True, wide=True, value_name='value', light=False):
         """Dump array as a 2D nested list
 
         Parameters
@@ -2446,6 +2446,9 @@ def dump(self, header=True, wide=True, value_name='value'):
         value_name : str, optional
             Name of the column containing the values (last column) when `wide=False` (see above).
             Not used if header=False. Defaults to 'value'.
+        light : bool, optional
+            Whether or not to hide repeated labels. In other words, only show a label if it is different from the
+            previous one. Defaults to False.
 
         Returns
         -------
@@ -2455,7 +2458,7 @@ def dump(self, header=True, wide=True, value_name='value'):
             # flatten all dimensions except the last one
             return self.data.reshape(-1, self.shape[-1]).tolist()
         else:
-            return list(self.as_table(wide=wide, value_name=value_name))
+            return list(self.as_table(wide=wide, value_name=value_name, light=light))
 
     # XXX: should filter(geo=['W']) return a view by default? (collapse=True)
     # I think it would be dangerous to make it the default

From 44ae2a215aaae6ed37a17a92b5ef4253d3243003 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 17 Oct 2018 10:02:14 +0200
Subject: [PATCH 32/44] implemented LArray.roll (needs changelog and possibly
 invert n)

---
 larray/core/array.py | 47 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/larray/core/array.py b/larray/core/array.py
index b2bfaa3de..0ff2fdfbd 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -6797,6 +6797,53 @@ def shift(self, axis, n=1):
         else:
             return self[:]
 
+    def roll(self, axis=None, n=1):
+        r"""Rolls the cells of the array n-times to the right along axis
+
+        Parameters
+        ----------
+        axis : int, str or Axis, optional
+            Axis along which to roll. Defaults to None (all axes).
+        n : int or LArray, optional
+            Number of positions to roll. Defaults to 1. Use a negative integer to roll left.
+            If n is an LArray the number of positions rolled can vary along the axes of n.
+
+        Returns
+        -------
+        LArray
+
+        Examples
+        --------
+        >>> arr = ndtest('sex=M,F;time=2010..2012')
+        >>> arr
+        sex\time  2010  2011  2012
+               M     0     1     2
+               F     3     4     5
+        >>> arr.roll('time')
+        sex\time  2010  2011  2012
+               M     2     0     1
+               F     5     3     4
+        >>> n = sequence(arr.sex, initial=1)
+        >>> n
+        sex  M  F
+             1  2
+        >>> arr.roll('time', n)
+        sex\time  2010  2011  2012
+               M     2     0     1
+               F     4     5     3
+        """
+        if isinstance(n, (int, np.integer)):
+            axis_idx = None if axis is None else self.axes.index(axis)
+            return LArray(np.roll(self.data, n, axis=axis_idx), self.axes)
+        else:
+            if not isinstance(n, LArray):
+                raise TypeError("n should either be an integer or an LArray")
+            if axis is None:
+                raise TypeError("axis may not be None if n is an LArray")
+            axis = self.axes[axis]
+            seq = sequence(axis)
+            return self[axis.i[(seq - n) % len(axis)]]
+
     # TODO: add support for groups as axis (like aggregates)
     # eg a.diff(x.year[2018:]) instead of a[2018:].diff(x.year)
     def diff(self, axis=-1, d=1, n=1, label='upper'):

From 1668e92f62a544c832d9ddbe55b476159ffde085 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 21 Sep 2018 16:23:42 +0200
Subject: [PATCH 33/44] implemented AxisCollection.iter_labels

---
 larray/core/axis.py | 70 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/larray/core/axis.py b/larray/core/axis.py
index 7ce0ae1d4..7192a5454 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -16,7 +16,7 @@
                                _range_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups)
 from larray.util.oset import *
 from larray.util.misc import (basestring, PY2, unicode, long, duplicates, array_lookup2, ReprString, index_by_id,
-                              renamed_to, common_type, LHDFStore, lazy_attribute, _isnoneslice, unique_multi)
+                              renamed_to, common_type, LHDFStore, lazy_attribute, _isnoneslice, unique_multi, Product)
 
 
 class Axis(ABCAxis):
@@ -1454,6 +1454,74 @@ def __dir__(self):
     def __iter__(self):
         return iter(self._list)
 
+    # TODO: move a few doctests to unit tests
+    def iter_labels(self, axes=None, ascending=True):
+        r"""Returns a view of the axes labels.
+
+        Parameters
+        ----------
+        axes : int, str or Axis or tuple of them, optional
+            Axis or axes along which to iterate and in which order. Defaults to None (all axes in the order they are
+            in the collection).
+        ascending : bool, optional
+            Whether or not to iterate the axes in ascending order (from start to end). Defaults to True.
+
+        Returns
+        -------
+        Sequence
+            An object you can iterate (loop) on and index by position. The precise type of which is considered an
+            implementation detail and should not be relied on.
+
+        Examples
+        --------
+
+        >>> from larray import ndtest
+        >>> axes = ndtest((2, 2)).axes
+        >>> axes
+        AxisCollection([
+            Axis(['a0', 'a1'], 'a'),
+            Axis(['b0', 'b1'], 'b')
+        ])
+        >>> axes.iter_labels()[0]
+        (a.i[0], b.i[0])
+        >>> for index in axes.iter_labels():
+        ...     print(index)
+        (a.i[0], b.i[0])
+        (a.i[0], b.i[1])
+        (a.i[1], b.i[0])
+        (a.i[1], b.i[1])
+        >>> axes.iter_labels(ascending=False)[0]
+        (a.i[1], b.i[1])
+        >>> for index in axes.iter_labels(ascending=False):
+        ...     print(index)
+        (a.i[1], b.i[1])
+        (a.i[1], b.i[0])
+        (a.i[0], b.i[1])
+        (a.i[0], b.i[0])
+        >>> axes.iter_labels(('b', 'a'))[0]
+        (b.i[0], a.i[0])
+        >>> for index in axes.iter_labels(('b', 'a')):
+        ...     print(index)
+        (b.i[0], a.i[0])
+        (b.i[0], a.i[1])
+        (b.i[1], a.i[0])
+        (b.i[1], a.i[1])
+        >>> axes.iter_labels('b')[0]
+        (b.i[0],)
+        >>> for index in axes.iter_labels('b'):
+        ...     print(index)
+        (b.i[0],)
+        (b.i[1],)
+        """
+        axes = self if axes is None else self[axes]
+        if not isinstance(axes, AxisCollection):
+            axes = (axes,)
+        # we need .i because Product uses len and [] on axes and not iter; and [] creates LGroup and not IGroup
+        p = Product([axis.i for axis in axes])
+        if not ascending:
+            p = p[::-1]
+        return p
+
     def __getattr__(self, key):
         try:
             return self._map[key]

From 20d1b70cdf3b43c52a4fe22ea22a7be8d839c78c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 28 Nov 2018 17:54:40 +0100
Subject: [PATCH 34/44] implemented LArray.keys, LArray.values and LArray.items

---
 doc/source/api.rst                      |  13 ++
 doc/source/changes/version_0_30.rst.inc |  63 +++++-
 larray/core/array.py                    | 278 +++++++++++++++++++++++-
 3 files changed, 352 insertions(+), 2 deletions(-)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index 44512920a..0fe2384ff 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -230,6 +230,7 @@ LArray
 * :ref:`la_sorting`
 * :ref:`la_reshaping`
 * :ref:`la_testing`
+* :ref:`la_iter`
 * :ref:`la_op`
 * :ref:`la_misc`
 * :ref:`la_to_pandas`
@@ -422,6 +423,18 @@ Testing/Searching
    LArray.labelofmax
    LArray.indexofmax
 
+.. _la_iter:
+
+Iterating
+---------
+
+.. autosummary::
+   :toctree: _generated/
+
+   LArray.keys
+   LArray.values
+   LArray.items
+
 .. _la_op:
 
 Operators
diff --git a/doc/source/changes/version_0_30.rst.inc b/doc/source/changes/version_0_30.rst.inc
index e550fba5a..22d2c26dc 100644
--- a/doc/source/changes/version_0_30.rst.inc
+++ b/doc/source/changes/version_0_30.rst.inc
@@ -134,8 +134,69 @@ New features
     a_b  a0_b1  a1_b2
              1      5
 
-* added a feature (see the :ref:`miscellaneous section <misc>` for details).
+* implemented :py:obj:`LArray.keys()` :py:obj:`LArray.values()` and :py:obj:`LArray.items()`
+  methods to iterate (loop) on an array labels (keys), values or (key, value) pairs.
 
+    >>> arr = ndtest((2, 2))
+    >>> arr
+    a\b  b0  b1
+     a0   0   1
+     a1   2   3
+
+    By default they iterate on all axes, in the order they are in the array.
+
+    >>> for value in arr.values():
+    ...     print(value)
+    0
+    1
+    2
+    3
+    >>> for key in arr.keys():
+    ...     print(key)
+    (a.i[0], b.i[0])
+    (a.i[0], b.i[1])
+    (a.i[1], b.i[0])
+    (a.i[1], b.i[1])
+    >>> for key, value in arr.items():
+    ...     print(key, "->", value)
+    (a.i[0], b.i[0]) -> 0
+    (a.i[0], b.i[1]) -> 1
+    (a.i[1], b.i[0]) -> 2
+    (a.i[1], b.i[1]) -> 3
+
+    You can iterate in reverse order.
+
+    >>> for value in arr.values(ascending=False):
+    ...     print(value)
+    3
+    2
+    1
+    0
+
+    or specify another axis order:
+
+    >>> for value in arr.values(('b', 'a')):
+    ...     print(value)
+    0
+    2
+    1
+    3
+
+    You can also iterate on fewer axes than the array has.
+
+    >>> # iterate on the "b" axis, that is, return each label along the "b" axis in turn
+    >>> for key in arr.keys('b'):
+    ...     print(key)
+    (b.i[0],)
+    (b.i[1],)
+    >>> # iterate on the array values along the "b" axis.
+    >>> # That is, for each label along the "b" axis, return the corresponding (sub)array
+    >>> for value in arr.values('b'):
+    ...     print(value)
+    a  a0  a1
+        0   2
+    a  a0  a1
+        1   3
 
 * implemented :py:obj:`Axis.apply()` method to transform an axis labels by a function and return a new Axis.
 
diff --git a/larray/core/array.py b/larray/core/array.py
index 0ff2fdfbd..ab91c4685 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -64,7 +64,7 @@
 from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis
 from larray.util.misc import (table2str, size2str, basestring, izip, rproduct, ReprString, duplicates,
                               float_error_handler_factory, _isnoneslice, light_product, unique_list, common_type,
-                              renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute)
+                              renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip)
 
 
 def all(values, axis=None):
@@ -3100,6 +3100,282 @@ def indicesofsorted(self, axis=None, ascending=True, kind='quicksort'):
 
     posargsort = renamed_to(indicesofsorted, 'posargsort')
 
+    # TODO: move some doctests to unit tests
+    # TODO: implement keys_by
+    # TODO: implement expand=True
+    def keys(self, axes=None, ascending=True):
+        r"""Returns a view on the array labels along axes.
+
+        This is an object you can iterate (loop) on or index to get to the Nth label along axes.
+
+        Parameters
+        ----------
+        axes : int, str or Axis or tuple of them, optional
+            Axis or axes along which to iterate and in which order. Defaults to None (all axes in the order they are
+            in the array).
+        ascending : bool, optional
+            Whether or not to iterate the axes in ascending order (from start to end). Defaults to True.
+
+        Returns
+        -------
+        Sequence
+            An object you can iterate (loop) on and index by position to get the Nth label along axes.
+
+        Examples
+        --------
+        >>> arr = ndtest((2, 2))
+        >>> arr
+        a\b  b0  b1
+         a0   0   1
+         a1   2   3
+
+        By default it iterates on all axes, in the order they are in the array.
+
+        >>> arr.keys()[0]
+        (a.i[0], b.i[0])
+        >>> for key in arr.keys():
+        ...     print(key)
+        (a.i[0], b.i[0])
+        (a.i[0], b.i[1])
+        (a.i[1], b.i[0])
+        (a.i[1], b.i[1])
+        >>> arr.keys(ascending=False)[0]
+        (a.i[1], b.i[1])
+        >>> for key in arr.keys(ascending=False):
+        ...     print(key)
+        (a.i[1], b.i[1])
+        (a.i[1], b.i[0])
+        (a.i[0], b.i[1])
+        (a.i[0], b.i[0])
+
+        but you can specify another axis order:
+
+        >>> arr.keys(('b', 'a'))[0]
+        (b.i[0], a.i[0])
+        >>> for key in arr.keys(('b', 'a')):
+        ...     print(key)
+        (b.i[0], a.i[0])
+        (b.i[0], a.i[1])
+        (b.i[1], a.i[0])
+        (b.i[1], a.i[1])
+
+        One can specify fewer axes than the array has:
+
+        >>> arr.keys('b')[0]
+        (b.i[0],)
+        >>> # iterate on the "b" axis, that is return each label along the "b" axis
+        ... for key in arr.keys('b'):
+        ...     print(key)
+        (b.i[0],)
+        (b.i[1],)
+        """
+        return self.axes.iter_labels(axes, ascending=ascending)
+
+    # TODO: move many doctests to unit tests
+    # TODO: implement values_by
+    def values(self, axes=None, ascending=True, expand=False):
+        r"""Returns a view on the values of the array along axes.
+
+        Parameters
+        ----------
+        axes : int, str or Axis or tuple of them, optional
+            Axis or axes along which to iterate and in which order. Defaults to None (all axes in the order they are
+            in the array).
+        ascending : bool, optional
+            Whether or not to iterate the axes in ascending order (from start to end). Defaults to True.
+        expand : bool, optional
+            Whether or not to expand array using axes. This allows one to iterate on axes which do not exist in
+            the array, which is useful when iterating on several arrays with different axes. Defaults to False.
+
+        Returns
+        -------
+        Sequence
+            An object you can iterate (loop) on and index by position. The precise type of which is considered an
+            implementation detail and should not be relied on.
+
+        Examples
+        --------
+        >>> arr = ndtest((2, 2))
+        >>> arr
+        a\b  b0  b1
+         a0   0   1
+         a1   2   3
+
+        By default it iterates on all axes, in the order they are in the array.
+
+        >>> for value in arr.values():
+        ...     print(value)
+        0
+        1
+        2
+        3
+        >>> arr.values()[0]
+        0
+        >>> arr.values(ascending=False)[0]
+        3
+        >>> arr.values()[-1]
+        3
+        >>> arr.values(('b', 'a'))[1]
+        2
+        >>> arr.values('b')[0]
+        a  a0  a1
+            0   2
+        >>> arr.values('b', ascending=False)[0]
+        a  a0  a1
+            1   3
+        >>> arr[arr.b.i[-1]]
+        a  a0  a1
+            1   3
+        >>> arr['b.i[-1]']
+        a  a0  a1
+            1   3
+        >>> arr.values('b')[-1]
+        a  a0  a1
+            1   3
+        >>> for value in arr.values(ascending=False):
+        ...     print(value)
+        3
+        2
+        1
+        0
+
+        but you can specify another axis order:
+
+        >>> for value in arr.values(('b', 'a')):
+        ...     print(value)
+        0
+        2
+        1
+        3
+
+        When you specify fewer axes than the array has, you get arrays back:
+
+        >>> # iterate on the "b" axis, that is return the (sub)array for each label along the "b" axis
+        ... for value in arr.values('b'):
+        ...     print(value)
+        a  a0  a1
+            0   2
+        a  a0  a1
+            1   3
+        >>> # iterate on the "c" axis, which does not exist in arr, that is return arr for each label along the "c" axis
+        ... for value in arr.values('c=c0,c1', expand=True):
+        ...     print(value)
+        a\b  b0  b1
+         a0   0   1
+         a1   2   3
+        a\b  b0  b1
+         a0   0   1
+         a1   2   3
+        >>> # iterate on the "b" axis, that is return the (sub)array for each label along the "b" axis
+        ... for value in arr.values('b', ascending=False):
+        ...     print(value)
+        a  a0  a1
+            1   3
+        a  a0  a1
+            0   2
+        """
+        if axes is None:
+            combined = np.ravel(self.data)
+            return combined if ascending else combined[::-1]
+
+        if not isinstance(axes, (tuple, AxisCollection)):
+            axes = (axes,)
+
+        def get_axis(a):
+            if isinstance(a, basestring):
+                return Axis(a) if '=' in a else self.axes[a]
+            elif isinstance(a, int):
+                return self.axes[a]
+            else:
+                assert isinstance(a, Axis)
+                return a
+        axes = [get_axis(a) for a in axes]
+        array = self.expand(axes, readonly=True) if expand else self
+        axes = array.axes[axes]
+        # move axes in front
+        transposed = array.transpose(axes)
+        # combine axes if necessary
+        combined = transposed.combine_axes(axes, wildcard=True) if len(axes) > 1 else transposed
+        # trailing .i is to support the case where axis < self.axes (ie the elements of the result are arrays)
+        return combined.i if ascending else combined.i[::-1].i
+
+    # TODO: move some doctests to unit tests
+    # TODO: we currently return a tuple of groups even for 1D arrays, which can be either a bad or a good thing.
+    # if we returned an NDGroup in all cases, it would solve the problem
+    # TODO: implement expand=True
+    def items(self, axes=None, ascending=True):
+        r"""Returns a (label, value) view of the array along axes.
+
+        This is an object you can iterate (loop) on or index to get to (label, value) couples along axes.
+
+        Parameters
+        ----------
+        axes : int, str or Axis or tuple of them, optional
+            Axis or axes along which to iterate and in which order. Defaults to None (all axes in the order they are
+            in the array).
+        ascending : bool, optional
+            Whether or not to iterate the axes in ascending order (from start to end). Defaults to True.
+
+        Returns
+        -------
+        Sequence
+            An object you can iterate (loop) on and index by position to get the Nth (label, value) couple along axes.
+
+        Examples
+        --------
+        >>> arr = ndtest((2, 2))
+        >>> arr
+        a\b  b0  b1
+         a0   0   1
+         a1   2   3
+
+        By default it iterates on all axes, in the order they are in the array.
+
+        >>> arr.items()[0]
+        ((a.i[0], b.i[0]), 0)
+        >>> for key, value in arr.items():
+        ...     print(key, "->", value)
+        (a.i[0], b.i[0]) -> 0
+        (a.i[0], b.i[1]) -> 1
+        (a.i[1], b.i[0]) -> 2
+        (a.i[1], b.i[1]) -> 3
+        >>> arr.items(ascending=False)[0]
+        ((a.i[1], b.i[1]), 3)
+        >>> for key, value in arr.items(ascending=False):
+        ...     print(key, "->", value)
+        (a.i[1], b.i[1]) -> 3
+        (a.i[1], b.i[0]) -> 2
+        (a.i[0], b.i[1]) -> 1
+        (a.i[0], b.i[0]) -> 0
+
+        but you can specify another axis order:
+
+        >>> arr.items(('b', 'a'))[0]
+        ((b.i[0], a.i[0]), 0)
+        >>> for key, value in arr.items(('b', 'a')):
+        ...     print(key, "->", value)
+        (b.i[0], a.i[0]) -> 0
+        (b.i[0], a.i[1]) -> 2
+        (b.i[1], a.i[0]) -> 1
+        (b.i[1], a.i[1]) -> 3
+
+        When you specify fewer axes than the array has, you get arrays back:
+
+        >>> arr.items('b')[0]
+        ((b.i[0],), a  a0  a1
+            0   2)
+        >>> # iterate on the "b" axis, that is return the (sub)array for each label along the "b" axis
+        ... for key, value in arr.items('b'):
+        ...     print(key, value, sep="\n")
+        (b.i[0],)
+        a  a0  a1
+            0   2
+        (b.i[1],)
+        a  a0  a1
+            1   3
+        """
+        return SequenceZip((self.keys(axes, ascending=ascending), self.values(axes, ascending=ascending)))
+
     def copy(self):
         """Returns a copy of the array.
         """

From 35dadc43e50cdb915a1089f4247f4501433af05f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Tue, 27 Nov 2018 10:45:31 +0100
Subject: [PATCH 35/44] implemented LArray.flat

---
 larray/core/array.py | 49 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/larray/core/array.py b/larray/core/array.py
index ab91c4685..559761cbd 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -386,6 +386,26 @@ def __setitem__(self, key, value):
         self.array.__setitem__(self._prepare_key(key, wildcard=True), value, translate_key=False)
 
 
+class LArrayFlatIndexer(object):
+    __slots__ = ('array',)
+
+    def __init__(self, array):
+        self.array = array
+
+    def __getitem__(self, flat_key, sep='_'):
+        axes = self.array.axes
+        key = np.unravel_index(flat_key, axes.shape)
+        la_key = axes._adv_keys_to_combined_axis_la_keys(key, sep=sep)
+        return self.array.__getitem__(la_key, translate_key=False)
+
+    def __setitem__(self, flat_key, value):
+        # np.ndarray.flat is a flatiter object but it is indexable despite the name
+        self.array.data.flat[flat_key] = value
+
+    def __len__(self):
+        return self.array.size
+
+
 # TODO: rename to LArrayIndexPointsIndexer or something like that
 class LArrayPositionalPointsIndexer(object):
     __slots__ = ('array',)
@@ -3376,6 +3396,35 @@ def items(self, axes=None, ascending=True):
         """
         return SequenceZip((self.keys(axes, ascending=ascending), self.values(axes, ascending=ascending)))
 
+    # XXX: rename to iflat instead?
+    @lazy_attribute
+    def flat(self):
+        r"""Access the array by index as if it was flattened (all its axes were combined)
+
+        Examples
+        --------
+        >>> arr = ndtest((2, 3)) * 10
+        >>> arr
+        a\b  b0  b1  b2
+         a0   0  10  20
+         a1  30  40  50
+
+        To select the first, second, fourth and fifth values across all axes:
+
+        >>> arr.flat[[0, 1, 3, 4]]
+        a_b  a0_b0  a0_b1  a1_b0  a1_b1
+                 0     10     30     40
+
+        Set the first and sixth values to 42
+
+        >>> arr.flat[[0, 5]] = 42
+        >>> arr
+        a\b  b0  b1  b2
+         a0  42  10  20
+         a1  30  40  42
+        """
+        return LArrayFlatIndexer(self)
+
     def copy(self):
         """Returns a copy of the array.
         """

From 0c9dc01118467b1de389f5f9d1cb3389345526b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Tue, 27 Nov 2018 10:45:46 +0100
Subject: [PATCH 36/44] implemented LArray.unique

---
 doc/source/api.rst                      |  1 +
 doc/source/changes/version_0_30.rst.inc | 34 +++++++++
 larray/core/array.py                    | 94 +++++++++++++++++++++++++
 larray/tests/test_array.py              | 16 +++++
 4 files changed, 145 insertions(+)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index 0fe2384ff..fbb7ba153 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -456,6 +456,7 @@ Miscellaneous
    LArray.clip
    LArray.shift
    LArray.diff
+   LArray.unique
    LArray.to_clipboard
 
 .. _la_to_pandas:
diff --git a/doc/source/changes/version_0_30.rst.inc b/doc/source/changes/version_0_30.rst.inc
index 22d2c26dc..a1b00de1b 100644
--- a/doc/source/changes/version_0_30.rst.inc
+++ b/doc/source/changes/version_0_30.rst.inc
@@ -134,6 +134,40 @@ New features
     a_b  a0_b1  a1_b2
              1      5
 
+* implemented :py:obj:`LArray.unique()` method to compute unique values for an array, optionally
+  along axes.
+
+    >>> arr = LArray([[0, 2, 0, 0],
+    ...               [1, 1, 1, 0]], 'a=a0,a1;b=b0..b3')
+    >>> arr
+    a\b  b0  b1  b2  b3
+     a0   0   2   0   0
+     a1   1   1   1   0
+
+    By default unique() returns the first occurrence of each unique value in the order it appears:
+
+    >>> arr.unique()
+    a_b  a0_b0  a0_b1  a1_b0
+             0      2      1
+
+    To sort the unique values, use the sort argument:
+
+    >>> arr.unique(sort=True)
+    a_b  a0_b0  a1_b0  a0_b1
+             0      1      2
+
+    One can also compute unique sub-arrays (i.e. combination of values) along axes. In our example the a0=0, a1=1
+    combination appears twice along the 'b' axis, so 'b2' is not returned:
+
+    >>> arr.unique('b')
+    a\b  b0  b1  b3
+     a0   0   2   0
+     a1   1   1   0
+    >>> arr.unique('b', sort=True)
+    a\b  b3  b0  b1
+     a0   0   0   2
+     a1   0   1   1
+
 * implemented :py:obj:`LArray.keys()` :py:obj:`LArray.values()` and :py:obj:`LArray.items()`
   methods to iterate (loop) on an array labels (keys), values or (key, value) pairs.
 
diff --git a/larray/core/array.py b/larray/core/array.py
index 559761cbd..c9f293cf3 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -3430,6 +3430,100 @@ def copy(self):
         """
         return LArray(self.data.copy(), axes=self.axes[:], meta=self.meta)
 
+    # XXX: we might want to implement this using .groupby().first()
+    def unique(self, axes=None, sort=False, sep='_'):
+        r"""Returns unique values (optionally along axes)
+
+        Parameters
+        ----------
+        axes : axis reference (int, str, Axis) or sequence of them, optional
+            Axis or axes along which to compute unique values. Defaults to None (all axes).
+        sort : bool, optional
+            Whether or not to sort unique values. Defaults to False. Sorting is not implemented yet for unique() along
+            multiple axes.
+        sep : str, optional
+            Separator when several labels need to be combined. Defaults to '_'.
+
+        Returns
+        -------
+        LArray
+            array with unique values
+
+        Examples
+        --------
+        >>> arr = LArray([[0, 2, 0, 0],
+        ...               [1, 1, 1, 0]], 'a=a0,a1;b=b0..b3')
+        >>> arr
+        a\b  b0  b1  b2  b3
+         a0   0   2   0   0
+         a1   1   1   1   0
+
+        By default unique() returns the first occurrence of each unique value in the order it appears:
+
+        >>> arr.unique()
+        a_b  a0_b0  a0_b1  a1_b0
+                 0      2      1
+
+        To sort the unique values, use the sort argument:
+
+        >>> arr.unique(sort=True)
+        a_b  a0_b0  a1_b0  a0_b1
+                 0      1      2
+
+        One can also compute unique sub-arrays (i.e. combination of values) along axes. In our example the a0=0, a1=1
+        combination appears twice along the 'b' axis, so 'b2' is not returned:
+
+        >>> arr.unique('b')
+        a\b  b0  b1  b3
+         a0   0   2   0
+         a1   1   1   0
+        >>> arr.unique('b', sort=True)
+        a\b  b3  b0  b1
+         a0   0   0   2
+         a1   0   1   1
+        """
+        if axes is not None:
+            axes = self.axes[axes]
+
+        assert axes is None or isinstance(axes, (Axis, AxisCollection))
+
+        if not isinstance(axes, AxisCollection):
+            axis_idx = self.axes.index(axes) if axes is not None else None
+            # axis needs np >= 1.13
+            _, unq_index = np.unique(self, axis=axis_idx, return_index=True)
+            if not sort:
+                unq_index = np.sort(unq_index)
+            if axes is None:
+                return self.flat.__getitem__(unq_index, sep=sep)
+            else:
+                return self[axes.i[unq_index]]
+        else:
+            if sort:
+                raise NotImplementedError('sort=True is not implemented for unique along multiple axes')
+            unq_list = []
+            seen = set()
+            list_append = unq_list.append
+            seen_add = seen.add
+            sep_join = sep.join
+            axis_name = sep_join(a.name for a in axes)
+            first_axis_idx = self.axes.index(axes[0])
+            # XXX: use combine_axes(axes).items() instead?
+            for labels, value in self.items(axes):
+                hashable_value = value.data.tobytes() if isinstance(value, LArray) else value
+                if hashable_value not in seen:
+                    list_append((sep_join(str(l) for l in labels), value))
+                    seen_add(hashable_value)
+            res_arr = stack(unq_list, axis_name)
+            # transpose the combined axis at the position where the first of the combined axes was
+            # TODO: use res_arr.transpose(res_arr.axes.move_axis(-1, first_axis_idx)) once #564 is implemented:
+            #       https://github.com/larray-project/larray/issues/564
+            # stack adds the stacked axes at the end
+            combined_axis = res_arr.axes[-1]
+            assert combined_axis.name == axis_name
+            new_axes_order = res_arr.axes - combined_axis
+            new_axes_order.insert(first_axis_idx, combined_axis)
+            return res_arr.transpose(new_axes_order)
+
     @property
     def info(self):
         """Describes a LArray (metadata + shape and labels for each axis).
diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py
index c54497c09..6026cffba 100644
--- a/larray/tests/test_array.py
+++ b/larray/tests/test_array.py
@@ -2849,6 +2849,22 @@ def test_shift_axis(small_array):
     l2.axes.lipro.labels = lipro.labels[1:]
 
 
+def test_unique():
+    arr = LArray([[[0, 2, 0, 0],
+                   [1, 1, 1, 0]],
+                  [[0, 2, 0, 0],
+                   [2, 1, 2, 0]]], 'a=a0,a1;b=b0,b1;c=c0..c3')
+    assert_array_equal(arr.unique('a'), arr)
+    assert_array_equal(arr.unique('b'), arr)
+    assert_array_equal(arr.unique('c'), arr['c0,c1,c3'])
+    expected = from_string("""\
+a_b\\c  c0  c1  c2  c3
+a0_b0   0   2   0   0
+a0_b1   1   1   1   0
+a1_b1   2   1   2   0""")
+    assert_array_equal(arr.unique(('a', 'b')), expected)
+
+
 def test_extend(small_array):
     sex, lipro = small_array.axes
 

From 53d1581c7720b80d3efde9cde42ad5783d5e0922 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Tue, 25 Sep 2018 09:20:14 +0200
Subject: [PATCH 37/44] WIP: generalized stack to more than one dimension
 (needs tests & finish the changelog)

works for both stack([(ndkey, value), ...], axis=axes) and stack({ndkey: value}, several_axes)

TODO: we might want to implement nested dicts before advertising this!
---
 doc/source/changes/version_0_30.rst.inc |  19 +++
 larray/core/array.py                    | 200 +++++++++++++++++++-----
 larray/core/axis.py                     |  64 +++++++-
 3 files changed, 244 insertions(+), 39 deletions(-)

diff --git a/doc/source/changes/version_0_30.rst.inc b/doc/source/changes/version_0_30.rst.inc
index a1b00de1b..e6477ee95 100644
--- a/doc/source/changes/version_0_30.rst.inc
+++ b/doc/source/changes/version_0_30.rst.inc
@@ -287,6 +287,25 @@ Miscellaneous improvements
      A0   0   1
      A1   2   3
 
+* :py:obj:`stack()` can now stack along several axes at once.
+
+    >>> gender = Axis('gender=M,F')
+    >>> country = Axis('country=BE,FR,DE')
+    >>> stack({('BE', 'M'): 0,
+    ...        ('FR', 'F'): 2,
+    ...        ('BE', 'F'): 2,
+    ...        ('FR', 'M'): 2,
+    ...        ('DE', 'M'): 2,
+    ...        ('DE', 'F'): 2},
+    ...       (gender, country))
+    FIXME: this is not correct
+    sex  nat\test   T1   T2
+      M        BE  1.0  0.0
+      M        FO  0.0  1.0
+      F        BE  1.0  0.0
+      F        FO  0.0  1.0
+
+
 * added option ``exact`` to ``join`` argument of :py:obj:`Axis.align()` and :py:obj:`LArray.align()` methods.
   Instead of aligning, passing ``join='exact'`` to the ``align`` method will raise an error when axes are not equal.
   Closes :issue:`338`.
diff --git a/larray/core/array.py b/larray/core/array.py
index c9f293cf3..02253d274 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -8,8 +8,9 @@
 # ? implement multi group in one axis getitem: lipro['P01,P02;P05'] <=> (lipro['P01,P02'], lipro['P05'])
 
 # * we need an API to get to the "next" label. Sometimes, we want to use label+1, but that is problematic when labels
-#   are not numeric, or have not a step of 1. X.agegroup[X.agegroup.after(25):]
-#                                             X.agegroup[X.agegroup[25].next():]
+#   are not numeric, or have not a step of 1.
+#       X.agegroup[X.agegroup.after(25):]
+#       X.agegroup[X.agegroup[25].next():]
 
 # * implement keepaxes=True for _group_aggregate instead of/in addition to group tuples
 
@@ -25,10 +26,7 @@
 
 # * test structured arrays
 
-# ? move "utils" to its own project (so that it is not duplicated between larray and liam2)
-#   OR
-#   include utils only in larray project and make larray a dependency of liam2
-#   (and potentially rename it to reflect the broader scope)
+# * use larray "utils" in LIAM2 (to avoid duplicated code)
 
 from collections import Iterable, Sequence, OrderedDict
 from itertools import product, chain, groupby, islice
@@ -8656,7 +8654,69 @@ def eye(rows, columns=None, k=0, title=None, dtype=None, meta=None):
 #       ('FR', 'M'): 2, ('FR', 'F'): 3,
 #       ('DE', 'M'): 4, ('DE', 'F'): 5})
 
+# for 2D, I think the best compromise is the nested dict (especially for python 3.7+):
 
+# stack({'BE': {'M': 0, 'F': 1},
+#        'FR': {'M': 2, 'F': 3},
+#        'DE': {'M': 4, 'F': 5}}, axes=('nationality', 'sex'))
+
+# we could make this valid too (combine pos and labels) but I don't think it is worth it unless it comes
+# naturally from the implementation:
+
+# stack({'BE': {'M,F': [0, 1]},
+#        'FR': {'M,F': [2, 3]},
+#        'DE': {'M,F': [4, 5]}}, axes=('nationality', 'sex'))
+
+# It looks especially nice if the labels have been extracted to variables:
+
+# BE, FR, DE = nat['BE,FR,DE']
+# M, F = sex['M,F']
+
+# stack({BE: {M: 0, F: 1},
+#        FR: {M: 2, F: 3},
+#        DE: {M: 4, F: 5}})
+
+# for 3D:
+
+# stack({'a0': {'b0': {'c0':  0, 'c1':  1},
+#               'b1': {'c0':  2, 'c1':  3},
+#               'b2': {'c0':  4, 'c1':  5}},
+#        'a1': {'b0': {'c0':  6, 'c1':  7},
+#               'b1': {'c0':  8, 'c1':  9},
+#               'b2': {'c0': 10, 'c1': 11}}},
+#       axes=('a', 'b', 'c'))
+
+# a0, a1 = a['a0,a1']
+# b0, b1, b2 = b['b0,b1,b2']
+# c0, c1 = c['c0,c1']
+
+# stack({a0: {b0: {c0:  0, c1:  1},
+#             b1: {c0:  2, c1:  3},
+#             b2: {c0:  4, c1:  5}},
+#        a1: {b0: {c0:  6, c1:  7},
+#             b1: {c0:  8, c1:  9},
+#             b2: {c0: 10, c1: 11}}},
+#       axes=(a, b, c))
+
+# if we implement:
+#     arr[key] = {'a0': 0, 'a1': 1}
+# where key must not be related to the "a" axis
+# it would make it relatively easy to implement the nested dict syntax I think:
+# first do a pass at the structure to get axes (if not provided) then:
+#     for k, v in d.items():
+#         arr[k] = v
+# but that syntax could be annoying if we want to have an array of dicts
+
+# alternatives:
+
+# arr['a0'] = 0; arr['a1'] = 1 # <-- this already works
+# arr['a0,a1'] = [0, 1]        # <-- unsure if this works, but we should make it work (it is annoying if we
+#                              #     have an array of lists)
+# arr[:] = {'a0': 0, 'a1': 1}
+# arr[:] = stack({'a0': 0, 'a1': 1}) # <-- not equivalent if a has more labels
+
+
+# TODO: rename axis to axes (with deprecation)
 def stack(elements=None, axis=None, title=None, meta=None, dtype=None, **kwargs):
     r"""
     Combines several arrays or sessions along an axis.
@@ -8670,8 +8730,8 @@ def stack(elements=None, axis=None, title=None, meta=None, dtype=None, **kwargs)
 
         Stacking sessions will return a new session containing the arrays of all sessions stacked together. An array
         missing in a session will be replaced by NaN.
-    axis : str or Axis or Group, optional
-        Axis to create. If None, defaults to a range() axis.
+    axis : str or Axis or Group or tuple/AxisCollection of Axis, optional
+        Axes to create. If None, defaults to a range() axis.
     title : str, optional
         Deprecated. See 'meta' below.
     meta : list of pairs or dict or OrderedDict or Metadata, optional
@@ -8704,6 +8764,8 @@ def stack(elements=None, axis=None, title=None, meta=None, dtype=None, **kwargs)
     sex\nat   BE   FO
           M  1.0  0.0
           F  1.0  0.0
+
+    >>> # TODO: move this to a unit test
     >>> all_nat = Axis('nat=BE,DE,FR,NL,UK')
     >>> stack({'BE': arr1, 'DE': arr2}, all_nat[:'DE'])
     sex\nat   BE   DE
@@ -8756,6 +8818,20 @@ def stack(elements=None, axis=None, title=None, meta=None, dtype=None, **kwargs)
           M  1.0  0.0
           F  1.0  0.0
 
+    Stack can also stack along several axes
+
+    >>> test = Axis('test=T1,T2')
+    >>> stack({('BE', 'T1'): arr1,
+    ...        ('BE', 'T2'): arr2,
+    ...        ('FO', 'T1'): arr2,
+    ...        ('FO', 'T2'): arr1},
+    ...       (nat, test))
+    sex  nat\test   T1   T2
+      M        BE  1.0  0.0
+      M        FO  0.0  1.0
+      F        BE  1.0  0.0
+      F        FO  0.0  1.0
+
     To stack sessions, let us first create two test sessions. For example suppose we have a session storing the results
     of a baseline simulation:
 
@@ -8796,67 +8872,115 @@ def stack(elements=None, axis=None, title=None, meta=None, dtype=None, **kwargs)
     elif kwargs:
         raise TypeError("stack() accept either keyword arguments OR a collection of elements, not both")
 
-    if isinstance(axis, Axis) and all(isinstance(e, tuple) for e in elements):
-        assert all(len(e) == 2 for e in elements)
-        elements = {k: v for k, v in elements}
-
     if isinstance(elements, LArray):
         if axis is None:
             axis = -1
         axis = elements.axes[axis]
-        values = [elements[k] for k in axis]
+        items = elements.items(axis)
     elif isinstance(elements, dict):
+        axis_tuple = isinstance(axis, tuple) and all(isinstance(a, Axis) for a in axis)
+        axis_seq = isinstance(axis, AxisCollection) or axis_tuple
         # TODO: support having no Axis object for Python3.7 (without error or warning)
         # XXX: we probably want to support this with a warning on Python < 3.7
-        assert isinstance(axis, Axis)
-        values = [elements[v] for v in axis.labels]
+        assert isinstance(axis, Axis) or axis_seq
+        if not isinstance(axis, AxisCollection):
+            axis = AxisCollection(axis)
+
+        # this assumes we support non complete axes
+        # items = [(axis.to_igroup(k), v) for k, v in elements.items()]
+
+        # translate elements keys to a group or tuple of groups so that they are compatible with
+        # what iter_labels gives us
+        # FIXME: we must also reorder translated keys otherwise if axis order is different it does not match
+        #     e.g.
+        #     >>> gender = Axis('gender=M,F')
+        #     >>> country = Axis('country=BE,FR,DE')
+        #     >>> stack({('BE', 'M'): 0,
+        #     ...        ('FR', 'F'): 2,
+        #     ...        ('BE', 'F'): 2,
+        #     ...        ('FR', 'M'): 2,
+        #     ...        ('DE', 'M'): 2,
+        #     ...        ('DE', 'F'): 2},
+        #     ...       (gender, country))
+        # solution: in to_igroup, go via dict then tuple of igroups or slice(None)
+        elements = {axis.to_igroup(k): v for k, v in elements.items()}
+        items = [(k, elements[k]) for k in axis.iter_labels()]
     elif isinstance(elements, Iterable):
         if not isinstance(elements, Sequence):
             elements = list(elements)
 
         if all(isinstance(e, tuple) for e in elements):
             assert all(len(e) == 2 for e in elements)
-            keys = [k for k, v in elements]
-            values = [v for k, v in elements]
-            assert all(np.isscalar(k) for k in keys)
-            # this case should already be handled
-            assert not isinstance(axis, Axis)
-            # axis should be None or str
-            axis = Axis(keys, axis)
+            items = elements
+            if axis is None or isinstance(axis, basestring):
+                keys = [k for k, v in elements]
+                # assert that all keys are indexers
+                assert all(np.isscalar(k) or isinstance(k, (Group, tuple)) for k in keys)
+                axis = Axis(keys, axis)
         else:
-            values = elements
             if axis is None or isinstance(axis, basestring):
                 axis = Axis(len(elements), axis)
             else:
                 assert len(axis) == len(elements)
+            items = list(zip(axis, elements))
     else:
         raise TypeError('unsupported type for arrays: %s' % type(elements).__name__)
 
-    if any(isinstance(v, Session) for v in values):
-        sessions = values
-        if not all(isinstance(s, Session) for s in sessions):
+    if any(isinstance(v, Session) for k, v in items):
+        if not all(isinstance(v, Session) for k, v in items):
             raise TypeError("stack() only supports stacking Session with other Session objects")
 
-        all_keys = unique_multi(s.keys() for s in sessions)
-        res = []
-        for name in all_keys:
+        array_names = unique_multi(sess.keys() for sess_name, sess in items)
+
+        def stack_one(array_name):
             try:
-                stacked = stack([s.get(name, nan) for s in sessions], axis=axis)
+                return stack([(sess_name, sess.get(array_name, nan))
+                              for sess_name, sess in items], axis=axis)
             # TypeError for str arrays, ValueError for incompatible axes, ...
             except Exception:
-                stacked = nan
-            res.append((name, stacked))
-        return Session(res, meta=meta)
+                return nan
+
+        return Session([(name, stack_one(name)) for name in array_names], meta=meta)
     else:
-        # XXX : use concat?
         values = [aslarray(v) if not np.isscalar(v) else v
-                  for v in values]
-        result_axes = AxisCollection.union(*[get_axes(v) for v in values])
-        result_axes.append(axis)
+                  for k, v in items]
+
+        # we need a kludge to support stacking along an anonymous axis because AxisCollection.extend (and thus
+        # AxisCollection.union) support for anonymous axes is kinda messy.
+        if isinstance(axis, Axis) and axis.name is None:
+            axis = axis.rename('__anonymous__')
+            kludge = True
+        else:
+            kludge = False
+
+        # XXX: with the current semantics of stack, we need to compute the union of axes for values but axis
+        #      needs to be added unconditionally. We *might* want to change the semantics to mean either stack or
+        #      concat depending on whether or not the axis already exists.
+        #      this would be more convenient for users I think, but would mean one class of error we cannot detect
+        #      anymore: if a user unintentionally stacks an array with the axis already present.
+        #      (this is very similar to the debate about combining LArray.append and LArray.extend)
+        all_axes = [get_axes(v) for v in values] + [axis]
+        result_axes = AxisCollection.union(*all_axes)
+        if kludge:
+            # TODO: use AxisCollection.rename when it will exist
+            result_axes = result_axes.replace(axis, axis.rename(None))
+
         if dtype is None:
             dtype = common_type(values)
+
+        # XXX: if we want to support partial axes, we need to use full with a fillvalue
+        #      but only if not entirely filled. How do we check that efficiently?
+        #      *assuming* axes do not contain duplicate labels, we could check that
+        #      len(unique(keys)) == result_axes.size but that would be expensive
+        #      Note that we can translate the keys first then check if it fills the whole array
+        #      as it will be faster to compare ints than strings.
+        #      a quick check would be len(items) == result_axes.size but that isn't very robust
         result = empty(result_axes, dtype=dtype, meta=meta)
-        for k, v in zip(axis, values):
+
+        # FIXME: this is *much* faster but it only works for scalars and not for stacking arrays
+        # keys = tuple(zip(*[k for k, v in items]))
+        # result.points[keys] = values
+        for k, v in items:
             result[k] = v
         return result
 
diff --git a/larray/core/axis.py b/larray/core/axis.py
index 7192a5454..b04fb73af 100644
--- a/larray/core/axis.py
+++ b/larray/core/axis.py
@@ -1956,7 +1956,11 @@ def check_compatible(self, axes):
             if not local_axis.iscompatible(axis):
                 raise ValueError("incompatible axes:\n{!r}\nvs\n{!r}".format(axis, local_axis))
 
-    # TODO: deprecate method. union is enough
+    # XXX: deprecate method (functionality is duplicated in union)?
+    #      I am not so sure anymore we need to actually deprecate the method: having both methods with the same
+    #      semantic like we currently have is useless indeed but I think we should have both a set-like method (union)
+    #      and the possibility to add an axis unconditionally (append or extend). That is, add an axis, even if that
+    #      name already exists. This is especially important for anonymous axes (see my comments in stack for example)
     # TODO: deprecate validate argument (unused)
     # TODO: deprecate replace_wildcards argument (unused)
     def extend(self, axes, validate=True, replace_wildcards=False):
@@ -2470,6 +2474,62 @@ def _translate_axis_key(self, axis_key, bool_passthrough=True):
         else:
             return self._translate_axis_key_chunk(axis_key, bool_passthrough)
 
+    def to_igroup(self, key):
+        """
+        Transforms any key (from LArray.__get|setitem__) to a complete indices-based group key.
+
+        Parameters
+        ----------
+        key : scalar, list/array of scalars, Group or tuple or dict of them
+            any key supported by LArray.__get|setitem__
+
+        Returns
+        -------
+        tuple of IGroup
+            len(tuple) == len(key) if isinstance(key, tuple) else 1
+        """
+        from .array import LArray
+
+        # convert scalar keys to 1D keys
+        if not isinstance(key, (tuple, dict)):
+            key = (key,)
+
+        # FIXME: add support for dict key
+
+        # always the case except if key is a dict
+        if isinstance(key, tuple):
+            key = tuple(axis_key.evaluate(self) if isinstance(axis_key, ExprNode) else axis_key
+                        for axis_key in key)
+
+            nonboolkey = []
+            for axis_key in key:
+                if isinstance(axis_key, np.ndarray) and np.issubdtype(axis_key.dtype, np.bool_):
+                    if axis_key.shape != self.shape:
+                        raise ValueError("boolean key with a different shape ({}) than array ({})"
+                                         .format(axis_key.shape, self.shape))
+                    axis_key = LArray(axis_key, self)
+
+                if isinstance(axis_key, LArray) and np.issubdtype(axis_key.dtype, np.bool_):
+                    extra_key_axes = axis_key.axes - self
+                    if extra_key_axes:
+                        raise ValueError("subset key contains more axes ({}) than array ({})"
+                                         .format(axis_key.axes, self))
+                    nonboolkey.extend(axis_key.nonzero())
+                else:
+                    nonboolkey.append(axis_key)
+            key = tuple(nonboolkey)
+
+            # drop slice(None) and Ellipsis since they are meaningless because of guess_axis.
+            # XXX: we might want to raise an exception when we find Ellipses or (most) slice(None) because except for
+            #      a single slice(None) a[:], I don't think there is any point.
+            key = [axis_key for axis_key in key
+                   if not _isnoneslice(axis_key) and axis_key is not Ellipsis]
+
+            # translate all keys to IGroup
+            return tuple(self._translate_axis_key(axis_key) for axis_key in key)
+        else:
+            raise ValueError('dict key not supported for now')
+
     def _translated_key(self, key):
         """
         Transforms any key (from LArray.__get|setitem__) to a complete indices-based key.
@@ -2487,6 +2547,8 @@ def _translated_key(self, key):
             This key is not yet usable as is in a numpy array as it can still contain LArray parts and the advanced key
             parts are not broadcasted together yet.
         """
+        # FIXME: use to_igroup
+
         from .array import LArray
 
         # convert scalar keys to 1D keys

From 33e215c0970998aafe12eea842f864f78b56fc24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 12 Dec 2018 17:28:38 +0100
Subject: [PATCH 38/44] implemented zip_array_values and zip_array_items

---
 larray/core/array.py | 341 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 310 insertions(+), 31 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 02253d274..5891f9129 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -28,8 +28,8 @@
 
 # * use larray "utils" in LIAM2 (to avoid duplicated code)
 
-from collections import Iterable, Sequence, OrderedDict
-from itertools import product, chain, groupby, islice
+from collections import Iterable, Sequence, OrderedDict, abc
+from itertools import product, chain, groupby, islice, repeat
 import os
 import sys
 import functools
@@ -62,8 +62,8 @@
 from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis
 from larray.util.misc import (table2str, size2str, basestring, izip, rproduct, ReprString, duplicates,
                               float_error_handler_factory, _isnoneslice, light_product, unique_list, common_type,
-                              renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip)
-
+                              renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip,
+                              Repeater)
 
 def all(values, axis=None):
     """
@@ -3191,7 +3191,7 @@ def keys(self, axes=None, ascending=True):
 
     # TODO: move many doctests to unit tests
     # TODO: implement values_by
-    def values(self, axes=None, ascending=True, expand=False):
+    def values(self, axes=None, ascending=True):
         r"""Returns a view on the values of the array along axes.
 
         Parameters
@@ -3201,9 +3201,6 @@ def values(self, axes=None, ascending=True, expand=False):
             in the array).
         ascending : bool, optional
             Whether or not to iterate the axes in ascending order (from start to end). Defaults to True.
-        expand : bool, optional
-            Whether or not to expand array using axes. This allows one to iterate on axes which do not exist in
-            the array, which is useful when iterating on several arrays with different axes. Defaults to False.
 
         Returns
         -------
@@ -3275,15 +3272,6 @@ def values(self, axes=None, ascending=True, expand=False):
             0   2
         a  a0  a1
             1   3
-        >>> # iterate on the "c" axis, which does not exist in arr, that is return arr for each label along the "c" axis
-        ... for value in arr.values('c=c0,c1', expand=True):
-        ...     print(value)
-        a\b  b0  b1
-         a0   0   1
-         a1   2   3
-        a\b  b0  b1
-         a0   0   1
-         a1   2   3
         >>> # iterate on the "b" axis, that is return the (sub)array for each label along the "b" axis
         ... for value in arr.values('b', ascending=False):
         ...     print(value)
@@ -3294,24 +3282,15 @@ def values(self, axes=None, ascending=True, expand=False):
         """
         if axes is None:
             combined = np.ravel(self.data)
+            # contrary to what I thought, combined[::-1] *is* indexable
             return combined if ascending else combined[::-1]
 
-        if not isinstance(axes, (tuple, AxisCollection)):
+        if not isinstance(axes, (tuple, list, AxisCollection)):
             axes = (axes,)
 
-        def get_axis(a):
-            if isinstance(a, basestring):
-                return Axis(a) if '=' in a else self.axes[a]
-            elif isinstance(a, int):
-                return self.axes[a]
-            else:
-                assert isinstance(a, Axis)
-                return a
-        axes = [get_axis(a) for a in axes]
-        array = self.expand(axes, readonly=True) if expand else self
-        axes = array.axes[axes]
+        axes = self.axes[axes]
         # move axes in front
-        transposed = array.transpose(axes)
+        transposed = self.transpose(axes)
         # combine axes if necessary
         combined = transposed.combine_axes(axes, wildcard=True) if len(axes) > 1 else transposed
         # trailing .i is to support the case where axis < self.axes (ie the elements of the result are arrays)
@@ -3320,7 +3299,6 @@ def get_axis(a):
     # TODO: move some doctests to unit tests
     # TODO: we currently return a tuple of groups even for 1D arrays, which can be both a bad or a good thing.
     # if we returned an NDGroup in all cases, it would solve the problem
-    # TODO: implement expand=True
     def items(self, axes=None, ascending=True):
         r"""Returns a (label, value) view of the array along axes.
 
@@ -7646,6 +7624,305 @@ def split_axes(self, axes=None, sep='_', names=None, regex=None, sort=False, fil
         return array
     split_axis = renamed_to(split_axes, 'split_axis')
 
+    # FIXME: implement apply_by (this might be this function) and apply
+    def apply(self, transform, axes=None, dtype=None, ascending=True, args=(), **kwargs):
+        r"""
+        Apply a function or mapping to array elements along axes.
+
+        Parameters
+        ----------
+        transform : function or mapping
+            Function or mapping to apply to elements of the array.
+            The axes and dtype of all results must be the same. Functions will be called with the original value
+            as first argument and must return a single new value. A mapping (dict) must have the values to transform
+            as keys and the new values as values, that is: {<oldvalue1>: <newvalue1>, <oldvalue2>: <newvalue2>, ...}.
+        axes : str, int or Axis or tuple/list/AxisCollection of them, optional
+            Axis or axes along which to operate. Defaults to None (all axes).
+            Using the axes argument only works with a function transform.
+        dtype : type, optional
+            Output dtype. Defaults to None (inspect all output values to infer it automatically).
+        ascending : bool, optional
+            Whether or not to iterate the axes in ascending order (from start to end). Defaults to True.
+        args : tuple, optional
+            Extra arguments to pass to the function. Defaults to ().
+        **kwargs
+            Extra keyword arguments are passed to the function (as keyword arguments).
+
+        Returns
+        -------
+        LArray or scalar
+            Axes will be the union of those in axes and those of values returned by the function.
+
+        Notes
+        -----
+        To apply a transformation given as an LArray (with current values as labels on one axis of
+        the array and desired values as the array values), you can use: ``mapping_arr[original_arr]``.
+
+        Examples
+        --------
+        First let us define a test array
+
+        >>> arr = LArray([[0, 2, 1],
+        ...               [3, 1, 5]], 'a=a0,a1;b=b0..b2')
+        >>> arr
+        a\b  b0  b1  b2
+         a0   0   2   1
+         a1   3   1   5
+
+        Here is a simple function we would like to apply to each element of the array.
+        Note that this particular example should rather be written as: arr ** 2
+        as it is both more concise and much faster.
+
+        >>> def square(x):
+        ...     return x ** 2
+        >>> arr.apply(square)
+        a\b  b0  b1  b2
+         a0   0   4   1
+         a1   9   1  25
+
+        Now, assuming for a moment that the values of our test array above were in fact some numeric representation of
+        names and we had the correspondence to the actual names stored in a dictionary:
+
+        >>> code_to_names = {0: 'foo', 1: 'bar', 2: 'baz',
+        ...                  3: 'boo', 4: 'far', 5: 'faz'}
+
+        We could get back an array with the actual names by using:
+
+        >>> arr.apply(code_to_names)
+        a\b   b0   b1   b2
+         a0  foo  baz  bar
+         a1  boo  bar  faz
+
+        Functions can also be applied along some axes:
+
+        >>> # this is equivalent to (but much slower than): arr.sum_by('a')
+        ... arr.apply(sum, 'a')
+        a  a0  a1
+            3   9
+
+        Applying the function along some axes will return an array with the
+        union of those axes and the axes of the returned values. For example,
+        let us define a function which returns the k highest values of an array.
+
+        >>> def topk(a, k=2):
+        ...     return a.sort_values(ascending=False).ignore_labels().i[:k]
+        >>> arr.apply(topk, 'a')
+        a\b*  0  1
+          a0  2  1
+          a1  5  3
+
+        Other arguments can be passed to the function as a tuple in the "args" argument:
+
+        >>> arr.apply(topk, axes='a', args=(3,))
+        a\b*  0  1  2
+          a0  2  1  0
+          a1  5  3  1
+
+        or by using keyword arguments:
+
+        >>> arr.apply(topk, axes='a', k=3)
+        a\b*  0  1  2
+          a0  2  1  0
+          a1  5  3  1
+        """
+        if axes is None:
+            if isinstance(transform, abc.Mapping):
+                mapping = transform
+
+                def transform(v):
+                    return mapping.get(v, v)
+            if dtype is None:
+                vfunc = np.vectorize(transform)
+            else:
+                vfunc = np.vectorize(transform, otypes=[dtype])
+            return LArray(vfunc(self.data, *args, **kwargs), self.axes)
+        else:
+            if not callable(transform):
+                raise TypeError("using the 'axes' argument in LArray.apply() only works with a function 'transform'")
+            # this is necessary so that stack output is nice.
+            # XXX: when iter_labels returns NDGroups, this might not be necessary anymore
+            axes = self.axes[axes]
+            # TODO: implement res_axes argument in stack. I guess computing res_axes (by examining each value) takes a
+            # significant time of stack and here we can know it in advance in the usual case (ie each return value
+            # of func has the same axes)
+            values = (self,) + args + tuple(kwargs.values())
+            first_kw = 1 + len(args)
+            kwnames = tuple(kwargs.keys())
+            res_arr = stack([(k, transform(*a_and_kwa[:first_kw], **dict(zip(kwnames, a_and_kwa[first_kw:]))))
+                             for k, a_and_kwa in zip_array_items(values, axes, ascending)],
+                            axis=axes, dtype=dtype)
+
+            # transpose back axis where it was
+            return res_arr.transpose(self.axes & res_arr.axes)
+
+
+def zip_array_values(values, axes=None, ascending=True):
+    r"""Returns a sequence of tuples of the values of several arrays (or scalars) along axes.
+
+    Parameters
+    ----------
+    axes : int, str or Axis or tuple of them, optional
+        Axis or axes along which to iterate and in which order. Defaults to None (union of all axes present in
+        all arrays, in the order they are found).
+    ascending : bool, optional
+        Whether or not to iterate the axes in ascending order (from start to end). Defaults to True.
+
+    Returns
+    -------
+    Sequence
+
+    Examples
+    --------
+    >>> arr1 = ndtest('a=a0,a1;b=b1,b2')
+    >>> arr2 = ndtest('a=a0,a1;c=c1,c2')
+    >>> arr1
+    a\b  b1  b2
+     a0   0   1
+     a1   2   3
+    >>> arr2
+    a\c  c1  c2
+     a0   0   1
+     a1   2   3
+    >>> for a1, a2 in zip_array_values((arr1, arr2), 'a'):
+    ...     print("==")
+    ...     print(a1)
+    ...     print(a2)
+    ==
+    b  b1  b2
+        0   1
+    c  c1  c2
+        0   1
+    ==
+    b  b1  b2
+        2   3
+    c  c1  c2
+        2   3
+    >>> for a1, a2 in zip_array_values((arr1, arr2), arr2.c):
+    ...     print("==")
+    ...     print(a1)
+    ...     print(a2)
+    ==
+    a\b  b1  b2
+     a0   0   1
+     a1   2   3
+    a  a0  a1
+        0   2
+    ==
+    a\b  b1  b2
+     a0   0   1
+     a1   2   3
+    a  a0  a1
+        1   3
+    >>> for a1, a2 in zip_array_values((arr1, arr2)):
+    ...     print("arr1: {}, arr2: {}".format(a1, a2))
+    arr1: 0, arr2: 0
+    arr1: 0, arr2: 1
+    arr1: 1, arr2: 0
+    arr1: 1, arr2: 1
+    arr1: 2, arr2: 2
+    arr1: 2, arr2: 3
+    arr1: 3, arr2: 2
+    arr1: 3, arr2: 3
+    """
+    def values_with_expand(value, axes, readonly=True, ascending=True):
+        if isinstance(value, LArray):
+            # an Axis axis is not necessarily in array.axes
+            expanded = value.expand(axes, readonly=readonly)
+            return expanded.values(axes, ascending=ascending)
+        else:
+            size = axes.size if axes.ndim else 0
+            return Repeater(value, size)
+
+    all_axes = AxisCollection.union(*[get_axes(v) for v in values])
+    if axes is None:
+        axes = all_axes
+    else:
+        if not isinstance(axes, (tuple, list, AxisCollection)):
+            axes = (axes,)
+        # transform string axes definitions to objects
+        axes = [Axis(axis) if isinstance(axis, basestring) and '=' in axis else axis
+                for axis in axes]
+        axes = AxisCollection([axis if isinstance(axis, Axis) else all_axes[axis]
+                               for axis in axes])
+
+    # sequence of tuples (of scalar or arrays)
+    return SequenceZip([values_with_expand(v, axes, ascending=ascending) for v in values])
+
+
+def zip_array_items(values, axes=None, ascending=True):
+    r"""Returns a sequence of (key, values) pairs of several arrays (or scalars) along axes.
+
+    Parameters
+    ----------
+    values : Iterable
+        arrays or values to combine.
+    axes : int, str or Axis or tuple of them, optional
+        Axis or axes along which to iterate and in which order. Defaults to None (union of all axes present in
+        all arrays, in the order they are found).
+    ascending : bool, optional
+        Whether or not to iterate the axes in ascending order (from start to end). Defaults to True.
+
+    Returns
+    -------
+    Sequence
+
+    Examples
+    --------
+    >>> arr1 = ndtest('a=a0,a1;b=b0,b1')
+    >>> arr2 = ndtest('a=a0,a1;c=c0,c1')
+    >>> arr1
+    a\b  b0  b1
+     a0   0   1
+     a1   2   3
+    >>> arr2
+    a\c  c0  c1
+     a0   0   1
+     a1   2   3
+    >>> for k, (a1, a2) in zip_array_items((arr1, arr2), 'a'):
+    ...     print("==", k[0], "==")
+    ...     print(a1)
+    ...     print(a2)
+    == a0 ==
+    b  b0  b1
+        0   1
+    c  c0  c1
+        0   1
+    == a1 ==
+    b  b0  b1
+        2   3
+    c  c0  c1
+        2   3
+    >>> for k, (a1, a2) in zip_array_items((arr1, arr2), arr2.c):
+    ...     print("==", k[0], "==")
+    ...     print(a1)
+    ...     print(a2)
+    == c0 ==
+    a\b  b0  b1
+     a0   0   1
+     a1   2   3
+    a  a0  a1
+        0   2
+    == c1 ==
+    a\b  b0  b1
+     a0   0   1
+     a1   2   3
+    a  a0  a1
+        1   3
+    >>> for k, (a1, a2) in zip_array_items((arr1, arr2)):
+    ...     print(k, "arr1: {}, arr2: {}".format(a1, a2))
+    (a.i[0], b.i[0], c.i[0]) arr1: 0, arr2: 0
+    (a.i[0], b.i[0], c.i[1]) arr1: 0, arr2: 1
+    (a.i[0], b.i[1], c.i[0]) arr1: 1, arr2: 0
+    (a.i[0], b.i[1], c.i[1]) arr1: 1, arr2: 1
+    (a.i[1], b.i[0], c.i[0]) arr1: 2, arr2: 2
+    (a.i[1], b.i[0], c.i[1]) arr1: 2, arr2: 3
+    (a.i[1], b.i[1], c.i[0]) arr1: 3, arr2: 2
+    (a.i[1], b.i[1], c.i[1]) arr1: 3, arr2: 3
+    """
+    res_axes = AxisCollection.union(*[get_axes(v) for v in values])
+    return SequenceZip((res_axes.iter_labels(axes, ascending=ascending),
+                        zip_array_values(values, axes=axes, ascending=ascending)))
+
 
 def larray_equal(a1, a2):
     import warnings
@@ -9083,3 +9360,5 @@ def make_args_broadcastable(args, kwargs=None, min_axes=None):
 # - pyexcelerate: yet faster but also write only. Didn't check whether API is more featured than xlsxwriter or not.
 # - xlwings: wraps win32com & equivalent on mac, so can potentially do everything (I guess) but this is SLOW and needs
 #            a running excel instance, etc.
+
+zip_array_values((1, 2))

From b16ef721b0df58bcc351ccc621e1b5a1208f5021 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 12 Dec 2018 17:28:59 +0100
Subject: [PATCH 39/44] implemented LArray.apply

---
 doc/source/changes/version_0_30.rst.inc | 69 +++++++++++++++++++++++++
 larray/core/array.py                    |  4 ++
 2 files changed, 73 insertions(+)

diff --git a/doc/source/changes/version_0_30.rst.inc b/doc/source/changes/version_0_30.rst.inc
index e6477ee95..63619e907 100644
--- a/doc/source/changes/version_0_30.rst.inc
+++ b/doc/source/changes/version_0_30.rst.inc
@@ -168,6 +168,75 @@ New features
      a0   0   0   2
      a1   0   1   1
 
+* implemented :py:obj:`LArray.apply()` method to apply a python function or mapping to all
+  elements of an array or to all sub-arrays along some axes of an array and return the result. This is an extremely
+  versatile method as it can be used with both aggregating functions and element-wise functions.
+
+  First let us define a test array
+
+    >>> arr = LArray([[0, 2, 1],
+    ...               [3, 1, 5]], 'a=a0,a1;b=b0..b2')
+    >>> arr
+    a\b  b0  b1  b2
+     a0   0   2   1
+     a1   3   1   5
+
+  Here is a simple function we would like to apply to each element of the array.
+  Note that this particular example should rather be written as: arr ** 2
+  as it is both more concise and much faster.
+
+    >>> def square(x):
+    ...     return x ** 2
+    >>> arr.apply(square)
+    a\b  b0  b1  b2
+     a0   0   4   1
+     a1   9   1  25
+
+  Now, assuming for a moment that the values of our test array above were in fact some numeric representation of
+  names and we had the correspondence to the actual names stored in a dictionary:
+
+    >>> code_to_names = {0: 'foo', 1: 'bar', 2: 'baz',
+    ...                  3: 'boo', 4: 'far', 5: 'faz'}
+
+  We could get back an array with the actual names by using:
+
+    >>> arr.apply(code_to_names)
+    a\b   b0   b1   b2
+     a0  foo  baz  bar
+     a1  boo  bar  faz
+
+  Functions can also be applied along some axes:
+
+    >>> # this is equivalent to (but much slower than): arr.sum_by('a')
+    ... arr.apply(sum, 'a')
+    a  a0  a1
+        3   9
+
+  Applying the function along some axes will return an array with the
+  union of those axes and the axes of the returned values. For example,
+  let us define a function which returns the k highest values of an array.
+
+    >>> def topk(a, k=2):
+    ...     return a.sort_values(ascending=False).ignore_labels().i[:k]
+    >>> arr.apply(topk, 'a')
+    a\b*  0  1
+      a0  2  1
+      a1  5  3
+
+  Other arguments can be passed to the function as a tuple in the "args" argument:
+
+    >>> arr.apply(topk, axes='a', args=(3,))
+    a\b*  0  1  2
+      a0  2  1  0
+      a1  5  3  1
+
+  or by using keyword arguments:
+
+    >>> arr.apply(topk, axes='a', k=3)
+    a\b*  0  1  2
+      a0  2  1  0
+      a1  5  3  1
+
 * implemented :py:obj:`LArray.keys()` :py:obj:`LArray.values()` and :py:obj:`LArray.items()`
   methods to iterate (loop) on an array labels (keys), values or (key, value) pairs.
 
diff --git a/larray/core/array.py b/larray/core/array.py
index 5891f9129..f8a1655e8 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -7725,6 +7725,10 @@ def apply(self, transform, axes=None, dtype=None, ascending=True, args=(), **kwa
           a0  2  1  0
           a1  5  3  1
         """
+        # XXX: we could go one step further than vectorize and support an array of callables which would be broadcast
+        #      with the other arguments. I don't know whether that would actually help because I think it is always
+        #      possible to emulate that with a single callable with an extra argument (eg type) which dispatches to
+        #      potentially different callables. It might be more practical & efficient though.
         if axes is None:
             if isinstance(transform, abc.Mapping):
                 mapping = transform

From d284d8abd612399c6ea9d4f518f5f4ac107ac58b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Mon, 11 Mar 2019 14:31:33 +0100
Subject: [PATCH 40/44] WIP: added axes_names argument to as_table

---
 larray/core/array.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index f8a1655e8..c85b60cc7 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -2337,8 +2337,8 @@ def __iter__(self):
     def __contains__(self, key):
         return any(key in axis for axis in self.axes)
 
-    def as_table(self, maxlines=None, edgeitems=5, light=False, wide=True, value_name='value'):
-        """
+    def as_table(self, maxlines=None, edgeitems=5, light=False, wide=True, value_name='value', axes_names=True):
+        r"""
         Generator. Returns next line of the table representing an array.
 
         Parameters
@@ -2360,6 +2360,8 @@ def as_table(self, maxlines=None, edgeitems=5, light=False, wide=True, value_nam
         value_name : str, optional
             Name of the column containing the values (last column) when `wide=False` (see above).
             Defaults to 'value'.
+        axes_names : bool or 'except_last', optional
+            Whether to include axes names. If 'except_last', include all names but the last. Defaults to True.
 
         Returns
         -------
@@ -2370,13 +2372,13 @@ def as_table(self, maxlines=None, edgeitems=5, light=False, wide=True, value_nam
         --------
         >>> arr = ndtest((2, 2, 3))
         >>> list(arr.as_table())  # doctest: +NORMALIZE_WHITESPACE
-        [['a', 'b\\\\c', 'c0', 'c1', 'c2'],
+        [['a', 'b\\c', 'c0', 'c1', 'c2'],
          ['a0', 'b0', 0, 1, 2],
          ['a0', 'b1', 3, 4, 5],
          ['a1', 'b0', 6, 7, 8],
          ['a1', 'b1', 9, 10, 11]]
         >>> list(arr.as_table(light=True))  # doctest: +NORMALIZE_WHITESPACE
-        [['a', 'b\\\\c', 'c0', 'c1', 'c2'],
+        [['a', 'b\\c', 'c0', 'c1', 'c2'],
          ['a0', 'b0', 0, 1, 2],
          ['', 'b1', 3, 4, 5],
          ['a1', 'b0', 6, 7, 8],
@@ -2409,13 +2411,19 @@ def as_table(self, maxlines=None, edgeitems=5, light=False, wide=True, value_nam
             width = 1
             height = int(np.prod(self.shape))
         data = np.asarray(self).reshape(height, width)
+        display_axes_names = axes_names
 
         # get list of names of axes
         axes_names = self.axes.display_names[:]
         # transforms ['a', 'b', 'c', 'd'] into ['a', 'b', 'c\\d']
         if wide and len(axes_names) > 1:
-            axes_names[-2] = '\\'.join(axes_names[-2:])
-            axes_names.pop()
+            if display_axes_names is True:
+                axes_names[-2] = '\\'.join(axes_names[-2:])
+                axes_names.pop()
+            elif display_axes_names == 'except_last':
+                axes_names = axes_names[:-1]
+            else:
+                axes_names = [''] * (len(axes_names) - 1)
         axes = self.axes[:-1] if wide else self.axes
         # get list of labels for each axis (except the last one if wide=True)
         labels = [axis.labels.tolist() for axis in axes]

From bf1f63c6837a5235225626aaab7884297cc69ee9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Mon, 11 Mar 2019 14:33:55 +0100
Subject: [PATCH 41/44] WIP: added axes_names and na_repr arguments to
 LArray.dump

---
 larray/core/array.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index c85b60cc7..f59513058 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -2458,7 +2458,9 @@ def as_table(self, maxlines=None, edgeitems=5, light=False, wide=True, value_nam
                 # returns next line (labels of N-1 first axes + data)
                 yield list(tick) + dataline.tolist()
 
-    def dump(self, header=True, wide=True, value_name='value', light=False):
+    # TODO: merge with as_table
+    # XXX: dump as a 2D LArray with row & col dims?
+    def dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is'):
         """Dump array as a 2D nested list
 
         Parameters
@@ -2475,6 +2477,13 @@ def dump(self, header=True, wide=True, value_name='value', light=False):
         light : bool, optional
             Whether or not to hide repeated labels. In other words, only show a label if it is different from the
             previous one. Defaults to False.
+        axes_names : bool or 'except_last', optional
+            Assuming header is True, whether or not to include axes names. Defaults to True. If axes_names is
+            True, all axes names are included (in wide format, the name of the last axis is appended to the
+            previous one, separated by a backslash). If axes_names is 'except_last', all axes names are
+            included except the last.
+        na_repr : any scalar, optional
+            Replace missing values (NaN floats) by this value. Defaults to 'as_is' (do not do any replacement).
 
         Returns
         -------
@@ -2482,9 +2491,15 @@ def dump(self, header=True, wide=True, value_name='value', light=False):
         """
         if not header:
             # flatten all dimensions except the last one
-            return self.data.reshape(-1, self.shape[-1]).tolist()
+            res2d = self.data.reshape(-1, self.shape[-1]).tolist()
         else:
-            return list(self.as_table(wide=wide, value_name=value_name, light=light))
+            res2d = list(self.as_table(wide=wide, value_name=value_name, light=light, axes_names=axes_names))
+        if na_repr != 'as_is':
+            # value != value is only True for NaN (NaN is the only float which is not equal to itself)
+            res2d = [[na_repr if value != value else value
+                      for value in line]
+                     for line in res2d]
+        return res2d
 
     # XXX: should filter(geo=['W']) return a view by default? (collapse=True)
     # I think it would be dangerous to make it the default

From 775ac3e318ef7ed195a1816d4e83ad8eabace47e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 15 Mar 2019 11:43:08 +0100
Subject: [PATCH 42/44] WIP: added important FIXMEs

---
 larray/core/array.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/larray/core/array.py b/larray/core/array.py
index f59513058..b63d19efd 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -2425,6 +2425,7 @@ def as_table(self, maxlines=None, edgeitems=5, light=False, wide=True, value_nam
             else:
                 axes_names = [''] * (len(axes_names) - 1)
         axes = self.axes[:-1] if wide else self.axes
+        # MEGA-FIXME: ensure that it will work in xlwings (see dump below)
         # get list of labels for each axis (except the last one if wide=True)
         labels = [axis.labels.tolist() for axis in axes]
         # creates vertical lines (ticks is a list of list)
@@ -2490,7 +2491,13 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam
         2D nested list
         """
         if not header:
+            # MEGA-FIXME: ensure that either
+            # * we have no numpy types left here (this can be the case with tolist if we have a numpy array with
+            #   object dtype with numpy types in some of its cells (this is the 65535 dc2019 bug)!)
+            # * xlwings accepts those
+            # Unsure where this should be fixed. In np.array.tolist, in xlwings or in larray.
             # flatten all dimensions except the last one
+            # same fix should be applied in as_table above (it uses tolist too)
             res2d = self.data.reshape(-1, self.shape[-1]).tolist()
         else:
             res2d = list(self.as_table(wide=wide, value_name=value_name, light=light, axes_names=axes_names))

From 28be4952b4e071561ef076160794336f5e09db0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Fri, 15 Mar 2019 11:43:38 +0100
Subject: [PATCH 43/44] WIP: added a few XXX in Excel handling code

---
 larray/inout/xw_excel.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/larray/inout/xw_excel.py b/larray/inout/xw_excel.py
index 560d42408..d9b93ab32 100644
--- a/larray/inout/xw_excel.py
+++ b/larray/inout/xw_excel.py
@@ -270,6 +270,7 @@ def close(self):
             if self.filepath is not None and os.path.isfile(self.xw_wkb.fullname):
                 tmp_file = self.xw_wkb.fullname
                 self.xw_wkb.close()
+                # XXX: do we check for this case earlier and act differently depending on overwrite?
                 os.remove(self.filepath)
                 os.rename(tmp_file, self.filepath)
             else:
@@ -292,6 +293,10 @@ def __enter__(self):
             return self
 
         def __exit__(self, type_, value, traceback):
+            # XXX: we should probably also avoid closing the workbook for visible=True???
+            # XXX: we might want to disallow using open_excel as a context manager (in __enter__)
+            #      when we have nothing to do in close because it is kinda misleading (this might piss off
+            #      users though, so maybe a warning would be better).
             if not self.active_workbook:
                 self.close()
 

From 87949d8f29a47ab4065eca3c756604cb3e6b9d3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= <gdementen@gmail.com>
Date: Wed, 20 Mar 2019 11:05:18 +0100
Subject: [PATCH 44/44] WIP: added support for saving a file with a password

---
 larray/inout/xw_excel.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/larray/inout/xw_excel.py b/larray/inout/xw_excel.py
index d9b93ab32..4eb3efc95 100644
--- a/larray/inout/xw_excel.py
+++ b/larray/inout/xw_excel.py
@@ -25,6 +25,7 @@
 if xw is not None:
     from xlwings.conversion.pandas_conv import PandasDataFrameConverter
 
+    from xlwings.constants import FileFormat
     global_app = None
 
 
@@ -257,12 +258,23 @@ def __delitem__(self, key):
         def sheet_names(self):
             return [s.name for s in self]
 
-        def save(self, path=None):
+        def save(self, path=None, password=None):
             # saved_path = self.xw_wkb.api.Path
             # was_saved = saved_path != ''
             if path is None and self.delayed_filepath is not None:
                 path = self.delayed_filepath
-            self.xw_wkb.save(path=path)
+
+            if password is not None:
+                if path is None:
+                    raise ValueError("saving a Workbook with a password is only supported for workbooks with an "
+                                     "explicit path (given either when opening the workbook or here as the path "
+                                     "argument)")
+                realpath = os.path.realpath(path)
+                # XXX: this is probably Windows only
+                # using Password as keyword argument does not work !
+                self.xw_wkb.api.SaveAs(realpath, FileFormat.xlOpenXMLWorkbook, password)
+            else:
+                self.xw_wkb.save(path=path)
 
         def close(self):
             # Close the workbook in Excel.