Skip to content

Commit e4936d8

Browse files
committed
fix #868: fixed edge case for group aggregates
the bug occurred only if all the following conditions applied: a) the labels indices were at a constant "step" from each other b) they were given in reverse order (step < 0) c) the last label (with smallest index) had an index < abs(step) (ie index + step < 0)
1 parent 7f12972 commit e4936d8

File tree

4 files changed

+61
-43
lines changed

4 files changed

+61
-43
lines changed

doc/source/changes/version_0_33.rst.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,4 @@ Miscellaneous improvements
5858
Fixes
5959
^^^^^
6060

61-
* fixed something (closes :issue:`000`).
61+
* fixed an edge case for group aggregates and labels in reverse order (closes :issue:`868`).

larray/core/axis.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCArray
1111
from larray.core.expr import ExprNode
1212
from larray.core.group import (Group, LGroup, IGroup, IGroupMaker, _to_tick, _to_ticks, _to_key, _seq_summary,
13-
_range_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups)
13+
_idx_seq_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups)
1414
from larray.util.oset import *
1515
from larray.util.misc import (duplicates, array_lookup2, ReprString, index_by_id, renamed_to, common_type, LHDFStore,
1616
lazy_attribute, _isnoneslice, unique_multi, Product)
@@ -2798,7 +2798,7 @@ def _key_to_raw_and_axes(self, key, collapse_slices=False, translate_key=True):
27982798
seq_types = (tuple, list, np.ndarray)
27992799
# TODO: we should only do this if there are no Array key (with axes corresponding to the range)
28002800
# otherwise we will be translating them back to a range afterwards
2801-
key = [_range_to_slice(axis_key, len(axis)) if isinstance(axis_key, seq_types) else axis_key
2801+
key = [_idx_seq_to_slice(axis_key, len(axis)) if isinstance(axis_key, seq_types) else axis_key
28022802
for axis_key, axis in zip(key, self)]
28032803

28042804
# transform non-Array advanced keys (list and ndarray) to Array

larray/core/group.py

Lines changed: 54 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -242,69 +242,83 @@ def _range_str_to_range(s, stack_depth=1):
242242
return generalized_range(start, stop, step)
243243

244244

245-
def _range_to_slice(seq, length=None):
245+
def _normalize_idx(idx, length):
246+
if idx < - length:
247+
return - length - 1
248+
elif -length <= idx < 0:
249+
return idx + length
250+
elif 0 <= idx < length:
251+
return idx
252+
elif idx > length:
253+
return length
254+
255+
256+
def _idx_seq_to_slice(seq, length):
246257
r"""
247-
Returns a slice if possible (including for sequences of 1 element) otherwise returns the input sequence itself
258+
Transform a sequence of indices into a slice if possible and normalize it.
259+
260+
If a constant step between indices of the sequence can be inferred (the sequence is something like
261+
[start, start+step, start+2*step, ...]), a normalized (with has few negative indices as possible) slice is
262+
returned, otherwise the sequence is returned unmodified.
248263
249264
Parameters
250265
----------
251-
seq : sequence-like of int
252-
List, tuple or ndarray of integers representing the range.
253-
It should be something like [start, start+step, start+2*step, ...]
254-
length : int, optional
255-
length of sequence of positions. This is only useful when you must be able to transform decreasing sequences
256-
which can stop at 0.
266+
seq : sequence-like of integers
267+
Sequence (list, tuple or ndarray) of indices.
268+
length : int
269+
Length of the sequence the indices refer to.
257270
258271
Returns
259272
-------
260273
slice or sequence-like
261-
return the input sequence if a slice cannot be defined
274+
return the input sequence if a slice cannot be defined.
262275
263276
Examples
264277
--------
265-
>>> _range_to_slice([3, 4, 5])
278+
>>> _idx_seq_to_slice([3, 4, 5], 10)
266279
slice(3, 6, None)
267-
>>> _range_to_slice([3, 4, 6])
280+
>>> _idx_seq_to_slice([3, 4, 6], 10)
268281
[3, 4, 6]
269-
>>> _range_to_slice([3, 5, 7])
270-
slice(3, 9, 2)
271-
>>> _range_to_slice([-3, -2])
272-
slice(-3, -1, None)
273-
>>> _range_to_slice([-1, -2])
274-
slice(-1, -3, -1)
275-
>>> _range_to_slice([2, 1])
282+
>>> _idx_seq_to_slice([3, 5, 7], 10)
283+
slice(3, 8, 2)
284+
>>> _idx_seq_to_slice([-3, -2], 10)
285+
slice(7, 9, None)
286+
>>> _idx_seq_to_slice([-1, -2], 10)
287+
slice(9, 7, -1)
288+
>>> _idx_seq_to_slice([2, 1], 10)
276289
slice(2, 0, -1)
277-
>>> _range_to_slice([1, 0], 4)
278-
slice(-3, -5, -1)
279-
>>> _range_to_slice([1, 0])
280-
[1, 0]
281-
>>> _range_to_slice([1])
290+
>>> _idx_seq_to_slice([1, 0], 10)
291+
slice(1, -11, -1)
292+
>>> _idx_seq_to_slice([1], 10)
282293
slice(1, 2, None)
283-
>>> _range_to_slice([])
294+
>>> _idx_seq_to_slice([], 10)
284295
[]
296+
>>> _idx_seq_to_slice([3, 1], 10)
297+
slice(3, 0, -2)
285298
"""
286299
if len(seq) < 1:
287300
return seq
288-
start = seq[0]
301+
first_idx = _normalize_idx(seq[0], length)
289302
if len(seq) == 1:
290-
return slice(start, start + 1)
291-
second = seq[1]
292-
step = second - start
293-
prev_value = second
294-
for value in seq[2:]:
295-
if value != prev_value + step:
296-
return seq
297-
prev_value = value
298-
stop = prev_value + step
299-
if prev_value == 0 and step < 0:
300-
if length is None:
303+
return slice(first_idx, first_idx + 1)
304+
second_idx = _normalize_idx(seq[1], length)
305+
step = second_idx - first_idx
306+
prev_idx = second_idx
307+
for raw_idx in seq[2:]:
308+
idx = _normalize_idx(raw_idx, length)
309+
if idx != prev_idx + step:
301310
return seq
302-
else:
303-
stop -= length
304-
start -= length
311+
prev_idx = idx
312+
last_idx = prev_idx
313+
step_sign = 1 if step > 0 else -1
314+
# we stop just after last_idx instead of using a full "step" because for negative steps, it could get us a negative
315+
# stop which isn't what we want
316+
stop = last_idx + step_sign
317+
if last_idx == 0 and step < 0:
318+
stop -= length
305319
if step == 1:
306320
step = None
307-
return slice(start, stop, step)
321+
return slice(first_idx, stop, step)
308322

309323

310324
def _is_object_array(array):

larray/tests/test_array.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1887,6 +1887,10 @@ def test_group_agg_guess_axis(array):
18871887
res = array.sum(age, sex).sum((vla, wal, bru, belgium))
18881888
assert res.shape == (4, 15)
18891889

1890+
# issue #868: labels in reverse order with a "step" between them > index of last label
1891+
arr = ndtest(4)
1892+
assert arr.sum('a3,a1') == 4
1893+
18901894

18911895
def test_group_agg_label_group(array):
18921896
age, geo, sex, lipro = array.axes

0 commit comments

Comments
 (0)