Skip to content

Commit e9d8e73

Browse files
committed
FEAT: made Array[] error message more useful (closes #875)
Error messages now include some information about the axes. Also factorized: * parts of _guess_axis and _translate_axis_key_chunk * _display_name, _axis_summary and _axes_summary methods in AxisCollection. This fixed two corner cases for ambiguous labels on anonymous axes.
1 parent 25dadaf commit e9d8e73

File tree

4 files changed

+119
-73
lines changed

4 files changed

+119
-73
lines changed

doc/source/changes/version_0_34.rst.inc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ Miscellaneous improvements
4444
* added type hints for all remaining functions and methods which improves autocompletion in editors (such as PyCharm).
4545
Closes :issue:`864`.
4646

47+
* made several error messages more useful when trying to get an invalid subset of an array (closes :issue:`875`).
48+
- when a key is not valid on any axis, the error message includes the array axes
49+
- when a key is not valid for the axis specified by the user, the error message includes the labels of that axis
50+
- when a label is ambiguous (valid on several axes), the error message includes the axes labels in addition to the
51+
axes names
52+
- when several parts of a key seem to target the same axis, the error message includes the bad key in
53+
addition to the axis.
54+
4755
* made :py:obj:`ipfp()` faster (the smaller the array, the larger the improvement)
4856
For example, for small arrays it is several times faster than before, for 1000x1000 arrays it is about 30% faster.
4957

larray/core/axis.py

Lines changed: 64 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2439,37 +2439,6 @@ def replace(self, axes_to_replace=None, new_axis=None, inplace=False, **kwargs)
24392439
else:
24402440
return AxisCollection(axes)
24412441

2442-
def _guess_axis(self, axis_key):
2443-
if isinstance(axis_key, Group):
2444-
group_axis = axis_key.axis
2445-
if group_axis is not None:
2446-
# we have axis information but not necessarily an Axis object from self.axes
2447-
real_axis = self[group_axis]
2448-
if group_axis is not real_axis:
2449-
axis_key = axis_key.with_axis(real_axis)
2450-
return axis_key
2451-
2452-
# TODO: instead of checking all axes, we should have a big mapping
2453-
# (in AxisCollection or Array):
2454-
# label -> (axis, index)
2455-
# or possibly (for ambiguous labels)
2456-
# label -> {axis: index}
2457-
# but for Pandas, this wouldn't work, we'd need label -> axis
2458-
valid_axes = []
2459-
for axis in self:
2460-
try:
2461-
axis.index(axis_key)
2462-
valid_axes.append(axis)
2463-
except KeyError:
2464-
continue
2465-
if not valid_axes:
2466-
raise ValueError(f"{axis_key} is not a valid label for any axis\n{self.info}")
2467-
elif len(valid_axes) > 1:
2468-
valid_axes = ', '.join(a.name if a.name is not None else f'{{{self.axes.index(a)}}}'
2469-
for a in valid_axes)
2470-
raise ValueError(f'{axis_key} is ambiguous (valid in {valid_axes})')
2471-
return valid_axes[0][axis_key]
2472-
24732442
def set_labels(self, axis=None, labels=None, inplace=False, **kwargs) -> 'AxisCollection':
24742443
r"""Replaces the labels of one or several axes.
24752444
@@ -2676,9 +2645,44 @@ def index_first_compatible(axis):
26762645
# -1 in to_remove are not a problem since enumerate starts at 0
26772646
return AxisCollection([axis for i, axis in enumerate(self) if i not in to_remove])
26782647

2648+
def _translate_nice_key(self, axis_key):
2649+
# TODO: instead of checking all axes, we should have a big mapping (in AxisCollection):
2650+
# label -> (axis, index) but for sparse/multi-index, this would not work, we'd need label -> axis
2651+
valid_axes = []
2652+
# TODO: use axis_key dtype to only check compatible axes
2653+
for axis in self:
2654+
try:
2655+
axis_pos_key = axis.index(axis_key)
2656+
valid_axes.append(axis)
2657+
except KeyError:
2658+
continue
2659+
if not valid_axes:
2660+
raise ValueError(f"{axis_key!r} is not a valid label for any axis:\n{self._axes_summary()}")
2661+
elif len(valid_axes) > 1:
2662+
raise ValueError(f'{axis_key!r} is ambiguous, it is valid in the following axes:\n'
2663+
f'{self._axes_summary(valid_axes)}')
2664+
real_axis = valid_axes[0]
2665+
return real_axis, axis_pos_key
2666+
2667+
def _guess_axis(self, axis_key):
    """
    Translates any *single axis* key to a group on the real axis.

    Parameters
    ----------
    axis_key : object
        Either a Group or a key without any axis information.

    Returns
    -------
    object
        * if `axis_key` is a Group with a defined axis: the key itself when its axis already is the
          corresponding axis of this collection, otherwise ``axis_key.with_axis(real_axis)``.
        * in all other cases: ``real_axis[axis_key]`` where ``real_axis`` is the axis found by
          ``_translate_nice_key``.

    Raises
    ------
    ValueError
        Raised by ``_translate_nice_key`` when the key is not valid on any axis or is valid on several axes.
    """
    key_axis = axis_key.axis if isinstance(axis_key, Group) else None
    if key_axis is None:
        # no axis information available: search for the key on all axes
        target_axis, _ = self._translate_nice_key(axis_key)
        return target_axis[axis_key]

    # we have axis information but not necessarily an Axis object from self
    target_axis = self[key_axis]
    if key_axis is target_axis:
        return axis_key
    return axis_key.with_axis(target_axis)
2682+
26792683
def _translate_axis_key_chunk(self, axis_key):
26802684
"""
2681-
Translates *single axis* label-based key to an IGroup
2685+
Translates any *single axis* label-based key to an (axis, indices) pair.
26822686
26832687
Parameters
26842688
----------
@@ -2717,7 +2721,8 @@ def _translate_axis_key_chunk(self, axis_key):
27172721
try:
27182722
axis_pos_key = real_axis.index(axis_key)
27192723
except KeyError:
2720-
raise ValueError(f"{axis_key!r} is not a valid label for any axis")
2724+
raise ValueError(f"{axis_key!r} is not a valid label for the {real_axis.name!r} axis "
2725+
f"with labels: {', '.join(repr(label) for label in real_axis.labels)}")
27212726
return real_axis, axis_pos_key
27222727
except KeyError:
27232728
# axis associated with axis_key may not belong to self.
@@ -2726,26 +2731,7 @@ def _translate_axis_key_chunk(self, axis_key):
27262731
axis_key = axis_key.to_label()
27272732

27282733
# otherwise we need to guess the axis
2729-
# TODO: instead of checking all axes, we should have a big mapping (in AxisCollection):
2730-
# label -> (axis, index) but for sparse/multi-index, this would not work, we'd need label -> axis
2731-
valid_axes = []
2732-
# TODO: use axis_key dtype to only check compatible axes
2733-
for axis in self:
2734-
try:
2735-
axis_pos_key = axis.index(axis_key)
2736-
valid_axes.append(axis)
2737-
except KeyError:
2738-
continue
2739-
if not valid_axes:
2740-
raise ValueError(f"{axis_key!r} is not a valid label for any axis")
2741-
elif len(valid_axes) > 1:
2742-
# TODO: make an AxisCollection.display_name(axis) method out of this
2743-
# valid_axes = ', '.join(self.display_name(axis) for a in valid_axes)
2744-
valid_axes = ', '.join(a.name if a.name is not None else f'{{{self.index(a)}}}'
2745-
for a in valid_axes)
2746-
raise ValueError(f'{axis_key} is ambiguous (valid in {valid_axes})')
2747-
real_axis = valid_axes[0]
2748-
return real_axis, axis_pos_key
2734+
return self._translate_nice_key(axis_key)
27492735

27502736
def _translate_axis_key(self, axis_key):
27512737
"""
@@ -2878,7 +2864,7 @@ def _key_to_axis_indices_dict(self, key):
28782864
if has_duplicates(axis for axis, axis_key in key_items):
28792865
dupe_axes = duplicates(axis for axis, axis_key in key_items)
28802866
dupe_axes_str = ', '.join(str(axis) for axis in dupe_axes)
2881-
raise ValueError(f"key has several values for axis: {dupe_axes_str}\n{key}")
2867+
raise ValueError(f"key has several values for axis: {dupe_axes_str}\nkey: {key}")
28822868

28832869
# ((axis, indices), (axis, indices), ...) -> dict
28842870
return dict(key_items)
@@ -3093,6 +3079,27 @@ def names(self) -> List[str]:
30933079
"""
30943080
return [axis.name for axis in self._list]
30953081

3082+
# providing idx is just an optimization to avoid the relatively expensive self.index(axis)
3083+
def _display_name(self, axis, idx=None):
3084+
if axis.name is None:
3085+
if idx is None:
3086+
idx = self.index(axis)
3087+
name = f'{{{idx}}}'
3088+
else:
3089+
# using str() because name can be an integer
3090+
name = str(axis.name)
3091+
return (name + '*') if axis.iswildcard else name
3092+
3093+
def _axes_summary(self, axes=None):
3094+
def axis_summary(axis, idx=None):
3095+
return f" {self._display_name(axis, idx)} [{len(axis)}]: {axis.labels_summary()}"
3096+
3097+
if axes is None:
3098+
parts = [axis_summary(axis, i) for i, axis in enumerate(self._list)]
3099+
else:
3100+
parts = [axis_summary(axis) for axis in axes]
3101+
return '\n'.join(parts)
3102+
30963103
@property
30973104
def display_names(self) -> List[str]:
30983105
r"""
@@ -3113,12 +3120,7 @@ def display_names(self) -> List[str]:
31133120
>>> AxisCollection([a, b, c, d]).display_names
31143121
['a', 'b*', '{2}', '{3}*']
31153122
"""
3116-
def display_name(i, axis):
3117-
# str(axis.name) because name can be an integer
3118-
name = str(axis.name) if axis.name is not None else f'{{{i}}}'
3119-
return (name + '*') if axis.iswildcard else name
3120-
3121-
return [display_name(i, axis) for i, axis in enumerate(self._list)]
3123+
return [self._display_name(axis, i) for i, axis in enumerate(self._list)]
31223124

31233125
@property
31243126
def ids(self) -> List[Union[str, int]]:
@@ -3233,10 +3235,8 @@ def info(self) -> str:
32333235
sex [2]: 'M' 'F'
32343236
time [4]: 2007 2008 2009 2010
32353237
"""
3236-
lines = [f" {name} [{len(axis)}]: {axis.labels_summary()}"
3237-
for name, axis in zip(self.display_names, self._list)]
3238-
shape = " x ".join(str(s) for s in self.shape)
3239-
return ReprString('\n'.join([shape] + lines))
3238+
shape_str = " x ".join(str(s) for s in self.shape)
3239+
return ReprString(f"{shape_str}\n{self._axes_summary()}")
32403240

32413241
# XXX: instead of front_if_spread, we might want to require axes to be contiguous
32423242
# (ie the caller would have to transpose axes before calling this)

larray/tests/test_array.py

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -620,29 +620,49 @@ def test_getitem_guess_axis(array):
620620
assert_array_equal(array[g], raw[..., [0, 4, 8]])
621621

622622
# key with duplicate axes
623-
with pytest.raises(ValueError, match="key has several values for axis: age"):
623+
with must_raise(ValueError, """key has several values for axis: age
624+
key: ([1, 2], [3, 4])"""):
624625
_ = array[[1, 2], [3, 4]]
625626

626627
# key with invalid label (ie label not found on any axis)
627-
with pytest.raises(ValueError, match="999 is not a valid label for any axis"):
628+
with must_raise(ValueError, """999 is not a valid label for any axis:
629+
age [116]: 0 1 2 ... 113 114 115
630+
geo [44]: 'A11' 'A12' 'A13' ... 'A92' 'A93' 'A21'
631+
sex [2]: 'M' 'F'
632+
lipro [15]: 'P01' 'P02' 'P03' ... 'P13' 'P14' 'P15'"""):
628633
_ = array[[1, 2], 999]
629634

630635
# key with invalid label list (ie list of labels not found on any axis)
631-
with pytest.raises(ValueError, match=r"\[998, 999\] is not a valid label for any axis"):
636+
with must_raise(ValueError, """[998, 999] is not a valid label for any axis:
637+
age [116]: 0 1 2 ... 113 114 115
638+
geo [44]: 'A11' 'A12' 'A13' ... 'A92' 'A93' 'A21'
639+
sex [2]: 'M' 'F'
640+
lipro [15]: 'P01' 'P02' 'P03' ... 'P13' 'P14' 'P15'"""):
632641
_ = array[[1, 2], [998, 999]]
633642

634643
# key with partial invalid list (ie list containing a label not found
635644
# on any axis)
636-
# FIXME: the message should be the same as for 999, 4 (ie it should NOT mention age).
637-
with pytest.raises(ValueError, match=r"age\[3, 999\] is not a valid label for any axis"):
645+
with must_raise(ValueError, "age[3, 999] is not a valid label for the 'age' axis with labels: 0, 1, 2, 3, 4, 5, 6, "
646+
"7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, "
647+
"29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, "
648+
"50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, "
649+
"71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, "
650+
"92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, "
651+
"110, 111, 112, 113, 114, 115"):
638652
_ = array[[1, 2], [3, 999]]
639653

640-
with pytest.raises(ValueError, match=r"\[999, 4\] is not a valid label for any axis"):
654+
with must_raise(ValueError, """[999, 4] is not a valid label for any axis:
655+
age [116]: 0 1 2 ... 113 114 115
656+
geo [44]: 'A11' 'A12' 'A13' ... 'A92' 'A93' 'A21'
657+
sex [2]: 'M' 'F'
658+
lipro [15]: 'P01' 'P02' 'P03' ... 'P13' 'P14' 'P15'"""):
641659
_ = array[[1, 2], [999, 4]]
642660

643661
# ambiguous key
644662
arr = ndtest("a=l0,l1;b=l1,l2")
645-
with pytest.raises(ValueError, match=r"l1 is ambiguous \(valid in a, b\)"):
663+
with must_raise(ValueError, """'l1' is ambiguous, it is valid in the following axes:
664+
a [2]: 'l0' 'l1'
665+
b [2]: 'l1' 'l2'"""):
646666
_ = arr['l1']
647667

648668
# ambiguous key disambiguated via string
@@ -1219,7 +1239,7 @@ def test_points_indexer_setitem():
12191239

12201240
arr = ndtest(2)
12211241
# XXX: we might want to raise KeyError or IndexError instead?
1222-
with must_raise(ValueError, match="'b1' is not a valid label for any axis"):
1242+
with must_raise(ValueError, "'b1' is not a valid label for any axis:\n a [2]: 'a0' 'a1'"):
12231243
arr.points['a0', 'b1'] = 42
12241244

12251245
# test when broadcasting is involved
@@ -1943,6 +1963,14 @@ def test_group_agg_guess_axis(array):
19431963
arr = ndtest(4)
19441964
assert arr.sum('a3,a1') == 4
19451965

1966+
# ambiguous label and anonymous axes
1967+
arr = ndtest([Axis("b1,b2"), Axis("b0..b2")])
1968+
msg = """'b1' is ambiguous, it is valid in the following axes:
1969+
{0} [2]: 'b1' 'b2'
1970+
{1} [3]: 'b0' 'b1' 'b2'"""
1971+
with must_raise(ValueError, msg=msg):
1972+
arr.sum('b1;b0,b1')
1973+
19461974

19471975
def test_group_agg_label_group(array):
19481976
age, geo, sex, lipro = array.axes
@@ -2423,7 +2451,8 @@ def test_sum_with_groups_from_other_axis(small_array):
24232451
# use a group (from another axis) which is incompatible with the axis of
24242452
# the same name in the array
24252453
lipro4 = Axis('lipro=P01,P03,P16')
2426-
with pytest.raises(ValueError, match=r"lipro\['P01', 'P16'\] is not a valid label for any axis"):
2454+
codes = "'P01', 'P02', 'P03', 'P04', 'P05', 'P06', 'P07', 'P08', 'P09', 'P10', 'P11', 'P12', 'P13', 'P14', 'P15'"
2455+
with must_raise(ValueError, f"lipro['P01', 'P16'] is not a valid label for the 'lipro' axis with labels: {codes}"):
24272456
small_array.sum(lipro4['P01,P16'])
24282457

24292458

larray/tests/test_axiscollection.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,5 +370,14 @@ def test_repr(col):
370370
])"""
371371

372372

373+
def test_setlabels():
374+
# test when the label is ambiguous AND the axes are anonymous
375+
axes = AxisCollection([Axis("b1,b2"), Axis("b0..b2")])
376+
with must_raise(ValueError, msg="""'b1' is ambiguous, it is valid in the following axes:
377+
{0} [2]: 'b1' 'b2'
378+
{1} [3]: 'b0' 'b1' 'b2'"""):
379+
axes.set_labels({'b1': 'b_one'})
380+
381+
373382
if __name__ == "__main__":
374383
pytest.main()

0 commit comments

Comments
 (0)