Skip to content

Commit e7bf633

Browse files
committed
Do not create real Axis objects for Pandas-backed arrays; instead, create small stub Axis objects whose properties retrieve the name and labels from the underlying index.
Since computing the (unique) labels of a MultiIndex level is expensive, we cache them.
1 parent 3795af1 commit e7bf633

File tree

1 file changed

+58
-6
lines changed

1 file changed

+58
-6
lines changed

larray/core.py

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -591,7 +591,7 @@ def __str__(self):
591591
return self.name if self.name is not None else 'Unnamed axis'
592592

593593
def __repr__(self):
594-
return 'Axis(%r, %r)' % (self.name, self.labels.tolist())
594+
return 'Axis(%r, %r)' % (self.name, list(self.labels))
595595

596596
def __add__(self, other):
597597
if isinstance(other, Axis):
@@ -628,6 +628,51 @@ def sorted(self):
628628
return res
629629

630630

631+
class PandasAxis(Axis):
    """Lightweight Axis stand-in backed directly by a pandas index.

    The name and the labels are read from the wrapped index on every
    access instead of being stored on the axis itself.
    """

    def __init__(self, index):
        # Axis.__init__ is not called: the wrapped index is the only state.
        self.index = index

    @property
    def name(self):
        """Name of the wrapped index (read-only: there is no setter)."""
        wrapped = self.index
        return wrapped.name

    @property
    def labels(self):
        """Labels of the axis, taken from ``index.values``."""
        wrapped = self.index
        return wrapped.values

    @property
    def _mapping(self):
        """Not available on this stub; always raises NotImplementedError."""
        raise NotImplementedError("_mapping")

    def translate(self, key):
        """Not available on this stub; always raises NotImplementedError."""
        raise NotImplementedError("translate")

    def __contains__(self, key):
        """Return whether ``to_tick(key)`` is contained in the wrapped index."""
        tick = to_tick(key)
        return tick in self.index
652+
653+
654+
class PandasMIAxis(PandasAxis):
    """Stub axis describing a single level of a pandas MultiIndex.

    The labels of the level are computed on first access and then cached
    on the instance.
    """

    def __init__(self, index, level_num):
        assert isinstance(index, pd.MultiIndex)
        self.level_num = level_num
        self.index = index
        # filled in lazily by the `labels` property
        self._labels = None

    @property
    def name(self):
        """Name of this level, looked up in ``index.names``."""
        level_names = self.index.names
        return level_names[self.level_num]

    @property
    def labels(self):
        """Labels of this level, computed once and cached in ``_labels``."""
        cached = self._labels
        if cached is None:
            cached = _index_level_unique_labels(self.index, self.level_num)
            self._labels = cached
        return cached

    def __contains__(self, key):
        """Return whether ``to_tick(key)`` is one of this level's labels."""
        tick = to_tick(key)
        return tick in self.labels
674+
675+
631676
# We need a separate class for ValueGroup and cannot simply create a
632677
# new Axis with a subset of values/ticks/labels: the subset of
633678
# ticks/labels of the ValueGroup need to correspond to its *Axis*
@@ -1349,8 +1394,10 @@ def _translate_axis_key(self, axis, key):
13491394
# in the actual Axis ticks (and Pandas Index) and NOT the VG itself
13501395
if key in axis:
13511396
# we check if the VG itself is *really* in the axis
1352-
idx = axis.translate(key)
1353-
if isinstance(axis.labels[idx], ValueGroup):
1397+
labels = list(axis.labels)
1398+
# we cannot check with "key in labels" either
1399+
idx = labels.index(key)
1400+
if isinstance(labels[idx], ValueGroup):
13541401
return key
13551402

13561403
key = key.key
@@ -1914,7 +1961,6 @@ def __setitem__(self, key, value, collapse_slices=True):
19141961
def _rename_axis(self, axis, newname):
19151962
"""inplace rename"""
19161963
axis = self.get_axis(axis)
1917-
axis.name = newname
19181964
pd_axis, level = self._df_axis_level(axis)
19191965
_pandas_rename_axis(self.data, pd_axis, level, newname)
19201966

@@ -2009,6 +2055,13 @@ def __init__(self, dtypes):
20092055
dict.__init__(self, dtypes)
20102056

20112057

2058+
def _pandas_axes(index):
    """Build the list of stub axes describing a pandas index.

    Returns one PandasMIAxis per entry of ``index.names`` when `index` is
    a MultiIndex, and a single-element list holding a PandasAxis otherwise.
    """
    if not isinstance(index, pd.MultiIndex):
        return [PandasAxis(index)]
    num_levels = len(index.names)
    return [PandasMIAxis(index, level_num) for level_num in range(num_levels)]
2063+
2064+
20122065
class DataFrameLArray(PandasLArray):
20132066
def __init__(self, data, axes=None):
20142067
"""
@@ -2037,8 +2090,7 @@ def __init__(self, data, axes=None):
20372090
# TODO: accept axes argument and check that it is consistent
20382091
# or possibly even override data in DataFrame?
20392092
assert axes is None
2040-
axes = [Axis(name, labels)
2041-
for name, labels in _df_levels(data, 0) + _df_levels(data, 1)]
2093+
axes = _pandas_axes(data.index) + _pandas_axes(data.columns)
20422094
else:
20432095
raise TypeError("data must be an numpy ndarray or pandas.DataFrame")
20442096

0 commit comments

Comments
 (0)