@@ -382,19 +382,30 @@ def split(self, sep='_', names=None, regex=None, return_labels=False) \
382
382
if not regex :
383
383
# np.char.split does not work on arrays with object dtype
384
384
labels = self .labels if self .labels .dtype .kind != 'O' else self .labels .astype (str )
385
- # gives us an array of lists
385
+ # split_labels is an array of lists
386
386
split_labels = np .char .split (labels , sep )
387
387
else :
388
388
match = re .compile (regex ).match
389
+ # split_labels is a list of tuples
389
390
split_labels = [match (label ).groups () for label in self .labels ]
391
+ first_split_label_length = len (split_labels [0 ])
392
+ # TODO: once our lowest supported version is Python 3.10, we should use
393
+ # zip(..., strict=True) instead of checking lengths explicitly
394
+ if any (len (split_label ) != first_split_label_length
395
+ for split_label in split_labels ):
396
+ raise ValueError ("not all labels have the same number of separators" )
397
+ indexing_labels = tuple (zip (* split_labels ))
390
398
if names is None :
391
- names = [None ] * len (split_labels )
392
- indexing_labels = zip (* split_labels )
393
- if return_labels :
394
- indexing_labels = tuple (indexing_labels )
399
+ names = [None ] * first_split_label_length
400
+ num_axes = len (indexing_labels )
401
+ if num_axes != len (names ):
402
+ raise ValueError (f"number of resulting axes ({ num_axes } ) differs "
403
+ f"from number of resulting axes names "
404
+ f"({ len (names )} )" )
395
405
# not using np.unique because we want to keep the original order
396
406
split_axes = [Axis (unique_list (ax_labels ), name ) for ax_labels , name in zip (indexing_labels , names )]
397
407
if return_labels :
408
+ assert len (split_axes ) == num_axes
398
409
indexing_labels = tuple (axis [labels ] for axis , labels in zip (split_axes , indexing_labels ))
399
410
return split_axes , indexing_labels
400
411
else :
0 commit comments