diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index eea8beb130..8874a4edb8 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -937,7 +937,7 @@ def drop(
                 columns = labels
 
         block = self._block
-        if index:
+        if index is not None:
             level_id = self._resolve_levels(level or 0)[0]
 
             if utils.is_list_like(index):
@@ -947,6 +947,15 @@ def drop(
                 block, condition_id = block.apply_unary_op(
                     inverse_condition_id, ops.invert_op
                 )
+            elif isinstance(index, indexes.Index):
+                # This branch returns early, so apply the column drop here
+                # instead of relying on the ``if columns`` step further down.
+                result = self._drop_by_index(index)
+                if columns:
+                    result = DataFrame(
+                        result._block.drop_columns(result._sql_names(columns))
+                    )
+                return result
             else:
                 block, condition_id = block.apply_unary_op(
                     level_id, ops.partial_right(ops.ne_op, index)
@@ -956,10 +965,41 @@ def drop(
             )
         if columns:
             block = block.drop_columns(self._sql_names(columns))
-        if not index and not columns:
+        if index is None and not columns:
             raise ValueError("Must specify 'labels' or 'index'/'columns")
         return DataFrame(block)
 
+    def _drop_by_index(self, index: indexes.Index) -> DataFrame:
+        """Return a new DataFrame without the rows matched by ``index``.
+
+        The drop is expressed as a join between this frame's index and
+        ``index``: rows with no counterpart in ``index`` are the ones kept.
+        """
+        block = index._data._get_block()
+        # The offsets column only serves as a marker: after the join it is
+        # tested for null to find the rows that had no match in ``index``.
+        # NOTE(review): assumes offsets are never null and that the default
+        # join keeps unmatched left rows -- confirm against the join helper.
+        block, ordering_col = block.promote_offsets()
+        joined_index, (get_column_left, get_column_right) = self._block.index.join(
+            block.index
+        )
+
+        new_ordering_col = get_column_right(ordering_col)
+        drop_block = joined_index._block
+        drop_block, drop_col = drop_block.apply_unary_op(
+            new_ordering_col,
+            ops.isnull_op,
+        )
+
+        # Keep the unmatched rows, then narrow back to the original columns.
+        drop_block = drop_block.filter(drop_col)
+        original_columns = [
+            get_column_left(column) for column in self._block.value_columns
+        ]
+        drop_block = drop_block.select_columns(original_columns)
+        return DataFrame(drop_block)
+
     def droplevel(self, level: LevelsType, axis: int | str = 0):
         axis_n = utils.get_axis_number(axis)
         if axis_n == 0:
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index b8616a54d6..19ea9b8ae5 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -258,6 +258,72 @@ def test_drop_index(scalars_dfs):
     pd.testing.assert_frame_equal(pd_result, bf_result)
 
 
+def test_drop_pandas_index(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    drop_index = scalars_pandas_df.iloc[[4, 1, 2]].index
+
+    pd_result = scalars_pandas_df.drop(index=drop_index)
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_bigframes_index(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    drop_index = scalars_df.loc[[4, 1, 2]].index
+    drop_pandas_index = scalars_pandas_df.loc[[4, 1, 2]].index
+
+    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_bigframes_index_and_columns(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    drop_index = scalars_df.loc[[4, 1, 2]].index
+    drop_pandas_index = scalars_pandas_df.loc[[4, 1, 2]].index
+
+    pd_result = scalars_pandas_df.drop(index=drop_pandas_index, columns=["bytes_col"])
+    bf_result = scalars_df.drop(index=drop_index, columns=["bytes_col"]).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_bigframes_index_with_na(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    scalars_df = scalars_df.copy()
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_df = scalars_df.set_index("bytes_col")
+    scalars_pandas_df = scalars_pandas_df.set_index("bytes_col")
+    drop_index = scalars_df.iloc[[3, 5]].index
+    drop_pandas_index = scalars_pandas_df.iloc[[3, 5]].index
+
+    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_bigframes_multiindex(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    scalars_df = scalars_df.copy()
+    scalars_pandas_df = scalars_pandas_df.copy()
+    sub_df = scalars_df.iloc[[4, 1, 2]]
+    sub_pandas_df = scalars_pandas_df.iloc[[4, 1, 2]]
+    sub_df = sub_df.set_index(["bytes_col", "numeric_col"])
+    sub_pandas_df = sub_pandas_df.set_index(["bytes_col", "numeric_col"])
+    drop_index = sub_df.index
+    drop_pandas_index = sub_pandas_df.index
+
+    scalars_df = scalars_df.set_index(["bytes_col", "numeric_col"])
+    scalars_pandas_df = scalars_pandas_df.set_index(["bytes_col", "numeric_col"])
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
 def test_drop_labels_axis_0(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
 