From 93d3a00e25bc5de1ac54353960c00d3935d45063 Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Wed, 27 Sep 2023 17:03:38 +0000
Subject: [PATCH 01/15] feat: Allow passing index objects to df.drop

---
 bigframes/dataframe.py                              | 12 +++++++++---
 .../.ipynb_checkpoints/Untitled-checkpoint.ipynb    |  6 ++++++
 .../small/.ipynb_checkpoints/untitled-checkpoint.py |  0
 tests/system/small/test_dataframe.py                | 10 ++++++++++
 4 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
 create mode 100644 tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 113355589b..c0e6ed9ccf 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -906,16 +906,22 @@ def drop(
         columns = labels
 
         block = self._block
-        if index:
+        if index is not None:
             level_id = self._resolve_levels(level or 0)[0]
 
-            if utils.is_list_like(index):
+            if utils.is_list_like(index) or isinstance(index, pandas.Index):
                 block, inverse_condition_id = block.apply_unary_op(
                     level_id, ops.IsInOp(index, match_nulls=True)
                 )
                 block, condition_id = block.apply_unary_op(
                     inverse_condition_id, ops.invert_op
                 )
+            elif isinstance(index, indexes.Index):
+                # idea: make a value column with the same values as index
+                # align index with self so that the new value column is NA
+                # for rows that weren't in index originally
+                # then filter by the index's value column == self index
+                pass
             else:
                 block, condition_id = block.apply_unary_op(
                     level_id, ops.partial_right(ops.ne_op, index)
@@ -925,7 +931,7 @@ def drop(
         )
         if columns:
             block = block.drop_columns(self._sql_names(columns))
-        if not index and not columns:
+        if index is None and not columns:
             raise ValueError("Must specify 'labels' or 'index'/'columns")
         return DataFrame(block)
 
diff --git a/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
new file mode 100644
index 0000000000..363fcab7ed
--- /dev/null
+++ b/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py b/tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index adf17848ee..d3670fdfee 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -258,6 +258,16 @@ def test_drop_index(scalars_dfs):
     pd.testing.assert_frame_equal(pd_result, bf_result)
 
 
+def test_drop_pandas_index(scalars_dfs):
+    # TODO
+    pass
+
+
+def test_drop_bigframes_index(scalars_dfs):
+    # TODO
+    pass
+
+
 def test_drop_labels_axis_0(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
 
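
Patch 01 only sketches the new branch; the comment block records the intended algorithm and the tests are still stubs. For orientation, this is the user-facing behaviour the series is working toward, shown here with plain pandas rather than BigQuery DataFrames (a minimal illustration, not code from the patch):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [10, 20, 30, 40]})

# An Index object produced by another operation, e.g. a filter.
to_drop = df[df["a"] % 2 == 0].index  # labels of the rows where "a" is even

# drop(index=...) accepts the Index directly; only the named rows are removed.
print(df.drop(index=to_drop))
#    a   b
# 0  1  10
# 2  3  30
```
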
From ee8eb0d431e72890ea52aa952f9ef8c5e7f2a5d9 Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Wed, 27 Sep 2023 22:06:16 +0000
Subject: [PATCH 02/15] remove notebook files

---
 .../small/.ipynb_checkpoints/Untitled-checkpoint.ipynb     | 6 ------
 .../system/small/.ipynb_checkpoints/untitled-checkpoint.py | 0
 2 files changed, 6 deletions(-)
 delete mode 100644 tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
 delete mode 100644 tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py

diff --git a/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
deleted file mode 100644
index 363fcab7ed..0000000000
--- a/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- "cells": [],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py b/tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py
deleted file mode 100644
index e69de29bb2..0000000000

From 0fc8dd76d84cc6013a65d123e4d4ac4b3d961902 Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Thu, 28 Sep 2023 01:04:42 +0000
Subject: [PATCH 03/15] add first implementation for df.drop(index)

---
 bigframes/dataframe.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index c0e6ed9ccf..50a5514290 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -917,11 +917,22 @@ def drop(
                     inverse_condition_id, ops.invert_op
                 )
             elif isinstance(index, indexes.Index):
-                # idea: make a value column with the same values as index
-                # align index with self so that the new value column is NA
-                # for rows that weren't in index originally
-                # then filter by the index's value column == self index
-                pass
+                block = index._data._get_block()
+                original_value_columns = block.value_columns
+                block = blocks.Block(block._expr, [], block._expr.column_names.keys())
+                level_names = ["level_" + str(n) for n in range(index.nlevels)]
+                block = block.set_index(level_names, drop=False)
+                index_df = DataFrame(block)
+                index_df = index_df.drop(columns=original_value_columns)
+                df_with_indices_to_drop = self.join(index_df)
+                bool_series = df_with_indices_to_drop["level_0"].isna()
+                for i in range(1, index.nlevels):
+                    bool_series = (
+                        bool_series & df_with_indices_to_drop[level_names[i]].isna()
+                    )
+                result = df_with_indices_to_drop[bool_series]
+                result = result.drop(columns=level_names)
+                return result
             else:
                 block, condition_id = block.apply_unary_op(
                     level_id, ops.partial_right(ops.ne_op, index)
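
The first implementation joins the caller against a frame built from the index argument and keeps only the rows whose joined columns come back NA. A rough plain-pandas sketch of that join-then-filter idea (the `__marker__` column name is invented for the example; the real code works on Block objects and level columns):

```python
import pandas as pd

df = pd.DataFrame({"val": [10, 20, 30, 40]}, index=[0, 1, 2, 3])
index_to_drop = pd.Index([1, 2])

# A one-column frame keyed by the labels to drop; the column is just a marker
# that survives the join.
marker = pd.DataFrame({"__marker__": 1}, index=index_to_drop)

# Left join: rows that were NOT in index_to_drop come back with NA markers.
joined = df.join(marker)
print(joined[joined["__marker__"].isna()].drop(columns="__marker__"))
#    val
# 0   10
# 3   40
```
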
From 5cdfff91d225bc71e414cd4d5235d533eb0bd33c Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Thu, 28 Sep 2023 15:13:56 +0000
Subject: [PATCH 04/15] use index_columns property

---
 bigframes/dataframe.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 50a5514290..ecf6a19199 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -919,19 +919,19 @@ def drop(
             elif isinstance(index, indexes.Index):
                 block = index._data._get_block()
                 original_value_columns = block.value_columns
+                original_index_columns = block.index_columns
                 block = blocks.Block(block._expr, [], block._expr.column_names.keys())
-                level_names = ["level_" + str(n) for n in range(index.nlevels)]
-                block = block.set_index(level_names, drop=False)
+                block = block.set_index(original_index_columns, drop=False)
                 index_df = DataFrame(block)
                 index_df = index_df.drop(columns=original_value_columns)
                 df_with_indices_to_drop = self.join(index_df)
-                bool_series = df_with_indices_to_drop["level_0"].isna()
-                for i in range(1, index.nlevels):
+                bool_series = df_with_indices_to_drop[original_index_columns[0]].isna()
+                for index_name in original_index_columns[1:]:
                     bool_series = (
-                        bool_series & df_with_indices_to_drop[level_names[i]].isna()
+                        bool_series & df_with_indices_to_drop[index_name].isna()
                     )
                 result = df_with_indices_to_drop[bool_series]
-                result = result.drop(columns=level_names)
+                result = result.drop(columns=list(original_index_columns))
                 return result
             else:
                 block, condition_id = block.apply_unary_op(

From 99f2f59a244b333c519060d44116074f3b2b318b Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Thu, 28 Sep 2023 15:17:15 +0000
Subject: [PATCH 05/15] don't use _expr.keys()

---
 bigframes/dataframe.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index ecf6a19199..6c8e30557d 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -918,9 +918,11 @@ def drop(
                 )
             elif isinstance(index, indexes.Index):
                 block = index._data._get_block()
-                original_value_columns = block.value_columns
-                original_index_columns = block.index_columns
-                block = blocks.Block(block._expr, [], block._expr.column_names.keys())
+                original_value_columns = list(block.value_columns)
+                original_index_columns = list(block.index_columns)
+                block = blocks.Block(
+                    block._expr, [], original_value_columns + original_index_columns
+                )
                 block = block.set_index(original_index_columns, drop=False)
                 index_df = DataFrame(block)
                 index_df = index_df.drop(columns=original_value_columns)
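
Patches 04 and 05 replace the hard-coded `level_0`/`level_N` names with the index columns the block actually reports. The underlying point, shown with plain pandas (illustrative only): index levels carry arbitrary names, so code that round-trips an index through columns has to ask for those names rather than assume a `level_N` convention.

```python
import pandas as pd

idx = pd.MultiIndex.from_tuples(
    [(b"x", 1.5), (b"y", 2.5)], names=["bytes_col", "numeric_col"]
)
print(list(idx.names))  # ['bytes_col', 'numeric_col'], not 'level_0'/'level_1'

# Materialising the levels as columns uses the real names:
df = pd.DataFrame({"val": [1, 2]}, index=idx).reset_index()
print(list(df.columns))  # ['bytes_col', 'numeric_col', 'val']
```
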
From f4db207d7b0c2020176cdc1aad8d052d77cc33eb Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Thu, 28 Sep 2023 16:08:00 +0000
Subject: [PATCH 06/15] fix order bug and add test

---
 bigframes/dataframe.py                              |  2 +-
 .../Untitled-checkpoint.ipynb                       |  6 ++++++
 .../.ipynb_checkpoints/untitled-checkpoint.py       |  0
 tests/system/small/test_dataframe.py                | 22 +++++++++++++++++----
 4 files changed, 25 insertions(+), 5 deletions(-)
 create mode 100644 tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
 create mode 100644 tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 6c8e30557d..68f185f15a 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -921,7 +921,7 @@ def drop(
                 original_value_columns = list(block.value_columns)
                 original_index_columns = list(block.index_columns)
                 block = blocks.Block(
-                    block._expr, [], original_value_columns + original_index_columns
+                    block._expr, [], original_index_columns + original_value_columns
                 )
                 block = block.set_index(original_index_columns, drop=False)
                 index_df = DataFrame(block)
diff --git a/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
new file mode 100644
index 0000000000..363fcab7ed
--- /dev/null
+++ b/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py b/tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index d3670fdfee..f0e96951f8 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -259,13 +259,27 @@ def test_drop_index(scalars_dfs):
 
 
 def test_drop_pandas_index(scalars_dfs):
-    # TODO
-    pass
+    scalars_df, scalars_pandas_df = scalars_dfs
+    drop_index = scalars_pandas_df.iloc[[4, 1, 2]].index
+
+    pd_result = scalars_pandas_df.drop(index=drop_index)
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
 
 
 def test_drop_bigframes_index(scalars_dfs):
-    # TODO
-    pass
+    scalars_df, scalars_pandas_df = scalars_dfs
+    drop_index = scalars_df.loc[[4, 1, 2]].index
+    drop_pandas_index = scalars_pandas_df.loc[[4, 1, 2]].index
+
+    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+
+    print(pd_result)
+    print(bf_result)
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
 
 
 def test_drop_labels_axis_0(scalars_dfs):

From ec7c6a743b0feddc1464a6b31ee6092a4e1d3517 Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Thu, 28 Sep 2023 16:10:44 +0000
Subject: [PATCH 07/15] fix index names

---
 bigframes/dataframe.py               | 2 ++
 tests/system/small/test_dataframe.py | 3 ---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 68f185f15a..8c32097338 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -920,6 +920,7 @@ def drop(
                 block = index._data._get_block()
                 original_value_columns = list(block.value_columns)
                 original_index_columns = list(block.index_columns)
+                original_index_names = self.index.names
                 block = blocks.Block(
                     block._expr, [], original_index_columns + original_value_columns
                 )
@@ -934,6 +935,7 @@ def drop(
                     )
                 result = df_with_indices_to_drop[bool_series]
                 result = result.drop(columns=original_index_columns)
+                result.index.names = original_index_names
                 return result
             else:
                 block, condition_id = block.apply_unary_op(
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index f0e96951f8..a6ef62eabe 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -276,9 +276,6 @@ def test_drop_bigframes_index(scalars_dfs):
     pd_result = scalars_pandas_df.drop(index=drop_pandas_index)
     bf_result = scalars_df.drop(index=drop_index).to_pandas()
 
-    print(pd_result)
-    print(bf_result)
-
     pd.testing.assert_frame_equal(pd_result, bf_result)
 
 
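
Patch 07 restores the caller's index names after the temporary re-indexing. The tests compare against pandas with `assert_frame_equal`, which checks index names by default, so a result that differs only in `index.names` still fails. A minimal pandas illustration (not from the patch):

```python
import pandas as pd

left = pd.DataFrame({"a": [1, 2]}, index=pd.Index([0, 1], name="rowindex"))
right = pd.DataFrame({"a": [1, 2]}, index=pd.Index([0, 1], name=None))

try:
    pd.testing.assert_frame_equal(left, right)
except AssertionError:
    print("fails: index names differ")  # values and labels are identical

# The comparison passes once the names match (or with check_names=False).
pd.testing.assert_frame_equal(left, right, check_names=False)
```
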
From fd22b014c0de6f382a923fe72f809c6ca5d22591 Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Thu, 28 Sep 2023 18:03:46 +0000
Subject: [PATCH 08/15] support multiindex

---
 bigframes/dataframe.py               | 65 +++++++++++++++++++---------
 tests/system/small/test_dataframe.py | 36 ++++++++++++++++
 2 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 8c32097338..e7037d2a48 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -917,26 +917,7 @@ def drop(
                     inverse_condition_id, ops.invert_op
                 )
             elif isinstance(index, indexes.Index):
-                block = index._data._get_block()
-                original_value_columns = list(block.value_columns)
-                original_index_columns = list(block.index_columns)
-                original_index_names = self.index.names
-                block = blocks.Block(
-                    block._expr, [], original_index_columns + original_value_columns
-                )
-                block = block.set_index(original_index_columns, drop=False)
-                index_df = DataFrame(block)
-                index_df = index_df.drop(columns=original_value_columns)
-                df_with_indices_to_drop = self.join(index_df)
-                bool_series = df_with_indices_to_drop[original_index_columns[0]].isna()
-                for index_name in original_index_columns[1:]:
-                    bool_series = (
-                        bool_series & df_with_indices_to_drop[index_name].isna()
-                    )
-                result = df_with_indices_to_drop[bool_series]
-                result = result.drop(columns=original_index_columns)
-                result.index.names = original_index_names
-                return result
+                return self._drop_by_index(index)
             else:
                 block, condition_id = block.apply_unary_op(
                     level_id, ops.partial_right(ops.ne_op, index)
@@ -950,6 +931,50 @@ def drop(
             raise ValueError("Must specify 'labels' or 'index'/'columns")
         return DataFrame(block)
 
+    def _drop_by_index(self, index: indexes.Index):
+        block = index._data._get_block()
+        original_value_columns = list(block.value_columns)
+        original_index_columns = list(block.index_columns)
+        original_index_names = self.index.names
+        # move all the columns to value columns
+        block = blocks.Block(
+            block._expr, [], original_index_columns + original_value_columns
+        )
+        # additionally restore index columns in order to join
+        block = block.set_index(original_index_columns, drop=False)
+        index_df = DataFrame(block)
+        original_isna = index_df[original_index_columns[0]].isna()
+        for index_name in original_index_columns[1:]:
+            original_isna = original_isna & index_df[index_name].isna()
+        # used to drop NA-labeled rows later
+        original_has_all_na_row = original_isna.any()
+
+        # value columns on the index argument are superfluous and could cause
+        # name conflicts, so we drop them
+        index_df = index_df.drop(columns=original_value_columns)
+        index_df.index.names = original_index_names
+        df_with_indices_to_drop = self.join(index_df)
+        # df_with_indices_to_drop has columns from the original index argument's
+        # index columns, and if all such columns are for a row, it means that
+        # row was not listed and therefore should be kept. All rows with entries in
+        # the original index argument should be dropped.
+        bool_series = df_with_indices_to_drop[original_index_columns[0]].isna()
+        for index_name in original_index_columns[1:]:
+            bool_series = bool_series & df_with_indices_to_drop[index_name].isna()
+        result = df_with_indices_to_drop[bool_series]
+        result = result.drop(columns=original_index_columns)
+        result.index.names = original_index_names
+        # if the user passed a label to drop, it will not be dropped yet,
+        # so we drop all labeled rows here if needed
+        if original_has_all_na_row:
+            num_keys = len(original_index_columns)
+            if num_keys == 1:
+                result = result.drop(index=[None])
+            else:
+                none_key = [tuple([None] * num_keys)]
+                result = result.drop(index=none_key)
+        return result
+
     def droplevel(self, level: LevelsType, axis: int | str = 0):
         axis_n = utils.get_axis_number(axis)
         if axis_n == 0:
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index a6ef62eabe..e957823182 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -279,6 +279,42 @@ def test_drop_bigframes_index(scalars_dfs):
     pd.testing.assert_frame_equal(pd_result, bf_result)
 
 
+def test_drop_bigframes_index_with_na(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    scalars_df = scalars_df.copy()
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_df = scalars_df.set_index("bytes_col")
+    scalars_pandas_df = scalars_pandas_df.set_index("bytes_col")
+    drop_index = scalars_df.iloc[[3, 5]].index
+    drop_pandas_index = scalars_pandas_df.iloc[[3, 5]].index
+
+    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)  # drop_pandas_index)
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+
+    print(pd_result)
+    print(bf_result)
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_bigframes_multiindex(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    scalars_df = scalars_df.copy()
+    scalars_pandas_df = scalars_pandas_df.copy()
+    sub_df = scalars_df.iloc[[4, 1, 2]]
+    sub_pandas_df = scalars_pandas_df.iloc[[4, 1, 2]]
+    sub_df = sub_df.set_index(["bytes_col", "numeric_col"])
+    sub_pandas_df = sub_pandas_df.set_index(["bytes_col", "numeric_col"])
+    drop_index = sub_df.index
+    drop_pandas_index = sub_pandas_df.index
+
+    scalars_df = scalars_df.set_index(["bytes_col", "numeric_col"])
+    scalars_pandas_df = scalars_pandas_df.set_index(["bytes_col", "numeric_col"])
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
 def test_drop_labels_axis_0(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
 
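
A note on the NA bookkeeping in `_drop_by_index`: a NULL key never matches under SQL join semantics, so an NA label in the index argument would not be caught by the join-and-`isna` filter. That is presumably why the method records `original_has_all_na_row` up front and finishes with an explicit `drop(index=[None])` (or a tuple of `None`s for a MultiIndex). The single-level case in plain pandas (illustrative only):

```python
import pandas as pd

df = pd.DataFrame(
    {"val": [1, 2, 3]},
    index=pd.Index([b"x", None, b"y"], name="bytes_col"),
)

# Dropping the NA-labeled row explicitly, as the fallback branch does:
print(df.drop(index=[None]))
#            val
# bytes_col
# b'x'         1
# b'y'         3
```
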
From b0b0533bbd1affb48f3ac8c494913db4fc649fc0 Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Thu, 28 Sep 2023 18:05:04 +0000
Subject: [PATCH 09/15] remove accidentally added files

---
 .../small/.ipynb_checkpoints/Untitled-checkpoint.ipynb     | 6 ------
 .../system/small/.ipynb_checkpoints/untitled-checkpoint.py | 0
 2 files changed, 6 deletions(-)
 delete mode 100644 tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
 delete mode 100644 tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py

diff --git a/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
deleted file mode 100644
index 363fcab7ed..0000000000
--- a/tests/system/small/.ipynb_checkpoints/Untitled-checkpoint.ipynb
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- "cells": [],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py b/tests/system/small/.ipynb_checkpoints/untitled-checkpoint.py
deleted file mode 100644
index e69de29bb2..0000000000

From 9a5afdd046817a08ce9cf5e306705407a3ced593 Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Thu, 28 Sep 2023 18:21:21 +0000
Subject: [PATCH 10/15] add type hint

---
 bigframes/dataframe.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index e7037d2a48..5feca9c4bd 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -931,7 +931,7 @@ def drop(
             raise ValueError("Must specify 'labels' or 'index'/'columns")
         return DataFrame(block)
 
-    def _drop_by_index(self, index: indexes.Index):
+    def _drop_by_index(self, index: indexes.Index) -> DataFrame:
         block = index._data._get_block()
         original_value_columns = list(block.value_columns)
         original_index_columns = list(block.index_columns)
@@ -943,6 +943,7 @@ def drop(
         # additionally restore index columns in order to join
         block = block.set_index(original_index_columns, drop=False)
         index_df = DataFrame(block)
+        index_df.index.names = original_index_names
         original_isna = index_df[original_index_columns[0]].isna()
         for index_name in original_index_columns[1:]:
             original_isna = original_isna & index_df[index_name].isna()
@@ -952,7 +953,6 @@ def drop(
         # value columns on the index argument are superfluous and could cause
        # name conflicts, so we drop them
         index_df = index_df.drop(columns=original_value_columns)
-        index_df.index.names = original_index_names
         df_with_indices_to_drop = self.join(index_df)
         # df_with_indices_to_drop has columns from the original index argument's
         # index columns, and if all such columns are for a row, it means that
@@ -963,7 +963,6 @@ def drop(
             bool_series = bool_series & df_with_indices_to_drop[index_name].isna()
         result = df_with_indices_to_drop[bool_series]
         result = result.drop(columns=original_index_columns)
-        result.index.names = original_index_names
         # if the user passed a label to drop, it will not be dropped yet,
         # so we drop all labeled rows here if needed
         if original_has_all_na_row:

From 451b901ea19051a70ddc97a791258e41756c31bd Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Thu, 28 Sep 2023 19:31:39 +0000
Subject: [PATCH 11/15] remove debug print statements

---
 tests/system/small/test_dataframe.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index e957823182..72f2ae47bc 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -291,8 +291,6 @@ def test_drop_bigframes_index_with_na(scalars_dfs):
     pd_result = scalars_pandas_df.drop(index=drop_pandas_index)  # drop_pandas_index)
     bf_result = scalars_df.drop(index=drop_index).to_pandas()
 
-    print(pd_result)
-    print(bf_result)
     pd.testing.assert_frame_equal(pd_result, bf_result)
 
 
From 6a5ac4fa5aef976d18a388da53aaedcfd51308b4 Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Mon, 2 Oct 2023 22:41:04 +0000
Subject: [PATCH 12/15] fix: fix df/series.iloc by list with multiindex

---
 bigframes/core/indexers.py           |  15 ++--
 tests/system/small/test_dataframe.py | 114 ++++++---------------------
 2 files changed, 31 insertions(+), 98 deletions(-)

diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py
index a538c80711..e22a5aed85 100644
--- a/bigframes/core/indexers.py
+++ b/bigframes/core/indexers.py
@@ -332,8 +332,6 @@ def _iloc_getitem_series_or_dataframe(
     elif isinstance(key, slice):
         return series_or_dataframe._slice(key.start, key.stop, key.step)
     elif pd.api.types.is_list_like(key):
-        # TODO(henryjsolberg): support MultiIndex
-
         if len(key) == 0:
             return typing.cast(
                 typing.Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
@@ -346,15 +344,18 @@ def _iloc_getitem_series_or_dataframe(
                 original_series_name if original_series_name is not None else "0"
             )
             df = series_or_dataframe.to_frame()
-        original_index_name = df.index.name
-        temporary_index_name = guid.generate_guid(prefix="temp_iloc_index_")
-        df = df.rename_axis(temporary_index_name)
+        original_index_names = df.index.names
+        temporary_index_names = [
+            guid.generate_guid(prefix="temp_iloc_index_")
+            for _ in range(len(df.index.names))
+        ]
+        df = df.rename_axis(temporary_index_names)
 
         # set to offset index and use regular loc, then restore index
         df = df.reset_index(drop=False)
         result = df.loc[key]
-        result = result.set_index(temporary_index_name)
-        result = result.rename_axis(original_index_name)
+        result = result.set_index(temporary_index_names)
+        result = result.rename_axis(original_index_names)
 
         if isinstance(series_or_dataframe, bigframes.series.Series):
             result = result[series_name]
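
The indexer fix generalises the temporary-rename trick from one index name to one generated name per level, so `.iloc` with a list also works on a MultiIndex. The same idea in plain pandas (the `temp_iloc_index_*` names below are stand-ins for the GUIDs that `guid.generate_guid` produces):

```python
import pandas as pd

df = pd.DataFrame(
    {"val": [10, 20, 30]},
    index=pd.MultiIndex.from_tuples(
        [(b"x", 1), (b"y", 2), (b"z", 3)], names=["bytes_col", "numeric_col"]
    ),
)
key = [2, 0]

original_names = df.index.names
temp_names = [f"temp_iloc_index_{i}" for i in range(len(original_names))]

# Rename every level, move the index into columns, select by offset with .loc
# on the fresh RangeIndex, then restore the index and its original names.
tmp = df.rename_axis(temp_names).reset_index(drop=False)
result = tmp.loc[key].set_index(temp_names).rename_axis(original_names)
print(result)  # rows at positions 2 and 0, in that order
```
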
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 3d50a609b2..afba8804d3 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -1266,77 +1266,6 @@ def test_combine(
     pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
 
 
-@pytest.mark.parametrize(
-    ("overwrite", "filter_func"),
-    [
-        (True, None),
-        (False, None),
-        (True, lambda x: x.isna() | (x % 2 == 0)),
-    ],
-    ids=[
-        "default",
-        "overwritefalse",
-        "customfilter",
-    ],
-)
-def test_df_update(overwrite, filter_func):
-    if pd.__version__.startswith("1."):
-        pytest.skip("dtype handled differently in pandas 1.x.")
-    index1 = pandas.Index([1, 2, 3, 4], dtype="Int64")
-    index2 = pandas.Index([1, 2, 4, 5], dtype="Int64")
-    pd_df1 = pandas.DataFrame(
-        {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
-    )
-    pd_df2 = pandas.DataFrame(
-        {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]},
-        dtype="Int64",
-        index=index2,
-    )
-
-    bf_df1 = dataframe.DataFrame(pd_df1)
-    bf_df2 = dataframe.DataFrame(pd_df2)
-
-    bf_df1.update(bf_df2, overwrite=overwrite, filter_func=filter_func)
-    pd_df1.update(pd_df2, overwrite=overwrite, filter_func=filter_func)
-
-    pd.testing.assert_frame_equal(bf_df1.to_pandas(), pd_df1)
-
-
-@pytest.mark.parametrize(
-    ("join", "axis"),
-    [
-        ("outer", None),
-        ("outer", 0),
-        ("outer", 1),
-        ("left", 0),
-        ("right", 1),
-        ("inner", None),
-        ("inner", 1),
-    ],
-)
-def test_df_align(join, axis):
-    index1 = pandas.Index([1, 2, 3, 4], dtype="Int64")
-    index2 = pandas.Index([1, 2, 4, 5], dtype="Int64")
-    pd_df1 = pandas.DataFrame(
-        {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
-    )
-    pd_df2 = pandas.DataFrame(
-        {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]},
-        dtype="Int64",
-        index=index2,
-    )
-
-    bf_df1 = dataframe.DataFrame(pd_df1)
-    bf_df2 = dataframe.DataFrame(pd_df2)
-
-    bf_result1, bf_result2 = bf_df1.align(bf_df2, join=join, axis=axis)
-    pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis)
-
-    # Don't check dtype as pandas does unnecessary float conversion
-    pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False)
-    pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False)
-
-
 def test_combine_first(
     scalars_df_index,
     scalars_df_2_index,
@@ -1358,6 +1287,11 @@ def test_combine_first(
     pd_df_b.columns = ["b", "a", "d"]
     pd_result = pd_df_a.combine_first(pd_df_b)
 
+    print("pandas")
+    print(pd_result.to_string())
+    print("bigframes")
+    print(bf_result.to_string())
+
     # Some dtype inconsistency for all-NULL columns
     pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
 
@@ -1826,26 +1760,6 @@ def test_df_stack(scalars_dfs):
     pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
 
 
-def test_df_unstack(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    # To match bigquery dataframes
-    scalars_pandas_df = scalars_pandas_df.copy()
-    scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]")
-    # Can only stack identically-typed columns
-    columns = [
-        "rowindex_2",
-        "int64_col",
-        "int64_too",
-    ]
-
-    # unstack on mono-index produces series
-    bf_result = scalars_df[columns].unstack().to_pandas()
-    pd_result = scalars_pandas_df[columns].unstack()
-
-    # Pandas produces NaN, where bq dataframes produces pd.NA
-    pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
-
-
 @pytest.mark.parametrize(
     ("values", "index", "columns"),
     [
@@ -2580,6 +2494,24 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index):
     )
 
 
+def test_iloc_list_multiindex(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    scalars_df = scalars_df.copy()
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_df = scalars_df.set_index(["bytes_col", "numeric_col"])
+    scalars_pandas_df = scalars_pandas_df.set_index(["bytes_col", "numeric_col"])
+
+    index_list = [0, 0, 0, 5, 4, 7]
+
+    bf_result = scalars_df.iloc[index_list]
+    pd_result = scalars_pandas_df.iloc[index_list]
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
 def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index):
     index_list = []
 
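
The new `test_iloc_list_multiindex` feeds repeated, out-of-order positions. The pandas behaviour it compares against: `.iloc` with a list preserves the requested order and duplicates regardless of the index type (a quick check, not from the patch):

```python
import pandas as pd

df = pd.DataFrame(
    {"val": [10, 20, 30]},
    index=pd.MultiIndex.from_tuples([("a", 1), ("b", 2), ("c", 3)]),
)
print(df.iloc[[0, 0, 2]]["val"].tolist())  # [10, 10, 30]
```
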
From 01334bd8d09908503adbe6bb857965385096b5f8 Mon Sep 17 00:00:00 2001
From: Henry J Solberg
Date: Tue, 3 Oct 2023 17:06:49 +0000
Subject: [PATCH 13/15] complete merge

---
 tests/system/small/test_dataframe.py | 96 ++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 5 deletions(-)

diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index ff9799d16b..db5689a7d5 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -1299,6 +1299,77 @@ def test_combine(
     pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
 
 
+@pytest.mark.parametrize(
+    ("overwrite", "filter_func"),
+    [
+        (True, None),
+        (False, None),
+        (True, lambda x: x.isna() | (x % 2 == 0)),
+    ],
+    ids=[
+        "default",
+        "overwritefalse",
+        "customfilter",
+    ],
+)
+def test_df_update(overwrite, filter_func):
+    if pd.__version__.startswith("1."):
+        pytest.skip("dtype handled differently in pandas 1.x.")
+    index1 = pandas.Index([1, 2, 3, 4], dtype="Int64")
+    index2 = pandas.Index([1, 2, 4, 5], dtype="Int64")
+    pd_df1 = pandas.DataFrame(
+        {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
+    )
+    pd_df2 = pandas.DataFrame(
+        {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]},
+        dtype="Int64",
+        index=index2,
+    )
+
+    bf_df1 = dataframe.DataFrame(pd_df1)
+    bf_df2 = dataframe.DataFrame(pd_df2)
+
+    bf_df1.update(bf_df2, overwrite=overwrite, filter_func=filter_func)
+    pd_df1.update(pd_df2, overwrite=overwrite, filter_func=filter_func)
+
+    pd.testing.assert_frame_equal(bf_df1.to_pandas(), pd_df1)
+
+
+@pytest.mark.parametrize(
+    ("join", "axis"),
+    [
+        ("outer", None),
+        ("outer", 0),
+        ("outer", 1),
+        ("left", 0),
+        ("right", 1),
+        ("inner", None),
+        ("inner", 1),
+    ],
+)
+def test_df_align(join, axis):
+    index1 = pandas.Index([1, 2, 3, 4], dtype="Int64")
+    index2 = pandas.Index([1, 2, 4, 5], dtype="Int64")
+    pd_df1 = pandas.DataFrame(
+        {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
+    )
+    pd_df2 = pandas.DataFrame(
+        {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]},
+        dtype="Int64",
+        index=index2,
+    )
+
+    bf_df1 = dataframe.DataFrame(pd_df1)
+    bf_df2 = dataframe.DataFrame(pd_df2)
+
+    bf_result1, bf_result2 = bf_df1.align(bf_df2, join=join, axis=axis)
+    pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis)
+
+    # Don't check dtype as pandas does unnecessary float conversion
+    pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False)
+    pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False)
+
+
 def test_combine_first(
     scalars_df_index,
     scalars_df_2_index,
@@ -1320,11 +1391,6 @@ def test_combine_first(
     pd_df_b.columns = ["b", "a", "d"]
     pd_result = pd_df_a.combine_first(pd_df_b)
 
-    print("pandas")
-    print(pd_result.to_string())
-    print("bigframes")
-    print(bf_result.to_string())
-
     # Some dtype inconsistency for all-NULL columns
     pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
 
@@ -1793,6 +1859,26 @@ def test_df_stack(scalars_dfs):
     pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
 
 
+def test_df_unstack(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    # To match bigquery dataframes
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]")
+    # Can only stack identically-typed columns
+    columns = [
+        "rowindex_2",
+        "int64_col",
+        "int64_too",
+    ]
+
+    # unstack on mono-index produces series
+    bf_result = scalars_df[columns].unstack().to_pandas()
+    pd_result = scalars_pandas_df[columns].unstack()
+
+    # Pandas produces NaN, where bq dataframes produces pd.NA
+    pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+
+
 @pytest.mark.parametrize(
     ("values", "index", "columns"),
     [
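
For reference, the restored `test_df_update` parametrizes `overwrite` and `filter_func`, which control pandas' in-place, NA-aware update. A compact pandas illustration of the two modes the test exercises (loosely mirroring its fixtures, not code from the patch):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, None, 3]}, dtype="Int64")
other = pd.DataFrame({"a": [10, 20, 30]}, dtype="Int64")

d1 = df.copy()
d1.update(other)  # overwrite=True (default): every aligned value is replaced
print(d1["a"].tolist())  # [10, 20, 30]

d2 = df.copy()
d2.update(other, overwrite=False)  # only NA values in the caller are filled
print(d2["a"].tolist())  # [1, 20, 3]
```
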
"int64_col", + "int64_too", + ] + + # unstack on mono-index produces series + bf_result = scalars_df[columns].unstack().to_pandas() + pd_result = scalars_pandas_df[columns].unstack() + + # Pandas produces NaN, where bq dataframes produces pd.NA + pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + + @pytest.mark.parametrize( ("values", "index", "columns"), [ From a400b1d5cfba49fc7da26c34ddf56c285df15bef Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Tue, 3 Oct 2023 17:09:40 +0000 Subject: [PATCH 14/15] remove unneeded isinstance --- bigframes/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 476aa53cbe..a2104f81dd 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -932,7 +932,7 @@ def drop( if index is not None: level_id = self._resolve_levels(level or 0)[0] - if utils.is_list_like(index) or isinstance(index, pandas.Index): + if utils.is_list_like(index): block, inverse_condition_id = block.apply_unary_op( level_id, ops.IsInOp(index, match_nulls=True) ) From afbf8c3a3ab0e6fa188504cd1ac90b22805b163d Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Tue, 3 Oct 2023 20:08:01 +0000 Subject: [PATCH 15/15] refactor _drop_by_index --- bigframes/dataframe.py | 56 +++++++++++++----------------------------- 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index a2104f81dd..1770898345 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -956,46 +956,24 @@ def drop( def _drop_by_index(self, index: indexes.Index) -> DataFrame: block = index._data._get_block() - original_value_columns = list(block.value_columns) - original_index_columns = list(block.index_columns) - original_index_names = self.index.names - # move all the columns to value columns - block = blocks.Block( - block._expr, [], original_index_columns + original_value_columns + block, ordering_col = block.promote_offsets() + joined_index, (get_column_left, get_column_right) = self._block.index.join( + block.index ) - # additionally restore index columns in order to join - block = block.set_index(original_index_columns, drop=False) - index_df = DataFrame(block) - index_df.index.names = original_index_names - original_isna = index_df[original_index_columns[0]].isna() - for index_name in original_index_columns[1:]: - original_isna = original_isna & index_df[index_name].isna() - # used to drop NA-labeled rows later - original_has_all_na_row = original_isna.any() - - # value columns on the index argument are superfluous and could cause - # name conflicts, so we drop them - index_df = index_df.drop(columns=original_value_columns) - df_with_indices_to_drop = self.join(index_df) - # df_with_indices_to_drop has columns from the original index argument's - # index columns, and if all such columns are for a row, it means that - # row was not listed and therefore should be kept. All rows with entries in - # the original index argument should be dropped. 
-        bool_series = df_with_indices_to_drop[original_index_columns[0]].isna()
-        for index_name in original_index_columns[1:]:
-            bool_series = bool_series & df_with_indices_to_drop[index_name].isna()
-        result = df_with_indices_to_drop[bool_series]
-        result = result.drop(columns=original_index_columns)
-        # if the user passed a label to drop, it will not be dropped yet,
-        # so we drop all labeled rows here if needed
-        if original_has_all_na_row:
-            num_keys = len(original_index_columns)
-            if num_keys == 1:
-                result = result.drop(index=[None])
-            else:
-                none_key = [tuple([None] * num_keys)]
-                result = result.drop(index=none_key)
-        return result
+
+        new_ordering_col = get_column_right(ordering_col)
+        drop_block = joined_index._block
+        drop_block, drop_col = drop_block.apply_unary_op(
+            new_ordering_col,
+            ops.isnull_op,
+        )
+
+        drop_block = drop_block.filter(drop_col)
+        original_columns = [
+            get_column_left(column) for column in self._block.value_columns
+        ]
+        drop_block = drop_block.select_columns(original_columns)
+        return DataFrame(drop_block)
 
     def droplevel(self, level: LevelsType, axis: int | str = 0):
         axis_n = utils.get_axis_number(axis)
         if axis_n == 0:
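
The final shape of `_drop_by_index` works at the Block level: give the index being dropped a row-offset column, join it onto the caller's index, and keep only the rows whose joined offset is NULL, i.e. the rows that found no match. A rough plain-pandas sketch of the same idea (the `__offset__` column is invented for the example; `promote_offsets`, `apply_unary_op` and the other Block calls are internal BigQuery DataFrames APIs):

```python
import pandas as pd

df = pd.DataFrame(
    {"val": [10, 20, 30, 40]}, index=pd.Index([0, 1, 2, 3], name="rowindex")
)
index_to_drop = pd.Index([1, 3], name="rowindex")

# "Promote offsets": materialise a row-number column for the index to drop.
offsets = pd.DataFrame({"__offset__": range(len(index_to_drop))}, index=index_to_drop)

# Left join on the index; rows that are not being dropped get NA offsets.
joined = df.join(offsets)

# Keep the unmatched rows, then restore the original set of columns.
print(joined[joined["__offset__"].isna()][df.columns])
#           val
# rowindex
# 0          10
# 2          30
```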