From 70b17422cb14ae153438139a79dddaae258d421b Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Mon, 16 Sep 2024 20:50:46 +0000
Subject: [PATCH 1/7] chore: Use fixture for list/str accessor tests

---
 tests/data/repeated.jsonl                     |  4 +++
 tests/data/repeated_schema.json               | 12 +++++++
 tests/system/conftest.py                      | 36 +++++++++++++++++++
 tests/system/small/operations/test_lists.py   | 29 ++++++---------
 tests/system/small/operations/test_strings.py | 13 +++----
 5 files changed, 69 insertions(+), 25 deletions(-)
 create mode 100644 tests/data/repeated.jsonl
 create mode 100644 tests/data/repeated_schema.json

diff --git a/tests/data/repeated.jsonl b/tests/data/repeated.jsonl
new file mode 100644
index 0000000000..eb9c4317ac
--- /dev/null
+++ b/tests/data/repeated.jsonl
@@ -0,0 +1,4 @@
+{"rowindex": 0, "list_col": [1]}
+{"rowindex": 1, "list_col": [1,2]}
+{"rowindex": 2, "list_col": [1,2,3]}
+{"rowindex": 3, "list_col": [1,2,3,4]}
diff --git a/tests/data/repeated_schema.json b/tests/data/repeated_schema.json
new file mode 100644
index 0000000000..e6f32bd9e1
--- /dev/null
+++ b/tests/data/repeated_schema.json
@@ -0,0 +1,12 @@
+[
+    {
+        "name": "rowindex",
+        "type": "INTEGER",
+        "mode": "REQUIRED"
+    },
+    {
+        "name": "list_col",
+        "type": "INTEGER",
+        "mode": "REPEATED"
+    }
+]
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 49cd887cfd..99257c6c00 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -32,6 +32,7 @@
 import ibis.backends
 import numpy as np
 import pandas as pd
+import pyarrow as pa
 import pytest
 import pytz
 import test_utils.prefixer
@@ -39,6 +40,7 @@
 import bigframes
 import bigframes.dataframe
 import bigframes.pandas as bpd
+import bigframes.series
 import tests.system.utils
 
 # Use this to control the number of cloud functions being deleted in a single
@@ -294,6 +296,7 @@ def load_test_data_tables(
         ("scalars", "scalars_schema.json", "scalars.jsonl"),
         ("scalars_too", "scalars_schema.json", "scalars.jsonl"),
         ("nested", "nested_schema.json", "nested.jsonl"),
+        ("repeated", "repeated_schema.json", "repeated.jsonl"),
         ("penguins", "penguins_schema.json", "penguins.jsonl"),
         ("time_series", "time_series_schema.json", "time_series.jsonl"),
         ("hockey_players", "hockey_players.json", "hockey_players.jsonl"),
@@ -369,6 +372,9 @@ def scalars_table_tokyo(test_data_tables_tokyo) -> str:
 def nested_table_id(test_data_tables) -> str:
     return test_data_tables["nested"]
 
+@pytest.fixture(scope="session")
+def repeated_table_id(test_data_tables) -> str:
+    return test_data_tables["repeated"]
 
 @pytest.fixture(scope="session")
 def penguins_table_id(test_data_tables) -> str:
@@ -409,6 +415,36 @@ def nested_pandas_df() -> pd.DataFrame:
     df = df.set_index("rowindex")
     return df
 
+@pytest.fixture(scope="session")
+def repeated_df(
+    repeated_table_id: str, session: bigframes.Session
+) -> bigframes.dataframe.DataFrame:
+    """Returns a DataFrame containing columns of list type."""
+    return session.read_gbq(repeated_table_id, index_col="rowindex")
+
+@pytest.fixture(scope="session")
+def repeated_series(
+    repeated_df: bigframes.dataframe.DataFrame
+) -> bigframes.series.Series:
+    """Returns a Series of lists"""
+    return repeated_df["list_col"]
+
+
+@pytest.fixture(scope="session")
+def repeated_pandas_df() -> pd.DataFrame:
+    """Returns a DataFrame containing columns of list type."""
+
+    df = pd.read_json(
+        DATA_DIR / "repeated.jsonl",
+        lines=True,
+    )
+    df = df.set_index("rowindex")
+    return df
+
+@pytest.fixture(scope="session")
+def repeated_pandas_series(repeated_pandas_df: pd.DataFrame) -> pd.Series:
+    """pd.DataFrame pointing at test data."""
+    return repeated_pandas_df["list_col"].astype(pd.ArrowDtype(pa.list_(pa.int64())))
 
 @pytest.fixture(scope="session")
 def scalars_df_default_index(
diff --git a/tests/system/small/operations/test_lists.py b/tests/system/small/operations/test_lists.py
index 7ecf79dc6a..5146b8047e 100644
--- a/tests/system/small/operations/test_lists.py
+++ b/tests/system/small/operations/test_lists.py
@@ -32,19 +32,16 @@
         pytest.param(slice(0, 2, None), id="default_step_slice"),
     ],
 )
-def test_getitem(key):
+def test_getitem(key, repeated_series, repeated_pandas_series):
     if packaging.version.Version(pd.__version__) < packaging.version.Version("2.2.0"):
         pytest.skip(
             "https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#series-list-accessor-for-pyarrow-list-data"
         )
-    data = [[1], [2, 3], [4, 5, 6]]
-    s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
-    pd_s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
 
-    bf_result = s.list[key].to_pandas()
-    pd_result = pd_s.list[key]
+    bf_result = repeated_series.list[key].to_pandas()
+    pd_result = repeated_pandas_series.list[key]
 
-    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
+    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False, check_names=False)
 
 
 @pytest.mark.parametrize(
@@ -60,24 +57,18 @@ def test_getitem(key):
         (slice(0, 2, 2), pytest.raises(NotImplementedError)),
     ],
 )
-def test_getitem_notsupported(key, expectation):
-    data = [[1], [2, 3], [4, 5, 6]]
-    s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
-
+def test_getitem_notsupported(key, expectation, repeated_series):
     with expectation as e:
-        assert s.list[key] == e
+        assert repeated_series.list[key] == e
 
 
-def test_len():
+def test_len(repeated_series, repeated_pandas_series):
     if packaging.version.Version(pd.__version__) < packaging.version.Version("2.2.0"):
         pytest.skip(
             "https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#series-list-accessor-for-pyarrow-list-data"
         )
-    data = [[], [1], [1, 2], [1, 2, 3]]
-    s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
-    pd_s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
 
-    bf_result = s.list.len().to_pandas()
-    pd_result = pd_s.list.len()
+    bf_result = repeated_series.list.len().to_pandas()
+    pd_result = repeated_pandas_series.list.len()
 
-    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
+    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False, check_names=False)
diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py
index 15e8512317..56b08f96cb 100644
--- a/tests/system/small/operations/test_strings.py
+++ b/tests/system/small/operations/test_strings.py
@@ -622,13 +622,14 @@ def test_getitem_w_string(scalars_dfs, index):
         pytest.param(slice(0, 0, None), id="single_one_slice"),
     ],
 )
-def test_getitem_w_array(index):
-    data = [[1], [2, 3], [], [4, 5, 6]]
-    s = bpd.Series(data)
-    pd_s = pd.Series(data)
+def test_getitem_w_array(index, repeated_series, repeated_pandas_df):
+    bf_result = repeated_series.str[index].to_pandas()
+    # We use repeated_pandas_df['list_col'] instead of repeated_pandas_series. 
+    # Reason: The series fixture contains lists that are strongly-typed as PyArrow lists. 
+    # Using str accessor on this type would fail the type check. However, the default 
+    # type for lists is Object in pandas, and it happily accepts str accessors.
+    pd_result = repeated_pandas_df['list_col'].str[index]
 
-    bf_result = s.str[index].to_pandas()
-    pd_result = pd_s.str[index]
     # Skip dtype checks here because pandas returns `int64` while BF returns `Int64`.
     assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
 

From 9994d261dae9560e5d8e4995338aceefb44dc1e9 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Mon, 16 Sep 2024 20:52:30 +0000
Subject: [PATCH 2/7] fix formatting in conftest and list/str accessor tests

---
 tests/system/conftest.py                      |  8 +++++++-
 tests/system/small/operations/test_lists.py   | 16 ++++++++++++++--
 tests/system/small/operations/test_strings.py |  8 ++++----
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 99257c6c00..9d3e25e080 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -372,10 +372,12 @@ def scalars_table_tokyo(test_data_tables_tokyo) -> str:
 def nested_table_id(test_data_tables) -> str:
     return test_data_tables["nested"]
 
+
 @pytest.fixture(scope="session")
 def repeated_table_id(test_data_tables) -> str:
     return test_data_tables["repeated"]
 
+
 @pytest.fixture(scope="session")
 def penguins_table_id(test_data_tables) -> str:
     return test_data_tables["penguins"]
@@ -415,6 +417,7 @@ def nested_pandas_df() -> pd.DataFrame:
     df = df.set_index("rowindex")
     return df
 
+
 @pytest.fixture(scope="session")
 def repeated_df(
     repeated_table_id: str, session: bigframes.Session
@@ -422,9 +425,10 @@ def repeated_df(
     """Returns a DataFrame containing columns of list type."""
     return session.read_gbq(repeated_table_id, index_col="rowindex")
 
+
 @pytest.fixture(scope="session")
 def repeated_series(
-    repeated_df: bigframes.dataframe.DataFrame
+    repeated_df: bigframes.dataframe.DataFrame,
 ) -> bigframes.series.Series:
     """Returns a Series of lists"""
     return repeated_df["list_col"]
@@ -441,11 +445,13 @@ def repeated_pandas_df() -> pd.DataFrame:
     df = df.set_index("rowindex")
     return df
 
+
 @pytest.fixture(scope="session")
 def repeated_pandas_series(repeated_pandas_df: pd.DataFrame) -> pd.Series:
     """pd.DataFrame pointing at test data."""
     return repeated_pandas_df["list_col"].astype(pd.ArrowDtype(pa.list_(pa.int64())))
 
+
 @pytest.fixture(scope="session")
 def scalars_df_default_index(
     scalars_df_index: bigframes.dataframe.DataFrame,
diff --git a/tests/system/small/operations/test_lists.py b/tests/system/small/operations/test_lists.py
index 5146b8047e..1689c230d9 100644
--- a/tests/system/small/operations/test_lists.py
+++ b/tests/system/small/operations/test_lists.py
@@ -41,7 +41,13 @@ def test_getitem(key, repeated_series, repeated_pandas_series):
     bf_result = repeated_series.list[key].to_pandas()
     pd_result = repeated_pandas_series.list[key]
 
-    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False, check_names=False)
+    assert_series_equal(
+        pd_result,
+        bf_result,
+        check_dtype=False,
+        check_index_type=False,
+        check_names=False,
+    )
 
 
 @pytest.mark.parametrize(
@@ -71,4 +77,10 @@ def test_len(repeated_series, repeated_pandas_series):
     bf_result = repeated_series.list.len().to_pandas()
     pd_result = repeated_pandas_series.list.len()
 
-    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False, check_names=False)
+    assert_series_equal(
+        pd_result,
+        bf_result,
+        check_dtype=False,
+        check_index_type=False,
+        check_names=False,
+    )
diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py
index 56b08f96cb..ab238e3cbc 100644
--- a/tests/system/small/operations/test_strings.py
+++ b/tests/system/small/operations/test_strings.py
@@ -624,11 +624,11 @@ def test_getitem_w_string(scalars_dfs, index):
 )
 def test_getitem_w_array(index, repeated_series, repeated_pandas_df):
     bf_result = repeated_series.str[index].to_pandas()
-    # We use repeated_pandas_df['list_col'] instead of repeated_pandas_series. 
-    # Reason: The series fixture contains lists that are strongly-typed as PyArrow lists. 
-    # Using str accessor on this type would fail the type check. However, the default 
+    # We use repeated_pandas_df['list_col'] instead of repeated_pandas_series.
+    # Reason: The series fixture contains lists that are strongly-typed as PyArrow lists.
+    # Using str accessor on this type would fail the type check. However, the default
     # type for lists is Object in pandas, and it happily accepts str accessors.
-    pd_result = repeated_pandas_df['list_col'].str[index]
+    pd_result = repeated_pandas_df["list_col"].str[index]
 
     # Skip dtype checks here because pandas returns `int64` while BF returns `Int64`.
     assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)

From e7edcdf3ccd4528c922e7d7390f46ba191e2691d Mon Sep 17 00:00:00 2001
From: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Date: Mon, 16 Sep 2024 20:53:38 +0000
Subject: [PATCH 3/7] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?=
 =?UTF-8?q?st-processor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
---
 tests/system/conftest.py                      |  8 +++++++-
 tests/system/small/operations/test_lists.py   | 16 ++++++++++++++--
 tests/system/small/operations/test_strings.py |  8 ++++----
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 99257c6c00..9d3e25e080 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -372,10 +372,12 @@ def scalars_table_tokyo(test_data_tables_tokyo) -> str:
 def nested_table_id(test_data_tables) -> str:
     return test_data_tables["nested"]
 
+
 @pytest.fixture(scope="session")
 def repeated_table_id(test_data_tables) -> str:
     return test_data_tables["repeated"]
 
+
 @pytest.fixture(scope="session")
 def penguins_table_id(test_data_tables) -> str:
     return test_data_tables["penguins"]
@@ -415,6 +417,7 @@ def nested_pandas_df() -> pd.DataFrame:
     df = df.set_index("rowindex")
     return df
 
+
 @pytest.fixture(scope="session")
 def repeated_df(
     repeated_table_id: str, session: bigframes.Session
@@ -422,9 +425,10 @@ def repeated_df(
     """Returns a DataFrame containing columns of list type."""
     return session.read_gbq(repeated_table_id, index_col="rowindex")
 
+
 @pytest.fixture(scope="session")
 def repeated_series(
-    repeated_df: bigframes.dataframe.DataFrame
+    repeated_df: bigframes.dataframe.DataFrame,
 ) -> bigframes.series.Series:
     """Returns a Series of lists"""
     return repeated_df["list_col"]
@@ -441,11 +445,13 @@ def repeated_pandas_df() -> pd.DataFrame:
     df = df.set_index("rowindex")
     return df
 
+
 @pytest.fixture(scope="session")
 def repeated_pandas_series(repeated_pandas_df: pd.DataFrame) -> pd.Series:
     """pd.DataFrame pointing at test data."""
     return repeated_pandas_df["list_col"].astype(pd.ArrowDtype(pa.list_(pa.int64())))
 
+
 @pytest.fixture(scope="session")
 def scalars_df_default_index(
     scalars_df_index: bigframes.dataframe.DataFrame,
diff --git a/tests/system/small/operations/test_lists.py b/tests/system/small/operations/test_lists.py
index 5146b8047e..1689c230d9 100644
--- a/tests/system/small/operations/test_lists.py
+++ b/tests/system/small/operations/test_lists.py
@@ -41,7 +41,13 @@ def test_getitem(key, repeated_series, repeated_pandas_series):
     bf_result = repeated_series.list[key].to_pandas()
     pd_result = repeated_pandas_series.list[key]
 
-    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False, check_names=False)
+    assert_series_equal(
+        pd_result,
+        bf_result,
+        check_dtype=False,
+        check_index_type=False,
+        check_names=False,
+    )
 
 
 @pytest.mark.parametrize(
@@ -71,4 +77,10 @@ def test_len(repeated_series, repeated_pandas_series):
     bf_result = repeated_series.list.len().to_pandas()
     pd_result = repeated_pandas_series.list.len()
 
-    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False, check_names=False)
+    assert_series_equal(
+        pd_result,
+        bf_result,
+        check_dtype=False,
+        check_index_type=False,
+        check_names=False,
+    )
diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py
index 56b08f96cb..ab238e3cbc 100644
--- a/tests/system/small/operations/test_strings.py
+++ b/tests/system/small/operations/test_strings.py
@@ -624,11 +624,11 @@ def test_getitem_w_string(scalars_dfs, index):
 )
 def test_getitem_w_array(index, repeated_series, repeated_pandas_df):
     bf_result = repeated_series.str[index].to_pandas()
-    # We use repeated_pandas_df['list_col'] instead of repeated_pandas_series. 
-    # Reason: The series fixture contains lists that are strongly-typed as PyArrow lists. 
-    # Using str accessor on this type would fail the type check. However, the default 
+    # We use repeated_pandas_df['list_col'] instead of repeated_pandas_series.
+    # Reason: The series fixture contains lists that are strongly-typed as PyArrow lists.
+    # Using str accessor on this type would fail the type check. However, the default
     # type for lists is Object in pandas, and it happily accepts str accessors.
-    pd_result = repeated_pandas_df['list_col'].str[index]
+    pd_result = repeated_pandas_df["list_col"].str[index]
 
     # Skip dtype checks here because pandas returns `int64` while BF returns `Int64`.
     assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)

From 9a9d0ac3452dc7d3e27462a252abd54fe2d23866 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Mon, 16 Sep 2024 22:53:23 +0000
Subject: [PATCH 4/7] add more type coverage in tests

---
 tests/data/repeated.jsonl                     |  7 ++--
 tests/data/repeated_schema.json               | 32 ++++++++++++++-
 tests/system/conftest.py                      | 14 -------
 tests/system/small/operations/test_lists.py   | 40 +++++++++++++++----
 tests/system/small/operations/test_strings.py | 26 +++++++-----
 5 files changed, 82 insertions(+), 37 deletions(-)

diff --git a/tests/data/repeated.jsonl b/tests/data/repeated.jsonl
index eb9c4317ac..b3c47772f6 100644
--- a/tests/data/repeated.jsonl
+++ b/tests/data/repeated.jsonl
@@ -1,4 +1,3 @@
-{"rowindex": 0, "list_col": [1]}
-{"rowindex": 1, "list_col": [1,2]}
-{"rowindex": 2, "list_col": [1,2,3]}
-{"rowindex": 3, "list_col": [1,2,3,4]}
+{"rowindex": 0, "int_list_col": [1],        "bool_list_col": [true],        "float_list_col": [1.2, 2.3],        "date_list_col": ["2021-07-21"],                "date_time_list_col": ["2021-07-21 11:39:45"], "numeric_list_col": [1.2, 2.3, 3.4], "string_list_col": ["abc", "de", "f"]}
+{"rowindex": 1, "int_list_col": [1,2],      "bool_list_col": [true, false], "float_list_col": [1.1],             "date_list_col": ["2021-07-21", "1987-03-28"],  "date_time_list_col": ["1999-03-14 17:22:00"], "numeric_list_col": [5.5, 2.3],      "string_list_col": ["a", "bc", "de"]}
+{"rowindex": 2, "int_list_col": [1,2,3],    "bool_list_col": [true],        "float_list_col": [0.5, -1.9, 2.3],  "date_list_col": ["2017-08-01", "2004-11-22"],  "date_time_list_col": ["1979-06-03 03:20:45"], "numeric_list_col": [1.7],           "string_list_col": ["", "a"]}
diff --git a/tests/data/repeated_schema.json b/tests/data/repeated_schema.json
index e6f32bd9e1..300f32c994 100644
--- a/tests/data/repeated_schema.json
+++ b/tests/data/repeated_schema.json
@@ -5,8 +5,38 @@
         "mode": "REQUIRED"
     },
     {
-        "name": "list_col",
+        "name": "int_list_col",
         "type": "INTEGER",
         "mode": "REPEATED"
+    },
+    {
+        "name": "bool_list_col",
+        "type": "BOOLEAN",
+        "mode": "REPEATED"
+    },
+    {
+        "name": "float_list_col",
+        "type": "FLOAT",
+        "mode": "REPEATED"
+    },
+    {
+        "name": "date_list_col",
+        "type": "DATE",
+        "mode": "REPEATED"
+    },
+    {
+        "name": "date_time_list_col",
+        "type": "DATETIME",
+        "mode": "REPEATED"
+    },
+    {
+        "name": "numeric_list_col",
+        "type": "NUMERIC",
+        "mode": "REPEATED"
+    },
+    {
+        "name": "string_list_col",
+        "type": "STRING",
+        "mode": "REPEATED"
     }
 ]
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 9d3e25e080..e7a85c5a65 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -426,14 +426,6 @@ def repeated_df(
     return session.read_gbq(repeated_table_id, index_col="rowindex")
 
 
-@pytest.fixture(scope="session")
-def repeated_series(
-    repeated_df: bigframes.dataframe.DataFrame,
-) -> bigframes.series.Series:
-    """Returns a Series of lists"""
-    return repeated_df["list_col"]
-
-
 @pytest.fixture(scope="session")
 def repeated_pandas_df() -> pd.DataFrame:
     """Returns a DataFrame containing columns of list type."""
@@ -446,12 +438,6 @@ def repeated_pandas_df() -> pd.DataFrame:
     return df
 
 
-@pytest.fixture(scope="session")
-def repeated_pandas_series(repeated_pandas_df: pd.DataFrame) -> pd.Series:
-    """pd.DataFrame pointing at test data."""
-    return repeated_pandas_df["list_col"].astype(pd.ArrowDtype(pa.list_(pa.int64())))
-
-
 @pytest.fixture(scope="session")
 def scalars_df_default_index(
     scalars_df_index: bigframes.dataframe.DataFrame,
diff --git a/tests/system/small/operations/test_lists.py b/tests/system/small/operations/test_lists.py
index 1689c230d9..06529743ed 100644
--- a/tests/system/small/operations/test_lists.py
+++ b/tests/system/small/operations/test_lists.py
@@ -32,14 +32,26 @@
         pytest.param(slice(0, 2, None), id="default_step_slice"),
     ],
 )
-def test_getitem(key, repeated_series, repeated_pandas_series):
+@pytest.mark.parametrize(
+    ("column_name", "dtype"),
+    [
+        pytest.param("int_list_col", pd.ArrowDtype(pa.list_(pa.int64()))),
+        pytest.param("bool_list_col", pd.ArrowDtype(pa.list_(pa.bool_()))),
+        pytest.param("float_list_col", pd.ArrowDtype(pa.list_(pa.float64()))),
+        pytest.param("date_list_col", pd.ArrowDtype(pa.list_(pa.date32()))),
+        pytest.param("date_time_list_col", pd.ArrowDtype(pa.list_(pa.timestamp("us")))),
+        pytest.param("numeric_list_col", pd.ArrowDtype(pa.list_(pa.decimal128(38, 9)))),
+        pytest.param("string_list_col", pd.ArrowDtype(pa.list_(pa.string()))),
+    ]
+)
+def test_getitem(key, column_name, dtype, repeated_df, repeated_pandas_df):
     if packaging.version.Version(pd.__version__) < packaging.version.Version("2.2.0"):
         pytest.skip(
             "https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#series-list-accessor-for-pyarrow-list-data"
         )
 
-    bf_result = repeated_series.list[key].to_pandas()
-    pd_result = repeated_pandas_series.list[key]
+    bf_result = repeated_df[column_name].list[key].to_pandas()
+    pd_result = repeated_pandas_df[column_name].astype(dtype).list[key]
 
     assert_series_equal(
         pd_result,
@@ -63,19 +75,31 @@ def test_getitem(key, repeated_series, repeated_pandas_series):
         (slice(0, 2, 2), pytest.raises(NotImplementedError)),
     ],
 )
-def test_getitem_notsupported(key, expectation, repeated_series):
+def test_getitem_notsupported(key, expectation, repeated_df):
     with expectation as e:
-        assert repeated_series.list[key] == e
+        assert repeated_df['int_list_col'].list[key] == e
 
 
-def test_len(repeated_series, repeated_pandas_series):
+@pytest.mark.parametrize(
+    ("column_name", "dtype"),
+    [
+        pytest.param("int_list_col", pd.ArrowDtype(pa.list_(pa.int64()))),
+        pytest.param("bool_list_col", pd.ArrowDtype(pa.list_(pa.bool_()))),
+        pytest.param("float_list_col", pd.ArrowDtype(pa.list_(pa.float64()))),
+        pytest.param("date_list_col", pd.ArrowDtype(pa.list_(pa.date32()))),
+        pytest.param("date_time_list_col", pd.ArrowDtype(pa.list_(pa.timestamp("us")))),
+        pytest.param("numeric_list_col", pd.ArrowDtype(pa.list_(pa.decimal128(38, 9)))),
+        pytest.param("string_list_col", pd.ArrowDtype(pa.list_(pa.string()))),
+    ]
+)
+def test_len(column_name, dtype, repeated_df, repeated_pandas_df):
     if packaging.version.Version(pd.__version__) < packaging.version.Version("2.2.0"):
         pytest.skip(
             "https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#series-list-accessor-for-pyarrow-list-data"
         )
 
-    bf_result = repeated_series.list.len().to_pandas()
-    pd_result = repeated_pandas_series.list.len()
+    bf_result = repeated_df[column_name].list.len().to_pandas()
+    pd_result = repeated_pandas_df[column_name].astype(dtype).list.len()
 
     assert_series_equal(
         pd_result,
diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py
index ab238e3cbc..568dcc416a 100644
--- a/tests/system/small/operations/test_strings.py
+++ b/tests/system/small/operations/test_strings.py
@@ -615,22 +615,28 @@ def test_getitem_w_string(scalars_dfs, index):
 @pytest.mark.parametrize(
     ("index"),
     [
-        pytest.param(2, id="int"),
+        pytest.param(0, id="int"),
         pytest.param(slice(None, None, None), id="default_start_slice"),
         pytest.param(slice(0, None, 1), id="default_stop_slice"),
         pytest.param(slice(0, 2, None), id="default_step_slice"),
         pytest.param(slice(0, 0, None), id="single_one_slice"),
     ],
 )
-def test_getitem_w_array(index, repeated_series, repeated_pandas_df):
-    bf_result = repeated_series.str[index].to_pandas()
-    # We use repeated_pandas_df['list_col'] instead of repeated_pandas_series.
-    # Reason: The series fixture contains lists that are strongly-typed as PyArrow lists.
-    # Using str accessor on this type would fail the type check. However, the default
-    # type for lists is Object in pandas, and it happily accepts str accessors.
-    pd_result = repeated_pandas_df["list_col"].str[index]
-
-    # Skip dtype checks here because pandas returns `int64` while BF returns `Int64`.
+@pytest.mark.parametrize(
+    "column_name",
+    [
+        pytest.param("int_list_col"),
+        pytest.param("bool_list_col"),
+        pytest.param("float_list_col"),
+        pytest.param("string_list_col"),
+        # date, date_time and numeric are excluded because their default types are different
+        # in Pandas and BigFrames
+    ]
+)
+def test_getitem_w_array(index, column_name, repeated_df, repeated_pandas_df):
+    bf_result = repeated_df[column_name].str[index].to_pandas()
+    pd_result = repeated_pandas_df[column_name].str[index]
+
     assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
 
 

From f27dda7548ba8a4dec3b878bc657a6b4965dbfc1 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Mon, 16 Sep 2024 22:54:00 +0000
Subject: [PATCH 5/7] fix formatting in list/str accessor tests

---
 tests/system/small/operations/test_lists.py   | 6 +++---
 tests/system/small/operations/test_strings.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/system/small/operations/test_lists.py b/tests/system/small/operations/test_lists.py
index 06529743ed..4b012d9ffa 100644
--- a/tests/system/small/operations/test_lists.py
+++ b/tests/system/small/operations/test_lists.py
@@ -42,7 +42,7 @@
         pytest.param("date_time_list_col", pd.ArrowDtype(pa.list_(pa.timestamp("us")))),
         pytest.param("numeric_list_col", pd.ArrowDtype(pa.list_(pa.decimal128(38, 9)))),
         pytest.param("string_list_col", pd.ArrowDtype(pa.list_(pa.string()))),
-    ]
+    ],
 )
 def test_getitem(key, column_name, dtype, repeated_df, repeated_pandas_df):
     if packaging.version.Version(pd.__version__) < packaging.version.Version("2.2.0"):
@@ -77,7 +77,7 @@ def test_getitem(key, column_name, dtype, repeated_df, repeated_pandas_df):
 )
 def test_getitem_notsupported(key, expectation, repeated_df):
     with expectation as e:
-        assert repeated_df['int_list_col'].list[key] == e
+        assert repeated_df["int_list_col"].list[key] == e
 
 
 @pytest.mark.parametrize(
@@ -90,7 +90,7 @@ def test_getitem_notsupported(key, expectation, repeated_df):
         pytest.param("date_time_list_col", pd.ArrowDtype(pa.list_(pa.timestamp("us")))),
         pytest.param("numeric_list_col", pd.ArrowDtype(pa.list_(pa.decimal128(38, 9)))),
         pytest.param("string_list_col", pd.ArrowDtype(pa.list_(pa.string()))),
-    ]
+    ],
 )
 def test_len(column_name, dtype, repeated_df, repeated_pandas_df):
     if packaging.version.Version(pd.__version__) < packaging.version.Version("2.2.0"):
diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py
index 568dcc416a..98fecaa93b 100644
--- a/tests/system/small/operations/test_strings.py
+++ b/tests/system/small/operations/test_strings.py
@@ -631,7 +631,7 @@ def test_getitem_w_string(scalars_dfs, index):
         pytest.param("string_list_col"),
         # date, date_time and numeric are excluded because their default types are different
         # in Pandas and BigFrames
-    ]
+    ],
 )
 def test_getitem_w_array(index, column_name, repeated_df, repeated_pandas_df):
     bf_result = repeated_df[column_name].str[index].to_pandas()

From 2c0eeb329536e7015384e1fa693524f5f3d8754d Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Mon, 16 Sep 2024 22:56:17 +0000
Subject: [PATCH 6/7] remove unused bigframes.pandas import from test_lists.py

---
 tests/system/small/operations/test_lists.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/system/small/operations/test_lists.py b/tests/system/small/operations/test_lists.py
index 4b012d9ffa..7b39bdebd5 100644
--- a/tests/system/small/operations/test_lists.py
+++ b/tests/system/small/operations/test_lists.py
@@ -18,8 +18,6 @@
 import pyarrow as pa
 import pytest
 
-import bigframes.pandas as bpd
-
 from ...utils import assert_series_equal
 
 

From 40ccad90fc2032d33f43868e31dd7fe955ca3b57 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Mon, 16 Sep 2024 22:59:43 +0000
Subject: [PATCH 7/7] remove unused pyarrow import from tests/system/conftest.py

---
 tests/system/conftest.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index e7a85c5a65..9cfb9082af 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -32,7 +32,6 @@
 import ibis.backends
 import numpy as np
 import pandas as pd
-import pyarrow as pa
 import pytest
 import pytz
 import test_utils.prefixer