From 5004d08c6af93471686ccb319c69cd38c7893042 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?=
Date: Thu, 12 Dec 2024 15:38:50 -0600
Subject: [PATCH 1/4] feat: `to_gbq` fails with `TypeError` if passing in a
 bigframes DataFrame object (#833)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: `to_gbq` fails with `TypeError` if passing in a bigframes DataFrame object

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot
---
 pandas_gbq/gbq.py         |  8 ++++++++
 tests/unit/test_to_gbq.py | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index b04ad131..feffd858 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -1091,6 +1091,14 @@ def to_gbq(
 
         .. versionadded:: 0.23.3
     """
+    # If we get a bigframes.pandas.DataFrame object, it may be possible to use
+    # the code paths here, but it could potentially be quite expensive because
+    # of the queries involved in type detection. It would be safer just to
+    # fail early if there are bigframes-y methods available.
+    # https://github.com/googleapis/python-bigquery-pandas/issues/824
+    if hasattr(dataframe, "to_pandas") and hasattr(dataframe, "to_gbq"):
+        raise TypeError(f"Expected a pandas.DataFrame, but got {repr(type(dataframe))}")
+
     _test_google_api_imports()
 
     from google.api_core import exceptions as google_exceptions
diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py
index 60ea8025..f4012dc8 100644
--- a/tests/unit/test_to_gbq.py
+++ b/tests/unit/test_to_gbq.py
@@ -11,6 +11,16 @@
 from pandas_gbq import gbq
 
 
+class FakeDataFrame:
+    """A fake bigframes DataFrame to avoid depending on bigframes."""
+
+    def to_gbq(self):
+        """Fake to_gbq() to mimic a bigframes object."""
+
+    def to_pandas(self):
+        """Fake to_pandas() to mimic a bigframes object."""
+
+
 @pytest.fixture
 def expected_load_method(mock_bigquery_client):
     return mock_bigquery_client.load_table_from_dataframe
@@ -66,6 +76,15 @@ def test_to_gbq_load_method_translates_exception(
     expected_load_method.assert_called_once()
 
 
+def test_to_gbq_with_bigframes_raises_typeerror():
+    dataframe = FakeDataFrame()
+
+    with pytest.raises(
+        TypeError, match=r"Expected a pandas.DataFrame, but got .+FakeDataFrame"
+    ):
+        gbq.to_gbq(dataframe, "my_dataset.my_table", project_id="myproj")
+
+
 def test_to_gbq_with_if_exists_append(mock_bigquery_client, expected_load_method):
     from google.cloud.bigquery import SchemaField
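The new guard is purely duck-typed, so it can be tried without bigframes installed. A minimal sketch mirroring the `FakeDataFrame` unit test above; the dataset, table, and project names are placeholders:

    import pandas_gbq

    class FakeDataFrame:
        """Mimics the two methods the guard looks for on a bigframes object."""

        def to_pandas(self):
            """Never called; the guard only checks that the method exists."""

        def to_gbq(self):
            """Never called; the guard only checks that the method exists."""

    # Raises TypeError("Expected a pandas.DataFrame, but got <class '...FakeDataFrame'>")
    # before _test_google_api_imports() or any network call runs.
    pandas_gbq.to_gbq(FakeDataFrame(), "my_dataset.my_table", project_id="myproj")
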
From 5484a8c69965549e36afa4388ff42be3f83ec097 Mon Sep 17 00:00:00 2001
From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com>
Date: Wed, 18 Dec 2024 02:41:50 +0800
Subject: [PATCH 2/4] chore(python): update dependencies in
 .kokoro/docker/docs (#841)

Source-Link: https://github.com/googleapis/synthtool/commit/e808c98e1ab7eec3df2a95a05331619f7001daef
Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737
Co-authored-by: Owl Bot
---
 .github/.OwlBot.lock.yaml            |  4 +--
 .kokoro/docker/docs/requirements.txt | 52 ++++++++++++++++++++++------
 2 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml
index 6301519a..26306af6 100644
--- a/.github/.OwlBot.lock.yaml
+++ b/.github/.OwlBot.lock.yaml
@@ -13,5 +13,5 @@
 # limitations under the License.
 docker:
   image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
-  digest: sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562
-# created: 2024-11-12T12:09:45.821174897Z
+  digest: sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737
+# created: 2024-12-17T00:59:58.625514486Z
diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt
index 8bb07645..f99a5c4a 100644
--- a/.kokoro/docker/docs/requirements.txt
+++ b/.kokoro/docker/docs/requirements.txt
@@ -2,11 +2,11 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile --allow-unsafe --generate-hashes requirements.in
+#    pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in
 #
-argcomplete==3.5.1 \
-    --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \
-    --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4
+argcomplete==3.5.2 \
+    --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \
+    --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb
     # via nox
 colorlog==6.9.0 \
     --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \
 filelock==3.16.1 \
 nox==2024.10.9 \
     --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \
     --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95
     # via -r requirements.in
 packaging==24.2 \
     --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
     --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
 platformdirs==4.3.6 \
     --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \
     --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb
     # via virtualenv
-tomli==2.0.2 \
-    --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \
-    --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed
+tomli==2.2.1 \
+    --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \
+    --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \
+    --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \
+    --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \
+    --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \
+    --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \
+    --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \
+    --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \
+    --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \
+    --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \
+    --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \
+    --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \
+    --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \
+    --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \
+    --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \
+    --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \
+    --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \
+    --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \
+    --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \
+    --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \
+    --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \
+    --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \
+    --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \
+    --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \
+    --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \
+    --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \
+    --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \
+    --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \
+    --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \
+    --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \
+    --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \
+    --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7
     # via nox
-virtualenv==20.27.1 \
-    --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \
-    --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4
+virtualenv==20.28.0 \
+    --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \
+    --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa
     # via nox

From cf1aadd48165617768fecff91e68941255148dbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?=
Date: Thu, 19 Dec 2024 11:22:03 -0600
Subject: [PATCH 3/4] fix: `to_gbq` uses `default_type` for ambiguous array
 types and struct field types (#838)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: `to_gbq` uses `default_type` for ambiguous array types and struct field types

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* fix arrow list(null) case too

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* lint

* Update pandas_gbq/schema/pandas_to_bigquery.py

Co-authored-by: Chalmer Lowe

* Update pandas_gbq/schema/pandas_to_bigquery.py

Co-authored-by: Chalmer Lowe

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* remove redundant string check

* Apply suggestions from code review

Co-authored-by: Chalmer Lowe

* add docstrings and a few more test cases

* use python 3.10 for docs github action

---------

Co-authored-by: Owl Bot
Co-authored-by: Chalmer Lowe
---
 .github/workflows/docs.yml                    |   2 +-
 owlbot.py                                     |   1 +
 pandas_gbq/schema/pandas_to_bigquery.py       | 111 +++++++++++++++---
 pandas_gbq/schema/pyarrow_to_bigquery.py      |  61 +++++++++-
 tests/unit/schema/test_pandas_to_bigquery.py  |  49 ++++++--
 tests/unit/schema/test_pyarrow_to_bigquery.py |  18 ++-
 tests/unit/test_schema.py                     |  51 +++++++-
 7 files changed, 244 insertions(+), 49 deletions(-)
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 698fbc5c..2833fe98 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -12,7 +12,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.9"
+          python-version: "3.10"
       - name: Install nox
         run: |
           python -m pip install --upgrade setuptools pip wheel
diff --git a/owlbot.py b/owlbot.py
index 190298a6..e50b9e9e 100644
--- a/owlbot.py
+++ b/owlbot.py
@@ -57,6 +57,7 @@
         "noxfile.py",
         "README.rst",
         # exclude this file as we have an alternate prerelease.cfg
+        ".github/workflows/docs.yml",
         ".kokoro/presubmit/prerelease-deps.cfg",
         ".kokoro/presubmit/presubmit.cfg",
     ],
diff --git a/pandas_gbq/schema/pandas_to_bigquery.py b/pandas_gbq/schema/pandas_to_bigquery.py
index 5a979a12..5afae356 100644
--- a/pandas_gbq/schema/pandas_to_bigquery.py
+++ b/pandas_gbq/schema/pandas_to_bigquery.py
@@ -4,7 +4,7 @@
 
 import collections.abc
 import datetime
-from typing import Optional, Tuple
+from typing import Any, Optional, Tuple
 import warnings
 
 import db_dtypes
@@ -28,14 +28,21 @@
 # `docs/source/writing.rst`.
 _PANDAS_DTYPE_TO_BQ = {
     "bool": "BOOLEAN",
+    "boolean": "BOOLEAN",
     "datetime64[ns, UTC]": "TIMESTAMP",
+    "datetime64[us, UTC]": "TIMESTAMP",
     "datetime64[ns]": "DATETIME",
+    "datetime64[us]": "DATETIME",
     "float32": "FLOAT",
     "float64": "FLOAT",
     "int8": "INTEGER",
     "int16": "INTEGER",
     "int32": "INTEGER",
     "int64": "INTEGER",
+    "Int8": "INTEGER",
+    "Int16": "INTEGER",
+    "Int32": "INTEGER",
+    "Int64": "INTEGER",
     "uint8": "INTEGER",
     "uint16": "INTEGER",
     "uint32": "INTEGER",
@@ -103,7 +110,7 @@ def dataframe_to_bigquery_fields(
 
         # Try to automatically determine the type based on a few rows of the data.
         values = dataframe.reset_index()[column]
-        bq_field = values_to_bigquery_field(column, values)
+        bq_field = values_to_bigquery_field(column, values, default_type=default_type)
 
         if bq_field:
             bq_schema_out.append(bq_field)
@@ -114,7 +121,9 @@ def dataframe_to_bigquery_fields(
             arrow_value = pyarrow.array(values)
             bq_field = (
                 pandas_gbq.schema.pyarrow_to_bigquery.arrow_type_to_bigquery_field(
-                    column, arrow_value.type
+                    column,
+                    arrow_value.type,
+                    default_type=default_type,
                 )
             )
 
@@ -151,6 +160,19 @@ def dataframe_to_bigquery_fields(
 
 
 def dtype_to_bigquery_field(name, dtype) -> Optional[schema.SchemaField]:
+    """Infers the BigQuery schema field type from a pandas dtype.
+
+    Args:
+        name (str):
+            Name of the column/field.
+        dtype:
+            A pandas / numpy dtype object.
+
+    Returns:
+        Optional[schema.SchemaField]:
+            The schema field, or None if a type cannot be inferred, such as if
+            it is ambiguous like the object dtype.
+    """
     bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name)
 
     if bq_type is not None:
@@ -164,9 +186,44 @@ def dtype_to_bigquery_field(name, dtype) -> Optional[schema.SchemaField]:
 
     return None
 
 
-def value_to_bigquery_field(name, value) -> Optional[schema.SchemaField]:
-    if isinstance(value, str):
-        return schema.SchemaField(name, "STRING")
+def value_to_bigquery_field(
+    name: str, value: Any, default_type: Optional[str] = None
+) -> Optional[schema.SchemaField]:
+    """Infers the BigQuery schema field type from a single value.
+
+    Args:
+        name:
+            The name of the field.
+        value:
+            The value to infer the type from. If None, the default type is used
+            if available.
+        default_type:
+            The default field type. Defaults to None.
+
+    Returns:
+        The schema field, or None if a type cannot be inferred.
+    """
+
+    # Set the SchemaField datatype to the given default_type if the value
+    # being assessed is None.
+    if value is None:
+        return schema.SchemaField(name, default_type)
+
+    # Map from Python types to BigQuery types. This isn't super exhaustive
+    # because we rely more on pyarrow, which can check more than one value to
+    # determine the type.
+    type_mapping = {
+        str: "STRING",
+    }
+
+    # geopandas and shapely are optional dependencies, so only check if those
+    # are installed.
+    if _BaseGeometry is not None:
+        type_mapping[_BaseGeometry] = "GEOGRAPHY"
+
+    for type_, bq_type in type_mapping.items():
+        if isinstance(value, type_):
+            return schema.SchemaField(name, bq_type)
 
     # For timezone-naive datetimes, the later pyarrow conversion used to try
     # and learn the type adds a timezone to such datetimes, causing them to be
@@ -182,35 +239,51 @@ def value_to_bigquery_field(name, value) -> Optional[schema.SchemaField]:
     else:
         return schema.SchemaField(name, "DATETIME")
 
-    if _BaseGeometry is not None and isinstance(value, _BaseGeometry):
-        return schema.SchemaField(name, "GEOGRAPHY")
-
     return None
 
 
-def values_to_bigquery_field(name, values) -> Optional[schema.SchemaField]:
+def values_to_bigquery_field(
+    name: str, values: Any, default_type: str = "STRING"
+) -> Optional[schema.SchemaField]:
+    """Infers the BigQuery schema field type from a list of values.
+
+    This function iterates through the given values to determine the
+    corresponding schema field type.
+
+    Args:
+        name:
+            The name of the field.
+        values:
+            An iterable of values to infer the type from. If all the values
+            are None or the iterable is empty, the function returns None.
+        default_type:
+            The default field type to use if a specific type cannot be
+            determined from the values. Defaults to "STRING".
+
+    Returns:
+        The schema field, or None if a type cannot be inferred.
+    """
     value = pandas_gbq.core.pandas.first_valid(values)
 
-    # All NULL, type not determinable.
+    # All values came back as NULL, thus type not determinable by this method.
+    # Return None so we can try other methods.
     if value is None:
         return None
 
-    field = value_to_bigquery_field(name, value)
-    if field is not None:
+    field = value_to_bigquery_field(name, value, default_type=default_type)
+    if field:
         return field
 
-    if isinstance(value, str):
-        return schema.SchemaField(name, "STRING")
-
-    # Check plain ARRAY values here. Let STRUCT get determined by pyarrow,
-    # which can examine more values to determine all keys.
+    # Check plain ARRAY values here. Exclude mapping types to let STRUCT get
+    # determined by pyarrow, which can examine more values to determine all
+    # keys.
     if isinstance(value, collections.abc.Iterable) and not isinstance(
         value, collections.abc.Mapping
    ):
         # It could be that this value contains all None or is empty, so get the
         # first non-None value we can find.
         valid_item = pandas_gbq.core.pandas.first_array_valid(values)
-        field = value_to_bigquery_field(name, valid_item)
+        field = value_to_bigquery_field(name, valid_item, default_type=default_type)
 
         if field is not None:
             return schema.SchemaField(name, field.field_type, mode="REPEATED")
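Threading `default_type` through these helpers changes what the pandas-level inference returns. A short sketch against the helpers as modified above; note these are internal modules, so the import path may change between releases:

    import pandas

    from pandas_gbq.schema import pandas_to_bigquery

    # A list column with at least one valid element: the element type is
    # detected and the field is marked REPEATED.
    pandas_to_bigquery.values_to_bigquery_field(
        "tags", pandas.Series([["a", "b"], []]), default_type="STRING"
    )
    # -> SchemaField('tags', 'STRING', 'REPEATED', ...)

    # An all-None column still cannot be typed at this level: None is returned
    # so the caller can fall back to the pyarrow-based detection, which is
    # where default_type is ultimately applied.
    pandas_to_bigquery.values_to_bigquery_field(
        "state", pandas.Series([None, None]), default_type="STRING"
    )
    # -> None
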
diff --git a/pandas_gbq/schema/pyarrow_to_bigquery.py b/pandas_gbq/schema/pyarrow_to_bigquery.py
index da1a1ce8..91677f9d 100644
--- a/pandas_gbq/schema/pyarrow_to_bigquery.py
+++ b/pandas_gbq/schema/pyarrow_to_bigquery.py
@@ -37,7 +37,31 @@
 }
 
 
-def arrow_type_to_bigquery_field(name, type_) -> Optional[schema.SchemaField]:
+def arrow_type_to_bigquery_field(
+    name, type_, default_type="STRING"
+) -> Optional[schema.SchemaField]:
+    """Infers the BigQuery schema field type from an arrow type.
+
+    Args:
+        name (str):
+            Name of the column/field.
+        type_:
+            A pyarrow type object.
+        default_type (str):
+            The type to assume for ``null()`` values. Defaults to "STRING".
+
+    Returns:
+        Optional[schema.SchemaField]:
+            The schema field, or None if a type cannot be inferred, such as if
+            it is a type that doesn't have a clear mapping in BigQuery.
+
+            ``null()`` fields are assumed to be the ``default_type``, since
+            there are no values that contradict that.
+    """
+    # If a sub-field is the null type, then assume it's the default type, as
+    # that's the best we can do.
+    # https://github.com/googleapis/python-bigquery-pandas/issues/836
+    if pyarrow.types.is_null(type_):
+        return schema.SchemaField(name, default_type)
+
     # Since both TIMESTAMP/DATETIME use pyarrow.timestamp(...), we need to use
     # a special case to disambiguate them. See:
     # https://github.com/googleapis/python-bigquery-pandas/issues/450
@@ -52,22 +76,49 @@ def arrow_type_to_bigquery_field(name, type_) -> Optional[schema.SchemaField]:
         return schema.SchemaField(name, detected_type)
 
     if pyarrow.types.is_list(type_):
-        return arrow_list_type_to_bigquery(name, type_)
+        return arrow_list_type_to_bigquery(name, type_, default_type=default_type)
 
     if pyarrow.types.is_struct(type_):
         inner_fields: list[pyarrow.Field] = []
         struct_type = cast(pyarrow.StructType, type_)
         for field_index in range(struct_type.num_fields):
             field = struct_type[field_index]
-            inner_fields.append(arrow_type_to_bigquery_field(field.name, field.type))
+            inner_fields.append(
+                arrow_type_to_bigquery_field(
+                    field.name, field.type, default_type=default_type
+                )
+            )
         return schema.SchemaField(name, "RECORD", fields=inner_fields)
 
     return None
 
 
-def arrow_list_type_to_bigquery(name, type_) -> Optional[schema.SchemaField]:
-    inner_field = arrow_type_to_bigquery_field(name, type_.value_type)
+def arrow_list_type_to_bigquery(
+    name, type_, default_type="STRING"
+) -> Optional[schema.SchemaField]:
+    """Infers the BigQuery schema field type from an arrow list type.
+
+    Args:
+        name (str):
+            Name of the column/field.
+        type_:
+            A pyarrow type object.
+        default_type (str):
+            The type to assume for ``null()`` values. Defaults to "STRING".
+
+    Returns:
+        Optional[schema.SchemaField]:
+            The schema field, or None if a type cannot be inferred, such as if
+            it is a type that doesn't have a clear mapping in BigQuery.
+
+            ``null()`` fields are assumed to be the ``default_type``, since
+            there are no values that contradict that.
+    """
+    inner_field = arrow_type_to_bigquery_field(
+        name, type_.value_type, default_type=default_type
+    )
+
+    # If this is None, it means we got some type that we can't cleanly map to
+    # a BigQuery type, so bubble that status up.
     if inner_field is None:
         return None
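The `null()` fallback can be exercised in isolation. A sketch against the modified helper (again an internal module; the FLOAT default below is an arbitrary choice for illustration):

    import pyarrow

    from pandas_gbq.schema import pyarrow_to_bigquery

    # A bare null() type falls back to the default type, STRING by default.
    pyarrow_to_bigquery.arrow_type_to_bigquery_field("state", pyarrow.null())
    # -> SchemaField('state', 'STRING', 'NULLABLE', ...)

    # A list of null() becomes a REPEATED field of the default type.
    pyarrow_to_bigquery.arrow_type_to_bigquery_field(
        "values", pyarrow.list_(pyarrow.null()), default_type="FLOAT"
    )
    # -> SchemaField('values', 'FLOAT', 'REPEATED', ...)
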
diff --git a/tests/unit/schema/test_pandas_to_bigquery.py b/tests/unit/schema/test_pandas_to_bigquery.py
index 924ce1ee..777c3825 100644
--- a/tests/unit/schema/test_pandas_to_bigquery.py
+++ b/tests/unit/schema/test_pandas_to_bigquery.py
@@ -21,13 +21,34 @@ def module_under_test():
 def test_dataframe_to_bigquery_fields_w_named_index(module_under_test):
     df_data = collections.OrderedDict(
         [
+            ("str_index", ["a", "b"]),
             ("str_column", ["hello", "world"]),
             ("int_column", [42, 8]),
+            ("nullable_int_column", pandas.Series([42, None], dtype="Int64")),
+            ("uint_column", pandas.Series([7, 13], dtype="uint8")),
             ("bool_column", [True, False]),
+            ("boolean_column", pandas.Series([True, None], dtype="boolean")),
+            (
+                "datetime_column",
+                [
+                    datetime.datetime(1999, 12, 31, 23, 59, 59, 999999),
+                    datetime.datetime(2000, 1, 1, 0, 0, 0),
+                ],
+            ),
+            (
+                "timestamp_column",
+                [
+                    datetime.datetime(
+                        1999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+                    ),
+                    datetime.datetime(
+                        2000, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
+                    ),
+                ],
+            ),
         ]
     )
-    index = pandas.Index(["a", "b"], name="str_index")
-    dataframe = pandas.DataFrame(df_data, index=index)
+    dataframe = pandas.DataFrame(df_data).set_index("str_index", drop=True)
 
     returned_schema = module_under_test.dataframe_to_bigquery_fields(
         dataframe, [], index=True
@@ -37,7 +58,12 @@ def test_dataframe_to_bigquery_fields_w_named_index(module_under_test):
         schema.SchemaField("str_index", "STRING", "NULLABLE"),
         schema.SchemaField("str_column", "STRING", "NULLABLE"),
         schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
+        schema.SchemaField("nullable_int_column", "INTEGER", "NULLABLE"),
+        schema.SchemaField("uint_column", "INTEGER", "NULLABLE"),
         schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"),
+        schema.SchemaField("boolean_column", "BOOLEAN", "NULLABLE"),
+        schema.SchemaField("datetime_column", "DATETIME", "NULLABLE"),
+        schema.SchemaField("timestamp_column", "TIMESTAMP", "NULLABLE"),
     )
 
     assert returned_schema == expected_schema
@@ -45,19 +71,24 @@ def test_dataframe_to_bigquery_fields_w_named_index(module_under_test):
 def test_dataframe_to_bigquery_fields_w_multiindex(module_under_test):
     df_data = collections.OrderedDict(
         [
+            ("str_index", ["a", "a"]),
+            ("int_index", [0, 0]),
+            (
+                "dt_index",
+                [
+                    datetime.datetime(1999, 12, 31, 23, 59, 59, 999999),
+                    datetime.datetime(2000, 1, 1, 0, 0, 0),
+                ],
+            ),
             ("str_column", ["hello", "world"]),
             ("int_column", [42, 8]),
             ("bool_column", [True, False]),
         ]
     )
-    index = pandas.MultiIndex.from_tuples(
-        [
-            ("a", 0, datetime.datetime(1999, 12, 31, 23, 59, 59, 999999)),
-            ("a", 0, datetime.datetime(2000, 1, 1, 0, 0, 0)),
-        ],
-        names=["str_index", "int_index", "dt_index"],
+    dataframe = pandas.DataFrame(df_data).set_index(
+        ["str_index", "int_index", "dt_index"],
+        drop=True,
     )
-    dataframe = pandas.DataFrame(df_data, index=index)
 
     returned_schema = module_under_test.dataframe_to_bigquery_fields(
         dataframe, [], index=True
diff --git a/tests/unit/schema/test_pyarrow_to_bigquery.py b/tests/unit/schema/test_pyarrow_to_bigquery.py
index 4af0760f..dc5504f9 100644
--- a/tests/unit/schema/test_pyarrow_to_bigquery.py
+++ b/tests/unit/schema/test_pyarrow_to_bigquery.py
@@ -42,16 +42,14 @@ def test_arrow_type_to_bigquery_field_scalar_types(pyarrow_type, bigquery_type):
 
 
 def test_arrow_type_to_bigquery_field_unknown():
-    assert (
-        pyarrow_to_bigquery.arrow_type_to_bigquery_field("test_name", pyarrow.null())
-        is None
-    )
+    assert pyarrow_to_bigquery.arrow_type_to_bigquery_field(
+        "test_name", pyarrow.null(), default_type="DEFAULT_TYPE"
+    ) == bigquery.SchemaField("test_name", "DEFAULT_TYPE")
 
 
 def test_arrow_type_to_bigquery_field_list_of_unknown():
-    assert (
-        pyarrow_to_bigquery.arrow_type_to_bigquery_field(
-            "test_name", pyarrow.list_(pyarrow.null())
-        )
-        is None
-    )
+    assert pyarrow_to_bigquery.arrow_type_to_bigquery_field(
+        "test_name",
+        pyarrow.list_(pyarrow.null()),
+        default_type="DEFAULT_TYPE",
+    ) == bigquery.SchemaField("test_name", "DEFAULT_TYPE", mode="REPEATED")
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index 48e8862a..0da16baf 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -70,7 +70,7 @@ def test_schema_is_subset_fails_if_not_subset():
     [
         pytest.param(
             pandas.DataFrame(data={"col1": [object()]}),
-            {"fields": [{"name": "col1", "type": "STRING"}]},
+            {"fields": [{"name": "col1", "type": "DEFAULT_TYPE"}]},
             id="default-type-fails-pyarrow-conversion",
         ),
         (
@@ -182,13 +182,15 @@
                     else "object",
                 ),
                 "list_of_struct": pandas.Series(
-                    [[], [{"test": "abc"}], []],
+                    [[], [{"test": 123.0}], []],
                     dtype=pandas.ArrowDtype(
-                        pyarrow.list_(pyarrow.struct([("test", pyarrow.string())]))
+                        pyarrow.list_(pyarrow.struct([("test", pyarrow.float64())]))
                     )
                     if hasattr(pandas, "ArrowDtype")
                     else "object",
                 ),
+                "list_of_unknown": [[], [], []],
+                "list_of_null": [[None, None], [None], [None, None]],
             }
         ),
         {
@@ -200,17 +202,56 @@
                         "type": "RECORD",
                         "mode": "REPEATED",
                         "fields": [
-                            {"name": "test", "type": "STRING", "mode": "NULLABLE"},
+                            {"name": "test", "type": "FLOAT", "mode": "NULLABLE"},
                         ],
                     },
+                    # Use DEFAULT_TYPE because there are no values to detect a type.
+                    {
+                        "name": "list_of_unknown",
+                        "type": "DEFAULT_TYPE",
+                        "mode": "REPEATED",
+                    },
+                    {
+                        "name": "list_of_null",
+                        "type": "DEFAULT_TYPE",
+                        "mode": "REPEATED",
+                    },
                 ],
             },
             id="array",
         ),
+        pytest.param(
+            # If a struct contains only nulls in a sub-field, use the default
+            # type for subfields without a type we can determine.
+            # https://github.com/googleapis/python-bigquery-pandas/issues/836
+            pandas.DataFrame(
+                {
+                    "id": [0, 1],
+                    "positions": [{"state": None}, {"state": None}],
+                },
+            ),
+            {
+                "fields": [
+                    {"name": "id", "type": "INTEGER"},
+                    {
+                        "name": "positions",
+                        "type": "RECORD",
+                        "fields": [
+                            {
+                                "name": "state",
+                                "type": "DEFAULT_TYPE",
+                                "mode": "NULLABLE",
+                            },
+                        ],
+                    },
+                ],
+            },
+            id="issue832-null-struct-field",
+        ),
     ],
 )
 def test_generate_bq_schema(dataframe, expected_schema):
-    schema = pandas_gbq.gbq._generate_bq_schema(dataframe)
+    schema = pandas_gbq.gbq._generate_bq_schema(dataframe, default_type="DEFAULT_TYPE")
 
     # NULLABLE is the default mode.
     for field in expected_schema["fields"]:
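End to end, these tests pin down the behavior from issue 836: a struct column whose sub-field contains only NULLs now falls back to the default type where previously no type could be determined. A sketch through the same private entry point the test uses (`_generate_bq_schema` is private API and subject to change):

    import pandas

    import pandas_gbq.gbq

    df = pandas.DataFrame(
        {
            "id": [0, 1],
            "positions": [{"state": None}, {"state": None}],
        }
    )
    pandas_gbq.gbq._generate_bq_schema(df, default_type="STRING")
    # -> roughly:
    # {"fields": [
    #     {"name": "id", "type": "INTEGER", "mode": "NULLABLE"},
    #     {"name": "positions", "type": "RECORD", "mode": "NULLABLE",
    #      "fields": [{"name": "state", "type": "STRING", "mode": "NULLABLE"}]},
    # ]}
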
From b32a9c98ec717573ffe45b51ce834a3903df8bc1 Mon Sep 17 00:00:00 2001
From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com>
Date: Thu, 19 Dec 2024 11:48:05 -0600
Subject: [PATCH 4/4] chore(main): release 0.26.0 (#837)

Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
---
 CHANGELOG.md          | 12 ++++++++++++
 pandas_gbq/version.py |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bcf55cd4..41b4c8df 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # Changelog
 
+## [0.26.0](https://github.com/googleapis/python-bigquery-pandas/compare/v0.25.0...v0.26.0) (2024-12-19)
+
+
+### Features
+
+* `to_gbq` fails with `TypeError` if passing in a bigframes DataFrame object ([#833](https://github.com/googleapis/python-bigquery-pandas/issues/833)) ([5004d08](https://github.com/googleapis/python-bigquery-pandas/commit/5004d08c6af93471686ccb319c69cd38c7893042))
+
+
+### Bug Fixes
+
+* `to_gbq` uses `default_type` for ambiguous array types and struct field types ([#838](https://github.com/googleapis/python-bigquery-pandas/issues/838)) ([cf1aadd](https://github.com/googleapis/python-bigquery-pandas/commit/cf1aadd48165617768fecff91e68941255148dbd))
+
 ## [0.25.0](https://github.com/googleapis/python-bigquery-pandas/compare/v0.24.0...v0.25.0) (2024-12-11)
 
diff --git a/pandas_gbq/version.py b/pandas_gbq/version.py
index 478b8136..0c8dab15 100644
--- a/pandas_gbq/version.py
+++ b/pandas_gbq/version.py
@@ -2,4 +2,4 @@
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
 
-__version__ = "0.25.0"
+__version__ = "0.26.0"