From d2f32df4670f4c18464c6772896bf1583c36e338 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 6 Jan 2025 10:49:34 -0600 Subject: [PATCH 1/3] fix: ensure BIGNUMERIC type is used if scale > 9 in Decimal values (#844) --- pandas_gbq/schema/pyarrow_to_bigquery.py | 7 ++++++ tests/unit/schema/test_pandas_to_bigquery.py | 26 ++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/pandas_gbq/schema/pyarrow_to_bigquery.py b/pandas_gbq/schema/pyarrow_to_bigquery.py index 91677f9d..d917499f 100644 --- a/pandas_gbq/schema/pyarrow_to_bigquery.py +++ b/pandas_gbq/schema/pyarrow_to_bigquery.py @@ -72,6 +72,13 @@ def arrow_type_to_bigquery_field( return schema.SchemaField(name, "TIMESTAMP") detected_type = _ARROW_SCALAR_IDS_TO_BQ.get(type_.id, None) + + # We need a special case for values that might fit in Arrow decimal128 but + # not with the scale/precision that is used in BigQuery's NUMERIC type. + # See: https://github.com/googleapis/python-bigquery/issues/1650 + if detected_type == "NUMERIC" and type_.scale > 9: + detected_type = "BIGNUMERIC" + if detected_type is not None: return schema.SchemaField(name, detected_type) diff --git a/tests/unit/schema/test_pandas_to_bigquery.py b/tests/unit/schema/test_pandas_to_bigquery.py index 777c3825..f3c4410b 100644 --- a/tests/unit/schema/test_pandas_to_bigquery.py +++ b/tests/unit/schema/test_pandas_to_bigquery.py @@ -4,6 +4,7 @@ import collections import datetime +import decimal import operator from google.cloud.bigquery import schema @@ -46,6 +47,29 @@ def test_dataframe_to_bigquery_fields_w_named_index(module_under_test): ), ], ), + # Need to fallback to Arrow to avoid data loss and disambiguate + # NUMERIC from BIGNUMERIC. We don't want to pick too small of a + # type and lose precision. See: + # https://github.com/googleapis/python-bigquery/issues/1650 + # + ( + "bignumeric_column", + [ + # Start with a lower precision Decimal to make sure we + # aren't trying to determine the type from just one value. + decimal.Decimal("1.25"), + decimal.Decimal("0.1234567891"), + ], + ), + ( + "numeric_column", + [ + # Minimum value greater than 0 that can be handled: 1e-9 + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric_types + decimal.Decimal("0.000000001"), + decimal.Decimal("-0.000000001"), + ], + ), ] ) dataframe = pandas.DataFrame(df_data).set_index("str_index", drop=True) @@ -64,6 +88,8 @@ def test_dataframe_to_bigquery_fields_w_named_index(module_under_test): schema.SchemaField("boolean_column", "BOOLEAN", "NULLABLE"), schema.SchemaField("datetime_column", "DATETIME", "NULLABLE"), schema.SchemaField("timestamp_column", "TIMESTAMP", "NULLABLE"), + schema.SchemaField("bignumeric_column", "BIGNUMERIC", "NULLABLE"), + schema.SchemaField("numeric_column", "NUMERIC", "NULLABLE"), ) assert returned_schema == expected_schema From c9fe842d3c128780e4e03880246e81a331671579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 6 Jan 2025 10:54:42 -0600 Subject: [PATCH 2/3] chore: add bigframes team as co-owners for pandas-gbq (#847) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: add bigframes team as co-owners for pandas-gbq * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .github/CODEOWNERS | 8 ++++---- .github/blunderbuss.yml | 3 +++ .repo-metadata.json | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 193b4363..24c0ca96 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,8 +5,8 @@ # https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax # Note: This file is autogenerated. To make changes to the codeowner team, please update .repo-metadata.json. -# @googleapis/yoshi-python @googleapis/api-bigquery are the default owners for changes in this repo -* @googleapis/yoshi-python @googleapis/api-bigquery +# @googleapis/yoshi-python @googleapis/api-bigquery @googleapis/api-bigquery-dataframe are the default owners for changes in this repo +* @googleapis/yoshi-python @googleapis/api-bigquery @googleapis/api-bigquery-dataframe -# @googleapis/python-samples-reviewers @googleapis/api-bigquery are the default owners for samples changes -/samples/ @googleapis/python-samples-reviewers @googleapis/api-bigquery +# @googleapis/python-samples-reviewers @googleapis/api-bigquery @googleapis/api-bigquery-dataframe are the default owners for samples changes +/samples/ @googleapis/python-samples-reviewers @googleapis/api-bigquery @googleapis/api-bigquery-dataframe diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml index 5b7383dc..6677a65c 100644 --- a/.github/blunderbuss.yml +++ b/.github/blunderbuss.yml @@ -5,6 +5,7 @@ # team, please update `codeowner_team` in `.repo-metadata.json`. assign_issues: - googleapis/api-bigquery + - googleapis/api-bigquery-dataframe assign_issues_by: - labels: @@ -12,6 +13,8 @@ assign_issues_by: to: - googleapis/python-samples-reviewers - googleapis/api-bigquery + - googleapis/api-bigquery-dataframe assign_prs: - googleapis/api-bigquery + - googleapis/api-bigquery-dataframe diff --git a/.repo-metadata.json b/.repo-metadata.json index 912be418..b016c47d 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -11,5 +11,5 @@ "distribution_name": "pandas-gbq", "api_id": "bigquery.googleapis.com", "default_version": "", - "codeowner_team": "@googleapis/api-bigquery" + "codeowner_team": "@googleapis/api-bigquery @googleapis/api-bigquery-dataframe" } From 6999a1f594c1d3ef3e397b047eee94e1141156a0 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 10:15:00 -0800 Subject: [PATCH 3/3] chore(main): release 0.26.1 (#848) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Sweña (Swast) --- CHANGELOG.md | 7 +++++++ pandas_gbq/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41b4c8df..18fec1f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.26.1](https://github.com/googleapis/python-bigquery-pandas/compare/v0.26.0...v0.26.1) (2025-01-06) + + +### Bug Fixes + +* Ensure BIGNUMERIC type is used if scale > 9 in Decimal values ([#844](https://github.com/googleapis/python-bigquery-pandas/issues/844)) ([d2f32df](https://github.com/googleapis/python-bigquery-pandas/commit/d2f32df4670f4c18464c6772896bf1583c36e338)) + ## [0.26.0](https://github.com/googleapis/python-bigquery-pandas/compare/v0.25.0...v0.26.0) (2024-12-19) diff --git a/pandas_gbq/version.py b/pandas_gbq/version.py index 0c8dab15..47551e36 100644 --- a/pandas_gbq/version.py +++ b/pandas_gbq/version.py @@ -2,4 +2,4 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -__version__ = "0.26.0" +__version__ = "0.26.1"