Skip to content

fix: fix the default value for na_value for numpy conversions #1766

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 29, 2025
3 changes: 2 additions & 1 deletion bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import google.cloud.bigquery as bigquery
import numpy
import pandas
from pandas.api import extensions as pd_ext
import pandas.io.formats.format
import pyarrow
import tabulate
Expand Down Expand Up @@ -4097,7 +4098,7 @@ def to_numpy(
self,
dtype=None,
copy=False,
na_value=None,
na_value=pd_ext.no_default,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because pandas' own `to_numpy` already uses `no_default` as the default for `na_value`, this change is not counted as a breaking change:
https://github.com/pandas-dev/pandas/blob/v2.2.3/pandas/core/frame.py#L1933-L1997

*,
allow_large_results=None,
**kwargs,
Expand Down
3 changes: 2 additions & 1 deletion bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import google.cloud.bigquery as bigquery
import numpy
import pandas
from pandas.api import extensions as pd_ext
import pandas.core.dtypes.common
import pyarrow as pa
import typing_extensions
Expand Down Expand Up @@ -2109,7 +2110,7 @@ def to_numpy(
self,
dtype=None,
copy=False,
na_value=None,
na_value=pd_ext.no_default,
*,
allow_large_results=None,
**kwargs,
Expand Down
11 changes: 11 additions & 0 deletions tests/system/small/test_dataframe_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from typing import Tuple

import google.api_core.exceptions
import numpy
import numpy.testing
import pandas as pd
import pandas.testing
import pyarrow as pa
Expand Down Expand Up @@ -1061,3 +1063,12 @@ def test_to_sql_query_named_index_excluded(
utils.assert_pandas_df_equal(
roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True
)


def test_to_numpy(scalars_dfs):
    """Compare ``numpy.array(..., dtype="int64")`` across both fixture frames.

    The ``int64_too`` column is selected as a one-column frame from each
    member of the ``scalars_dfs`` pair, and the two resulting arrays must be
    equal.
    """
    bf_df, pd_df = scalars_dfs
    columns = ["int64_too"]

    expected = numpy.array(pd_df[columns], dtype="int64")
    actual = numpy.array(bf_df[columns], dtype="int64")

    numpy.testing.assert_array_equal(actual, expected)
2 changes: 1 addition & 1 deletion tests/system/small/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2132,7 +2132,7 @@ def test_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, col_na
],
)
def test_unique(scalars_df_index, scalars_pandas_df_index, col_name):
bf_uniq = scalars_df_index[col_name].unique().to_numpy()
bf_uniq = scalars_df_index[col_name].unique().to_numpy(na_value=None)
pd_uniq = scalars_pandas_df_index[col_name].unique()
numpy.array_equal(pd_uniq, bf_uniq)

Expand Down
11 changes: 11 additions & 0 deletions tests/system/small/test_series_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy
import numpy.testing
import pandas as pd
import pytest

Expand Down Expand Up @@ -114,3 +116,12 @@ def test_to_pandas_batches(scalars_dfs, page_size, max_results, allow_large_resu
total_rows += actual_rows

assert total_rows == expected_total_rows


def test_to_numpy(scalars_dfs):
    """Compare ``numpy.array(..., dtype="int64")`` across both fixture columns.

    The ``int64_too`` column is selected from each member of the
    ``scalars_dfs`` pair, and the two resulting arrays must be equal.
    """
    bf_df, pd_df = scalars_dfs
    column = "int64_too"

    expected = numpy.array(pd_df[column], dtype="int64")
    actual = numpy.array(bf_df[column], dtype="int64")

    numpy.testing.assert_array_equal(actual, expected)
3 changes: 2 additions & 1 deletion third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import bigframes_vendored.pandas.core.generic as generic
import numpy as np
import pandas as pd
from pandas.api import extensions as pd_ext

# -----------------------------------------------------------------------
# DataFrame class
Expand Down Expand Up @@ -369,7 +370,7 @@ def to_numpy(
self,
dtype=None,
copy=False,
na_value=None,
na_value=pd_ext.no_default,
*,
allow_large_results=None,
**kwargs,
Expand Down
8 changes: 5 additions & 3 deletions third_party/bigframes_vendored/pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
from bigframes_vendored.pandas.core.generic import NDFrame
import numpy
import numpy as np
from pandas._libs import lib
from pandas._typing import Axis, FilePath, NaPosition, WriteBuffer
from pandas.api import extensions as pd_ext

from bigframes import constants

Expand Down Expand Up @@ -323,7 +323,7 @@ def reset_index(
self,
*,
drop: bool = False,
name=lib.no_default,
name=pd_ext.no_default,
) -> DataFrame | Series | None:
"""
Generate a new DataFrame or Series with the index reset.
Expand Down Expand Up @@ -730,7 +730,9 @@ def tolist(self, *, allow_large_results: Optional[bool] = None) -> list:

to_list = tolist

def to_numpy(self, dtype, copy=False, na_value=None, *, allow_large_results=None):
def to_numpy(
self, dtype, copy=False, na_value=pd_ext.no_default, *, allow_large_results=None
):
"""
A NumPy ndarray representing the values in this Series or Index.

Expand Down