Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,15 +620,31 @@ def to_pandas_batches(
ordered=True,
use_explicit_destination=allow_large_results,
)

total_batches = 0
for df in execute_result.to_pandas_batches(
page_size=page_size, max_results=max_results
):
total_batches += 1
self._copy_index_to_pandas(df)
if squeeze:
yield df.squeeze(axis=1)
else:
yield df

# To reduce the number of edge cases to consider when working with the
# results of this, always return at least one DataFrame. See:
# b/428918844.
if total_batches == 0:
df = pd.DataFrame(
{
col: pd.Series([], dtype=self.expr.get_column_type(col))
for col in itertools.chain(self.value_columns, self.index_columns)
}
)
self._copy_index_to_pandas(df)
yield df

def _copy_index_to_pandas(self, df: pd.DataFrame):
"""Set the index on pandas DataFrame to match this block.

Expand Down
22 changes: 22 additions & 0 deletions tests/system/small/test_dataframe_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,28 @@ def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index):
pd.testing.assert_series_equal(actual, expected)


def test_to_pandas_batches_w_empty_dataframe(session):
    """Verify that to_pandas_batches() yields at least one DataFrame.

    An empty BigQuery DataFrames object with a two-level index is built, and
    the single batch that comes back is checked for the expected index names,
    column labels, and dtypes.

    See b/428918844 for additional context.
    """
    # Zero-row inputs: two untyped index columns and two explicitly typed
    # value columns.
    zero_row_data = {
        "idx1": [],
        "idx2": [],
        "col1": pandas.Series([], dtype="string[pyarrow]"),
        "col2": pandas.Series([], dtype="Int64"),
    }
    unindexed = bpd.DataFrame(zero_row_data, session=session)
    empty_bf_df = unindexed.set_index(["idx1", "idx2"], drop=True)

    batches = list(empty_bf_df.to_pandas_batches())

    # Exactly one batch is expected even though there are no rows.
    assert len(batches) == 1

    only_batch = batches[0]
    assert list(only_batch.index.names) == ["idx1", "idx2"]
    assert list(only_batch.columns) == ["col1", "col2"]
    pandas.testing.assert_series_equal(only_batch.dtypes, empty_bf_df.dtypes)


@pytest.mark.parametrize("allow_large_results", (True, False))
def test_to_pandas_batches_w_page_size_and_max_results(session, allow_large_results):
"""Verify to_pandas_batches() APIs returns the expected page size.
Expand Down