Comparing changes

base repository: googleapis/python-bigquery-dataframes
base: v0.25.0
head repository: googleapis/python-bigquery-dataframes
compare: v0.26.0

Commits on Mar 14, 2024

  1. chore: add deferred exec code samples (#439)

    * chore: add deferred exec code samples
    
    * fix tests
    
    * fix tests
    GarrettWu authored Mar 14, 2024
    456fb32
  2. feat: add DataFrame.pipe() method (#421)
    95f5a6e

Commits on Mar 15, 2024

  1. feat: support datetime related casting in (Series|DataFrame|Index).astype (#442)
    
    * feat: support datetime related casting in (Series|DataFrame|Index).astype
    
    * chore: add deferred exec code samples (#439)
    
    * chore: add deferred exec code samples
    
    * fix tests
    
    * fix tests
    
    * feat: add DataFrame.pipe() method (#421)
    
    * addressed comments
    
    ---------
    
    Co-authored-by: Garrett Wu <6505921+GarrettWu@users.noreply.github.com>
    Co-authored-by: TrevorBergeron <tbergeron@google.com>
    3 people authored Mar 15, 2024
    fde339b
  2. fix: warn when `read_gbq` / `read_gbq_table` uses the snapshot time cache (#441)
    e16a8c0
  3. docs: add version information to bug template (#437)

    * docs: add version information to bug template
    
    * 🦉 Updates from OwlBot post-processor
    
    See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
    
    ---------
    
    Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
    tswast and gcf-owl-bot[bot] authored Mar 15, 2024
    91bd39e
  4. feat: (Series|DataFrame).plot (#438)
    1c3e668
  5. test: enable BYOSA test for remote_function cloud function (#432)

    This support was added in [PR#407](https://togithub.com/googleapis/python-bigquery-dataframes/pull/407), but the test was only verified locally, since the project `bigframes-load-testing` is latchkey managed and would require some extra configuration to set up. This change goes one step further by enabling the test in the automated pipelines, targeting a different project, `bigframes-dev-perf`, which is easier to set up through the Cloud Console. Eventually it should be moved to run entirely in `bigframes-load-testing` once the necessary configuration is done through latchkey (internal issue 329339908 was created to track the work).
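
    For context, a minimal sketch of the kind of `remote_function` deployment this test exercises. The `cloud_function_service_account` argument name and the service-account email below are illustrative assumptions based on the BYOSA description, not taken from this change:

    ```python
    import bigframes.pandas as bpd

    # Deploying a remote function provisions a Cloud Function plus a BigQuery
    # remote function; BYOSA lets that Cloud Function run as a caller-supplied
    # service account rather than the default one.
    @bpd.remote_function(
        [float],
        float,
        reuse=False,
        # Assumed parameter name for BYOSA; the email is a placeholder.
        cloud_function_service_account="byosa-tester@bigframes-dev-perf.iam.gserviceaccount.com",
    )
    def celsius_to_fahrenheit(c: float) -> float:
        return c * 9.0 / 5.0 + 32.0
    ```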
    
    shobsi authored Mar 15, 2024
    40ddb69
  6. fix: raise ValueError when read_pandas() receives a bigframes `DataFrame` (#447)
    
    * fix: raise `ValueError` when `read_pandas()` receives a bigframes `DataFrame`
    
    * make actually a unit test
    tswast authored Mar 15, 2024
    b28f9fd
  7. fix: fix broken link in covid notebook (#450)

    * fix: fix broken link in covid notebook
    
    * Update bq_dataframes_covid_line_graphs.ipynb
    
    ---------
    
    Co-authored-by: Henry J Solberg <henryjsolberg@google.com>
    milkshakeiii and Henry J Solberg authored Mar 15, 2024
    adadb06

Commits on Mar 18, 2024

  1. docs: add the docs for loc and iloc indexers (#446)

    ashleyxuu authored Mar 18, 2024
    14ab8d8
  2. docs: indicate that project and location are optional in example notebooks (#451)
    1df0140
  3. feat: support Series.dt.strftime (#453)

    * feat: support Series.dt.strftime
    
    * 🦉 Updates from OwlBot post-processor
    
    See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
    
    * address comments
    
    * fix imports
    
    * 🦉 Updates from OwlBot post-processor
    
    See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
    
    ---------
    
    Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
    junyazhang and gcf-owl-bot[bot] authored Mar 18, 2024
    8f6e955
  4. fix: series.(to_csv|to_json) leverages bq export (#452)

    Co-authored-by: Tim Sweña (Swast) <swast@google.com>
    chelsea-lin and tswast authored Mar 18, 2024
    718a00c
  5. feat: read_gbq_table supports LIKE as an operator in filters (#454)

    Fixes internal issue 330149095 🦕
    tswast authored Mar 18, 2024
    d2d425a

Commits on Mar 19, 2024

  1. fix: disable to_json and to_csv related tests (#462)

    * fix: disable series to_json and to_csv tests
    
    * disable the rest ones
    chelsea-lin authored Mar 19, 2024
    874026d
  2. refactor: Reimplement implicit joiner at expression layer (#436)

    TrevorBergeron authored Mar 19, 2024
    6611c28
  3. fix: df.drop_na preserves columns dtype (#457)

    TrevorBergeron authored Mar 19, 2024
    3bab1a9
  4. test: do GCF cleanup in both presubmit and e2e tests (#423)

    * test: do GCF cleanup in both presubmit and e2e tests
    
    * use functions client from session
    
    * address review comments
    
    ---------
    
    Co-authored-by: Tim Sweña (Swast) <swast@google.com>
    shobsi and tswast authored Mar 19, 2024
    5dd9e6e
  5. docs: add the pages for at and iat indexers (#456)
    340f0b5
  6. docs: add code samples for ml.metrics.r2_score (#459)

    Fixes internal #330220122 🦕
    ashleyxuu authored Mar 19, 2024
    85fefa2
  7. fix: fix grouping series on multiple other series (#455)
    3971bd2
  8. fix!: exclude remote models for .register() (#465)

    * fix: exclude remote models for .register()
    
    * fix mypy
    GarrettWu authored Mar 19, 2024
    73fe0f8
  9. feat: set `force=True` by default in `DataFrame.peek()` (#469)
    4e8e97d
  10. fix: any() on empty set now correctly returns False (#471)

    TrevorBergeron authored Mar 19, 2024
    f55680c
  11. fix: fix broken multiindex loc cases (#467)

    TrevorBergeron authored Mar 19, 2024
    b519197
  12. fix: groupby aggregates no longer check if grouping keys are numeric (#472)
    
    TrevorBergeron authored Mar 19, 2024
    4fbf938

Commits on Mar 20, 2024

  1. chore(main): release 0.26.0 (#445)

    Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
    release-please[bot] authored Mar 20, 2024
    4727563
Showing with 8,963 additions and 3,539 deletions.
  1. +19 −0 .github/ISSUE_TEMPLATE/bug_report.md
  2. +40 −0 CHANGELOG.md
  3. +11 −0 bigframes/_config/compute_options.py
  4. +9 −7 bigframes/core/__init__.py
  5. +10 −27 bigframes/core/block_transforms.py
  6. +29 −16 bigframes/core/blocks.py
  7. +1 −1 bigframes/core/compile/aggregate_compiler.py
  8. +0 −2 bigframes/core/compile/compiler.py
  9. +0 −265 bigframes/core/compile/row_identity.py
  10. +55 −13 bigframes/core/compile/scalar_op_compiler.py
  11. +90 −128 bigframes/core/compile/single_column.py
  12. +21 −1 bigframes/core/expression.py
  13. +2 −2 bigframes/core/groupby/__init__.py
  14. +57 −100 bigframes/core/indexers.py
  15. +3 −2 bigframes/core/indexes/index.py
  16. +27 −2 bigframes/core/ordering.py
  17. +299 −0 bigframes/core/rewrite.py
  18. +21 −18 bigframes/dataframe.py
  19. +39 −9 bigframes/dtypes.py
  20. +1 −0 bigframes/ml/base.py
  21. +3 −3 bigframes/ml/llm.py
  22. +12 −3 bigframes/operations/__init__.py
  23. +7 −1 bigframes/operations/datetimes.py
  24. +28 −29 bigframes/operations/plotting.py
  25. +22 −14 bigframes/series.py
  26. +25 −0 bigframes/session/__init__.py
  27. +1 −1 bigframes/version.py
  28. +1 −0 docs/reference/bigframes.pandas/index.rst
  29. +60 −0 docs/reference/bigframes.pandas/indexers.rst
  30. +18 −2 docs/templates/toc.yml
  31. +2,245 −2,197 notebooks/dataframes/dataframe.ipynb
  32. +88 −64 notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb
  33. +236 −226 notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
  34. +7 −0 notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb
  35. +858 −48 notebooks/getting_started/getting_started_bq_dataframes.ipynb
  36. +3,520 −54 notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb
  37. +7 −0 notebooks/location/regionalized.ipynb
  38. +12 −5 notebooks/regression/bq_dataframes_ml_linear_regression.ipynb
  39. +147 −29 notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb
  40. +30 −0 owlbot.py
  41. +64 −3 tests/system/conftest.py
  42. +76 −169 tests/system/large/test_remote_function.py
  43. +4 −13 tests/system/small/ml/test_register.py
  44. +47 −0 tests/system/small/operations/test_datetimes.py
  45. +28 −0 tests/system/small/operations/test_plotting.py
  46. +58 −7 tests/system/small/test_dataframe.py
  47. +3 −0 tests/system/small/test_dataframe_io.py
  48. +17 −0 tests/system/small/test_groupby.py
  49. +4 −0 tests/system/small/test_index.py
  50. +22 −3 tests/system/small/test_multiindex.py
  51. +153 −10 tests/system/small/test_series.py
  52. +12 −0 tests/system/small/test_session.py
  53. +63 −0 tests/system/utils.py
  54. +12 −0 tests/unit/resources.py
  55. +15 −0 tests/unit/session/test_io_pandas.py
  56. +19 −0 tests/unit/session/test_session.py
  57. +38 −0 third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py
  58. +42 −0 third_party/bigframes_vendored/pandas/core/common.py
  59. +20 −0 third_party/bigframes_vendored/pandas/core/config_init.py
  60. +24 −3 third_party/bigframes_vendored/pandas/core/frame.py
  61. +106 −3 third_party/bigframes_vendored/pandas/core/generic.py
  62. +24 −56 third_party/bigframes_vendored/pandas/core/series.py
  63. +2 −2 third_party/bigframes_vendored/pandas/io/gbq.py
  64. +37 −1 third_party/bigframes_vendored/pandas/plotting/_core.py
  65. +12 −0 third_party/bigframes_vendored/sklearn/metrics/_regression.py
19 changes: 19 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
@@ -22,6 +22,25 @@ If you are still having issues, please be sure to include as much information as
- pip version: `pip --version`
- `bigframes` version: `pip show bigframes`


```python
import sys
import bigframes
import google.cloud.bigquery
import ibis
import pandas
import pyarrow
import sqlglot

print(f"Python: {sys.version}")
print(f"bigframes=={bigframes.__version__}")
print(f"google-cloud-bigquery=={google.cloud.bigquery.__version__}")
print(f"ibis=={ibis.__version__}")
print(f"pandas=={pandas.__version__}")
print(f"pyarrow=={pyarrow.__version__}")
print(f"sqlglot=={sqlglot.__version__}")
```

#### Steps to reproduce

1. ?
40 changes: 40 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,46 @@

[1]: https://pypi.org/project/bigframes/#history

## [0.26.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.25.0...v0.26.0) (2024-03-20)


### ⚠ BREAKING CHANGES

* exclude remote models for .register() ([#465](https://github.com/googleapis/python-bigquery-dataframes/issues/465))

### Features

* (Series|DataFrame).plot ([#438](https://github.com/googleapis/python-bigquery-dataframes/issues/438)) ([1c3e668](https://github.com/googleapis/python-bigquery-dataframes/commit/1c3e668ceb26fd0f1377acbf6b95e8f4bcef40d6))
* `read_gbq_table` supports `LIKE` as an operator in `filters` ([#454](https://github.com/googleapis/python-bigquery-dataframes/issues/454)) ([d2d425a](https://github.com/googleapis/python-bigquery-dataframes/commit/d2d425a93aa9e96f3b71c3ca3b185f4b5eaf32ef))
* Add DataFrame.pipe() method ([#421](https://github.com/googleapis/python-bigquery-dataframes/issues/421)) ([95f5a6e](https://github.com/googleapis/python-bigquery-dataframes/commit/95f5a6e749468743af65062e559bc35ac56f3c24))
* Set `force=True` by default in `DataFrame.peek()` ([#469](https://github.com/googleapis/python-bigquery-dataframes/issues/469)) ([4e8e97d](https://github.com/googleapis/python-bigquery-dataframes/commit/4e8e97d661078ed38d77be93b0bc1ad0fd52949c))
* Support datetime related casting in (Series|DataFrame|Index).astype ([#442](https://github.com/googleapis/python-bigquery-dataframes/issues/442)) ([fde339b](https://github.com/googleapis/python-bigquery-dataframes/commit/fde339b71c754e617c61052940215b77890b59e4))
* Support Series.dt.strftime ([#453](https://github.com/googleapis/python-bigquery-dataframes/issues/453)) ([8f6e955](https://github.com/googleapis/python-bigquery-dataframes/commit/8f6e955fc946db97c95ea012659432355b0cd12c))


### Bug Fixes

* Any() on empty set now correctly returns False ([#471](https://github.com/googleapis/python-bigquery-dataframes/issues/471)) ([f55680c](https://github.com/googleapis/python-bigquery-dataframes/commit/f55680cd0eed46ee06cd9baf658de792f4a27f31))
* Df.drop_na preserves columns dtype ([#457](https://github.com/googleapis/python-bigquery-dataframes/issues/457)) ([3bab1a9](https://github.com/googleapis/python-bigquery-dataframes/commit/3bab1a917a5833bd58b20071a229ee95cf86a251))
* Disable to_json and to_csv related tests ([#462](https://github.com/googleapis/python-bigquery-dataframes/issues/462)) ([874026d](https://github.com/googleapis/python-bigquery-dataframes/commit/874026da612bf08fbaf6d7dbfaa3325dc8a61500))
* Exclude remote models for .register() ([#465](https://github.com/googleapis/python-bigquery-dataframes/issues/465)) ([73fe0f8](https://github.com/googleapis/python-bigquery-dataframes/commit/73fe0f89a96557afc4225521654978b96a2291b3))
* Fix broken link in covid notebook ([#450](https://github.com/googleapis/python-bigquery-dataframes/issues/450)) ([adadb06](https://github.com/googleapis/python-bigquery-dataframes/commit/adadb0658c35142fed228abbd9baa42f9372f44b))
* Fix broken multiindex loc cases ([#467](https://github.com/googleapis/python-bigquery-dataframes/issues/467)) ([b519197](https://github.com/googleapis/python-bigquery-dataframes/commit/b519197d51cc098ac4981a9a57a9d6988ba07d03))
* Fix grouping series on multiple other series ([#455](https://github.com/googleapis/python-bigquery-dataframes/issues/455)) ([3971bd2](https://github.com/googleapis/python-bigquery-dataframes/commit/3971bd27c96b68b859399564dbb6abdb93de5f14))
* Groupby aggregates no longer check if grouping keys are numeric ([#472](https://github.com/googleapis/python-bigquery-dataframes/issues/472)) ([4fbf938](https://github.com/googleapis/python-bigquery-dataframes/commit/4fbf938c200a3e0e6b592aa4a4e18b59f2f34082))
* Raise `ValueError` when `read_pandas()` receives a bigframes `DataFrame` ([#447](https://github.com/googleapis/python-bigquery-dataframes/issues/447)) ([b28f9fd](https://github.com/googleapis/python-bigquery-dataframes/commit/b28f9fdd9681b3c9783a6e52322b70093e0283ec))
* Series.(to_csv|to_json) leverages bq export ([#452](https://github.com/googleapis/python-bigquery-dataframes/issues/452)) ([718a00c](https://github.com/googleapis/python-bigquery-dataframes/commit/718a00c1fa8ac44b0d3a79a2217e5b12690785fb))
* Warn when `read_gbq` / `read_gbq_table` uses the snapshot time cache ([#441](https://github.com/googleapis/python-bigquery-dataframes/issues/441)) ([e16a8c0](https://github.com/googleapis/python-bigquery-dataframes/commit/e16a8c0a6fb46cf1a7be12eec9471ae95d6f2c44))


### Documentation

* Add code samples for `ml.metrics.r2_score` ([#459](https://github.com/googleapis/python-bigquery-dataframes/issues/459)) ([85fefa2](https://github.com/googleapis/python-bigquery-dataframes/commit/85fefa2f1d4dbe3e0c9d4ab8124cea88eb5df38f))
* Add the docs for loc and iloc indexers ([#446](https://github.com/googleapis/python-bigquery-dataframes/issues/446)) ([14ab8d8](https://github.com/googleapis/python-bigquery-dataframes/commit/14ab8d834d793ac7644f066145912e6d50966881))
* Add the pages for at and iat indexers ([#456](https://github.com/googleapis/python-bigquery-dataframes/issues/456)) ([340f0b5](https://github.com/googleapis/python-bigquery-dataframes/commit/340f0b5b41fc5150d73890c7f27ae68dc308e160))
* Add version information to bug template ([#437](https://github.com/googleapis/python-bigquery-dataframes/issues/437)) ([91bd39e](https://github.com/googleapis/python-bigquery-dataframes/commit/91bd39e8b194ddad09d53fca96201eee58063bb9))
* Indicate that project and location are optional in example notebooks ([#451](https://github.com/googleapis/python-bigquery-dataframes/issues/451)) ([1df0140](https://github.com/googleapis/python-bigquery-dataframes/commit/1df014010652e7827a2720a906d0afe482a30ca9))
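
Illustrative usage of a few of the additions listed above (not part of the upstream changelog): a minimal sketch assuming a Google Cloud project with the BigQuery API enabled; the table, columns, and filter values are examples only.

```python
import bigframes.pandas as bpd

# read_gbq_table's `filters` now accepts the LIKE operator (#454).
df = bpd.read_gbq_table(
    "bigquery-public-data.ml_datasets.penguins",
    filters=[("species", "LIKE", "%Adelie%")],
)

# DataFrame.pipe() (#421) chains callables, mirroring the pandas API.
cleaned = df.pipe(lambda d: d.dropna())

# DataFrame.peek() now defaults to force=True (#469), so it may execute a query.
cleaned.peek(5)

# Series.dt.strftime (#453) formats timestamp-like columns as strings, and
# datetime-related casts go through .astype (#442). The column name here is
# hypothetical; the penguins table has no timestamp column.
# stamped = other_df["event_ts"].dt.strftime("%Y-%m-%d %H:%M:%S")
```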

## [0.25.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.24.0...v0.25.0) (2024-03-14)


11 changes: 11 additions & 0 deletions bigframes/_config/compute_options.py
@@ -23,6 +23,17 @@ class ComputeOptions:
"""
Encapsulates configuration for compute options.
**Examples:**
>>> import bigframes.pandas as bpd
>>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
>>> bpd.options.compute.maximum_bytes_billed = 500
>>> # df.to_pandas() # this should fail
google.api_core.exceptions.InternalServerError: 500 Query exceeded limit for bytes billed: 500. 10485760 or higher required.
>>> bpd.options.compute.maximum_bytes_billed = None # reset option
Attributes:
maximum_bytes_billed (int, Options):
Limits the bytes billed for query jobs. Queries that will have
16 changes: 9 additions & 7 deletions bigframes/core/__init__.py
@@ -28,6 +28,7 @@
import bigframes.core.nodes as nodes
from bigframes.core.ordering import OrderingColumnReference
import bigframes.core.ordering as orderings
import bigframes.core.rewrite
import bigframes.core.utils
from bigframes.core.window_spec import WindowSpec
import bigframes.dtypes
@@ -351,14 +352,15 @@ def join(
join_def: join_def.JoinDefinition,
allow_row_identity_join: bool = False,
):
return ArrayValue(
nodes.JoinNode(
left_child=self.node,
right_child=other.node,
join=join_def,
allow_row_identity_join=allow_row_identity_join,
)
join_node = nodes.JoinNode(
left_child=self.node,
right_child=other.node,
join=join_def,
allow_row_identity_join=allow_row_identity_join,
)
if allow_row_identity_join:
return ArrayValue(bigframes.core.rewrite.maybe_rewrite_join(join_node))
return ArrayValue(join_node)

def _uniform_sampling(self, fraction: float) -> ArrayValue:
"""Sampling the table on given fraction.
37 changes: 10 additions & 27 deletions bigframes/core/block_transforms.py
@@ -13,6 +13,7 @@
# limitations under the License.
from __future__ import annotations

import functools
import typing

import pandas as pd
@@ -307,7 +308,7 @@ def drop_duplicates(
) -> blocks.Block:
block, dupe_indicator_id = indicate_duplicates(block, columns, keep)
block, keep_indicator_id = block.apply_unary_op(dupe_indicator_id, ops.invert_op)
return block.filter(keep_indicator_id).drop_columns(
return block.filter_by_id(keep_indicator_id).drop_columns(
(dupe_indicator_id, keep_indicator_id)
)

@@ -459,32 +460,14 @@ def dropna(
"""
Drop na entries from block
"""
predicates = [ops.notnull_op.as_expr(column_id) for column_id in column_ids]
if len(predicates) == 0:
return block
if how == "any":
filtered_block = block
for column in column_ids:
filtered_block, result_id = filtered_block.apply_unary_op(
column, ops.notnull_op
)
filtered_block = filtered_block.filter(result_id)
filtered_block = filtered_block.drop_columns([result_id])
return filtered_block
predicate = functools.reduce(ops.and_op.as_expr, predicates)
else: # "all"
filtered_block = block
predicate = None
for column in column_ids:
filtered_block, partial_predicate = filtered_block.apply_unary_op(
column, ops.notnull_op
)
if predicate:
filtered_block, predicate = filtered_block.apply_binary_op(
partial_predicate, predicate, ops.or_op
)
else:
predicate = partial_predicate
if predicate:
filtered_block = filtered_block.filter(predicate)
filtered_block = filtered_block.select_columns(block.value_columns)
return filtered_block
predicate = functools.reduce(ops.or_op.as_expr, predicates)
return block.filter(predicate)


def nsmallest(
@@ -513,7 +496,7 @@ def nsmallest(
window_spec=windows.WindowSpec(ordering=tuple(order_refs)),
)
block, condition = block.project_expr(ops.le_op.as_expr(counter, ex.const(n)))
block = block.filter(condition)
block = block.filter_by_id(condition)
return block.drop_columns([counter, condition])


@@ -543,7 +526,7 @@ def nlargest(
window_spec=windows.WindowSpec(ordering=tuple(order_refs)),
)
block, condition = block.project_expr(ops.le_op.as_expr(counter, ex.const(n)))
block = block.filter(condition)
block = block.filter_by_id(condition)
return block.drop_columns([counter, condition])
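
The `dropna` hunk above replaces per-column filtering with a single predicate built by `functools.reduce` over not-null expressions: AND of the predicates for `how="any"` (drop a row if any value is null), OR for `how="all"` (drop a row only if every value is null). A standalone sketch of that reduction pattern with plain Python callables rather than bigframes expression objects:

```python
import functools
from typing import Callable, Dict, Optional, Sequence

Row = Dict[str, Optional[int]]
Predicate = Callable[[Row], bool]


def not_null(column: str) -> Predicate:
    """True when the row has a non-null value in `column`."""
    return lambda row: row.get(column) is not None


def keep_predicate(columns: Sequence[str], how: str) -> Predicate:
    predicates = [not_null(c) for c in columns]

    def land(a: Predicate, b: Predicate) -> Predicate:
        return lambda row: a(row) and b(row)

    def lor(a: Predicate, b: Predicate) -> Predicate:
        return lambda row: a(row) or b(row)

    # "any": drop a row if ANY column is null, i.e. keep rows where ALL predicates hold.
    # "all": drop a row only if ALL columns are null, i.e. keep rows where ANY predicate holds.
    return functools.reduce(land if how == "any" else lor, predicates)


rows = [{"a": 1, "b": None}, {"a": None, "b": None}, {"a": 2, "b": 3}]
keep_any = keep_predicate(["a", "b"], "any")
keep_all = keep_predicate(["a", "b"], "all")
print([r for r in rows if keep_any(r)])  # [{'a': 2, 'b': 3}]
print([r for r in rows if keep_all(r)])  # [{'a': 1, 'b': None}, {'a': 2, 'b': 3}]
```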


45 changes: 29 additions & 16 deletions bigframes/core/blocks.py
@@ -37,6 +37,7 @@
import bigframes.constants as constants
import bigframes.core as core
import bigframes.core.expression as ex
import bigframes.core.expression as scalars
import bigframes.core.guid as guid
import bigframes.core.join_def as join_defs
import bigframes.core.ordering as ordering
@@ -701,7 +702,7 @@ def project_expr(
block = Block(
array_val,
index_columns=self.index_columns,
column_labels=[*self.column_labels, label],
column_labels=self.column_labels.insert(len(self.column_labels), label),
index_labels=self.index.names,
)
return (block, result_id)
@@ -793,7 +794,7 @@ def apply_window_op(
if skip_null_groups:
for key in window_spec.grouping_keys:
block, not_null_id = block.apply_unary_op(key, ops.notnull_op)
block = block.filter(not_null_id).drop_columns([not_null_id])
block = block.filter_by_id(not_null_id).drop_columns([not_null_id])
result_id = guid.generate_guid()
expr = block._expr.project_window_op(
column,
@@ -806,7 +807,9 @@ def apply_window_op(
block = Block(
expr,
index_columns=self.index_columns,
column_labels=[*self.column_labels, result_label],
column_labels=self.column_labels.insert(
len(self.column_labels), result_label
),
index_labels=self._index_labels,
)
return (block, result_id)
@@ -850,14 +853,22 @@ def assign_label(self, column_id: str, new_label: Label) -> Block:
)
return self.with_column_labels(new_labels)

def filter(self, column_id: str, keep_null: bool = False):
def filter_by_id(self, column_id: str, keep_null: bool = False):
return Block(
self._expr.filter_by_id(column_id, keep_null),
index_columns=self.index_columns,
column_labels=self.column_labels,
index_labels=self.index.names,
)

def filter(self, predicate: scalars.Expression):
return Block(
self._expr.filter(predicate),
index_columns=self.index_columns,
column_labels=self.column_labels,
index_labels=self.index.names,
)

def aggregate_all_and_stack(
self,
operation: agg_ops.UnaryAggregateOp,
@@ -1086,8 +1097,11 @@ def summarize(
unpivot_columns=tuple(columns),
index_col_ids=tuple([label_col_id]),
)
labels = self._get_labels_for_columns(column_ids)
return Block(expr, column_labels=labels, index_columns=[label_col_id])
return Block(
expr,
column_labels=self._get_labels_for_columns(column_ids),
index_columns=[label_col_id],
)

def corr(self):
"""Returns a block object to compute the self-correlation on this block."""
@@ -1156,10 +1170,10 @@ def _standard_stats(self, column_id) -> typing.Sequence[agg_ops.UnaryAggregateOp

return stats

def _get_labels_for_columns(self, column_ids: typing.Sequence[str]):
def _get_labels_for_columns(self, column_ids: typing.Sequence[str]) -> pd.Index:
"""Get column label for value columns, or index name for index columns"""
lookup = self.col_id_to_label
return [lookup.get(col_id, None) for col_id in column_ids]
indices = [self.value_columns.index(col_id) for col_id in column_ids]
return self.column_labels.take(indices, allow_fill=False)

def _normalize_expression(
self,
@@ -1255,7 +1269,7 @@ def _forward_slice(self, start: int = 0, stop=None, step: int = 1):

for cond in conditions:
block, cond_id = block.project_expr(cond)
block = block.filter(cond_id)
block = block.filter_by_id(cond_id)

return block.select_columns(self.value_columns)

@@ -1292,7 +1306,7 @@ def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]:
Block(
expr,
index_columns=self.index_columns,
column_labels=[label, *self.column_labels],
column_labels=self.column_labels.insert(0, label),
index_labels=self._index_labels,
),
result_id,
@@ -1391,10 +1405,9 @@ def pivot(
if values_in_index or len(values) > 1:
value_labels = self._get_labels_for_columns(values)
column_index = self._create_pivot_column_index(value_labels, columns_values)
return result_block.with_column_labels(column_index)
else:
column_index = columns_values

return result_block.with_column_labels(column_index)
return result_block.with_column_labels(columns_values)

def stack(self, how="left", levels: int = 1):
"""Unpivot last column axis level into row axis"""
@@ -1517,8 +1530,8 @@ def _column_type(self, col_id: str) -> bigframes.dtypes.Dtype:

@staticmethod
def _create_pivot_column_index(
value_labels: Sequence[typing.Hashable], columns_values: pd.Index
):
value_labels: pd.Index, columns_values: pd.Index
) -> pd.Index:
index_parts = []
for value in value_labels:
as_frame = columns_values.to_frame()
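
Several hunks in this file replace list concatenation of column labels (`[*self.column_labels, label]`) with `pandas.Index.insert`, and `_get_labels_for_columns` now returns an `Index` built with `take`. A standalone illustration of the plain-pandas behavior being relied on (this is not bigframes internals):

```python
import pandas as pd

labels = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["letter", "number"])

# Index.insert returns a new Index of the same kind, so a MultiIndex stays a MultiIndex.
appended = labels.insert(len(labels), ("c", 3))
print(type(appended).__name__)  # MultiIndex

# Unpacking into a list concatenation flattens the labels to plain tuples instead.
print(type([*labels, ("c", 3)]).__name__)  # list

# Index.take selects labels by position; allow_fill=False forbids -1 fill markers,
# which is how the new _get_labels_for_columns builds its result.
print(labels.take([1, 0], allow_fill=False).to_list())  # [('b', 2), ('a', 1)]
```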
2 changes: 1 addition & 1 deletion bigframes/core/compile/aggregate_compiler.py
@@ -414,7 +414,7 @@ def _(
result = _is_true(column).any()
return cast(
ibis_types.BooleanScalar,
_apply_window_if_present(result, window).fillna(ibis_types.literal(True)),
_apply_window_if_present(result, window).fillna(ibis_types.literal(False)),
)
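
The one-line fix above changes the fill value for a windowed `any()` from `True` to `False`, matching the commit "any() on empty set now correctly returns False" (#471). A quick check of the identity being relied on:

```python
# OR over zero elements is False and AND over zero elements is True, so the
# window fill value must be the aggregate's identity element.
print(any([]))  # False
print(all([]))  # True
```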


2 changes: 0 additions & 2 deletions bigframes/core/compile/compiler.py
@@ -66,7 +66,6 @@ def compile_join(node: nodes.JoinNode, ordered: bool = True):
left=left_ordered,
right=right_ordered,
join=node.join,
allow_row_identity_join=node.allow_row_identity_join,
)
else:
left_unordered = compile_unordered_ir(node.left_child)
@@ -75,7 +74,6 @@ def compile_join(node: nodes.JoinNode, ordered: bool = True):
left=left_unordered,
right=right_unordered,
join=node.join,
allow_row_identity_join=node.allow_row_identity_join,
)

