From 3a4a9de1f797557dfbbb7317eb3754e16131ddbf Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Wed, 11 Sep 2024 13:35:17 -0700 Subject: [PATCH 01/18] refactor: Simplify join node definition (#966) --- bigframes/core/__init__.py | 149 ++---------- bigframes/core/blocks.py | 311 ++++++++++++------------ bigframes/core/compile/compiled.py | 2 +- bigframes/core/compile/compiler.py | 6 +- bigframes/core/compile/single_column.py | 35 ++- bigframes/core/guid.py | 2 +- bigframes/core/identifiers.py | 26 ++ bigframes/core/join_def.py | 39 +-- bigframes/core/nodes.py | 30 +-- bigframes/core/schema.py | 3 + 10 files changed, 257 insertions(+), 346 deletions(-) create mode 100644 bigframes/core/identifiers.py diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index f65509e5b7..b4074dd94f 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -17,9 +17,8 @@ import datetime import functools import io -import itertools import typing -from typing import Iterable, Optional, Sequence +from typing import Iterable, Optional, Sequence, Tuple import warnings import google.cloud.bigquery @@ -191,19 +190,14 @@ def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue: nodes.ConcatNode(children=tuple([self.node, *[val.node for val in other]])) ) - def project_to_id(self, expression: ex.Expression, output_id: str): + def compute_values(self, assignments: Sequence[Tuple[ex.Expression, str]]): return ArrayValue( - nodes.ProjectionNode( - child=self.node, - assignments=( - ( - expression, - output_id, - ), - ), - ) + nodes.ProjectionNode(child=self.node, assignments=tuple(assignments)) ) + def project_to_id(self, expression: ex.Expression, output_id: str): + return self.compute_values(((expression, output_id),)) + def assign(self, source_id: str, destination_id: str) -> ArrayValue: if destination_id in self.column_ids: # Mutate case exprs = [ @@ -341,124 +335,33 @@ def _reproject_to_table(self) -> ArrayValue: ) ) - def unpivot( - self, - row_labels: typing.Sequence[typing.Hashable], - unpivot_columns: typing.Sequence[ - typing.Tuple[str, typing.Tuple[typing.Optional[str], ...]] - ], - *, - passthrough_columns: typing.Sequence[str] = (), - index_col_ids: typing.Sequence[str] = ["index"], - join_side: typing.Literal["left", "right"] = "left", - ) -> ArrayValue: - """ - Unpivot ArrayValue columns. - - Args: - row_labels: Identifies the source of the row. Must be equal to length to source column list in unpivot_columns argument. - unpivot_columns: Mapping of column id to list of input column ids. Lists of input columns may use None. - passthrough_columns: Columns that will not be unpivoted. Column id will be preserved. - index_col_id (str): The column id to be used for the row labels. 
- - Returns: - ArrayValue: The unpivoted ArrayValue - """ - # There will be N labels, used to disambiguate which of N source columns produced each output row - explode_offsets_id = bigframes.core.guid.generate_guid("unpivot_offsets_") - labels_array = self._create_unpivot_labels_array( - row_labels, index_col_ids, explode_offsets_id - ) - - # Unpivot creates N output rows for each input row, labels disambiguate these N rows - joined_array = self._cross_join_w_labels(labels_array, join_side) - - # Build the output rows as a case statment that selects between the N input columns - unpivot_exprs = [] - # Supports producing multiple stacked ouput columns for stacking only part of hierarchical index - for col_id, input_ids in unpivot_columns: - # row explode offset used to choose the input column - # we use offset instead of label as labels are not necessarily unique - cases = itertools.chain( - *( - ( - ops.eq_op.as_expr(explode_offsets_id, ex.const(i)), - ex.free_var(id_or_null) - if (id_or_null is not None) - else ex.const(None), - ) - for i, id_or_null in enumerate(input_ids) - ) - ) - col_expr = ops.case_when_op.as_expr(*cases) - unpivot_exprs.append((col_expr, col_id)) - - unpivot_col_ids = [id for id, _ in unpivot_columns] - return ArrayValue( - nodes.ProjectionNode( - child=joined_array.node, - assignments=(*unpivot_exprs,), - ) - ).select_columns([*index_col_ids, *unpivot_col_ids, *passthrough_columns]) - - def _cross_join_w_labels( - self, labels_array: ArrayValue, join_side: typing.Literal["left", "right"] - ) -> ArrayValue: - """ - Convert each row in self to N rows, one for each label in labels array. - """ - table_join_side = ( - join_def.JoinSide.LEFT if join_side == "left" else join_def.JoinSide.RIGHT - ) - labels_join_side = table_join_side.inverse() - labels_mappings = tuple( - join_def.JoinColumnMapping(labels_join_side, id, id) - for id in labels_array.schema.names - ) - table_mappings = tuple( - join_def.JoinColumnMapping(table_join_side, id, id) - for id in self.schema.names - ) - join = join_def.JoinDefinition( - conditions=(), mappings=(*labels_mappings, *table_mappings), type="cross" - ) - if join_side == "left": - joined_array = self.relational_join(labels_array, join_def=join) - else: - joined_array = labels_array.relational_join(self, join_def=join) - return joined_array - - def _create_unpivot_labels_array( - self, - former_column_labels: typing.Sequence[typing.Hashable], - col_ids: typing.Sequence[str], - offsets_id: str, - ) -> ArrayValue: - """Create an ArrayValue from a list of label tuples.""" - rows = [] - for row_offset in range(len(former_column_labels)): - row_label = former_column_labels[row_offset] - row_label = (row_label,) if not isinstance(row_label, tuple) else row_label - row = { - col_ids[i]: (row_label[i] if pandas.notnull(row_label[i]) else None) - for i in range(len(col_ids)) - } - row[offsets_id] = row_offset - rows.append(row) - - return ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=self.session) - def relational_join( self, other: ArrayValue, - join_def: join_def.JoinDefinition, - ) -> ArrayValue: + conditions: typing.Tuple[typing.Tuple[str, str], ...] 
= (), + type: typing.Literal["inner", "outer", "left", "right", "cross"] = "inner", + ) -> typing.Tuple[ArrayValue, typing.Tuple[dict[str, str], dict[str, str]]]: join_node = nodes.JoinNode( left_child=self.node, right_child=other.node, - join=join_def, + conditions=conditions, + type=type, ) - return ArrayValue(join_node) + # Maps input ids to output ids for caller convenience + l_size = len(self.node.schema) + l_mapping = { + lcol: ocol + for lcol, ocol in zip( + self.node.schema.names, join_node.schema.names[:l_size] + ) + } + r_mapping = { + rcol: ocol + for rcol, ocol in zip( + other.node.schema.names, join_node.schema.names[l_size:] + ) + } + return ArrayValue(join_node), (l_mapping, r_mapping) def try_align_as_projection( self, diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 4db171ec70..42b1a0aeb0 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -44,6 +44,7 @@ import bigframes.core.expression as ex import bigframes.core.expression as scalars import bigframes.core.guid as guid +import bigframes.core.identifiers import bigframes.core.join_def as join_defs import bigframes.core.ordering as ordering import bigframes.core.schema as bf_schema @@ -1050,7 +1051,6 @@ def aggregate_all_and_stack( operation: typing.Union[agg_ops.UnaryAggregateOp, agg_ops.NullaryAggregateOp], *, axis: int | str = 0, - value_col_id: str = "values", dropna: bool = True, ) -> Block: axis_n = utils.get_axis_number(axis) @@ -1080,15 +1080,18 @@ def aggregate_all_and_stack( # TODO: Allow to promote identity/total_order columns instead for better perf offset_col = guid.generate_guid() expr_with_offsets = self.expr.promote_offsets(offset_col) - stacked_expr = expr_with_offsets.unpivot( - row_labels=self.column_labels.to_list(), - index_col_ids=[guid.generate_guid()], - unpivot_columns=[(value_col_id, tuple(self.value_columns))], + stacked_expr, (_, value_col_ids, passthrough_cols,) = unpivot( + expr_with_offsets, + row_labels=self.column_labels, + unpivot_columns=[tuple(self.value_columns)], passthrough_columns=[*self.index_columns, offset_col], ) + # these corresponed to passthrough_columns provided to unpivot + index_cols = passthrough_cols[:-1] + og_offset_col = passthrough_cols[-1] index_aggregations = [ (ex.UnaryAggregation(agg_ops.AnyValueOp(), ex.free_var(col_id)), col_id) - for col_id in [*self.index_columns] + for col_id in index_cols ] # TODO: may need add NullaryAggregation in main_aggregation # when agg add support for axis=1, needed for agg("size", axis=1) @@ -1096,17 +1099,18 @@ def aggregate_all_and_stack( operation, agg_ops.UnaryAggregateOp ), f"Expected a unary operation, but got {operation}. Please report this error and how you got here to the BigQuery DataFrames team (bit.ly/bigframes-feedback)." 
main_aggregation = ( - ex.UnaryAggregation(operation, ex.free_var(value_col_id)), - value_col_id, + ex.UnaryAggregation(operation, ex.free_var(value_col_ids[0])), + value_col_ids[0], ) + # Drop row identity after aggregating over it result_expr = stacked_expr.aggregate( [*index_aggregations, main_aggregation], - by_column_ids=[offset_col], + by_column_ids=[og_offset_col], dropna=dropna, - ) + ).drop_columns([og_offset_col]) return Block( - result_expr.drop_columns([offset_col]), - self.index_columns, + result_expr, + index_columns=index_cols, column_labels=[None], index_labels=self.index.names, ) @@ -1318,8 +1322,7 @@ def summarize( ], ): """Get a list of stats as a deferred block object.""" - label_col_id = guid.generate_guid() - labels = [stat.name for stat in stats] + labels = pd.Index([stat.name for stat in stats]) aggregations = [ ( ex.UnaryAggregation(stat, ex.free_var(col_id)) @@ -1331,18 +1334,17 @@ def summarize( for col_id in column_ids ] columns = [ - (col_id, tuple(f"{col_id}-{stat.name}" for stat in stats)) - for col_id in column_ids + (tuple(f"{col_id}-{stat.name}" for stat in stats)) for col_id in column_ids ] - expr = self.expr.aggregate(aggregations).unpivot( + expr, (index_cols, _, _) = unpivot( + self.expr.aggregate(aggregations), labels, unpivot_columns=tuple(columns), - index_col_ids=tuple([label_col_id]), ) return Block( expr, column_labels=self._get_labels_for_columns(column_ids), - index_columns=[label_col_id], + index_columns=index_cols, ) def calculate_pairwise_metric(self, op=agg_ops.CorrOp()): @@ -1368,23 +1370,17 @@ def calculate_pairwise_metric(self, op=agg_ops.CorrOp()): ] expr = self.expr.aggregate(aggregations) - index_col_ids = [ - guid.generate_guid() for i in range(self.column_labels.nlevels) - ] input_count = len(self.value_columns) unpivot_columns = tuple( - ( - guid.generate_guid(), - tuple(expr.column_ids[input_count * i : input_count * (i + 1)]), - ) + tuple(expr.column_ids[input_count * i : input_count * (i + 1)]) for i in range(input_count) ) labels = self._get_labels_for_columns(self.value_columns) # TODO(b/340896143): fix type error - expr = expr.unpivot( - row_labels=labels, # type: ignore - index_col_ids=index_col_ids, + expr, (index_col_ids, _, _) = unpivot( + expr, + row_labels=labels, unpivot_columns=unpivot_columns, ) @@ -1604,7 +1600,7 @@ def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]: Block( expr, index_columns=self.index_columns, - column_labels=self.column_labels.insert(0, label), + column_labels=self.column_labels.insert(len(self.column_labels), label), index_labels=self._index_labels, ), result_id, @@ -1722,8 +1718,6 @@ def stack(self, how="left", levels: int = 1): col_labels, row_labels = utils.split_index(self.column_labels, levels=levels) row_labels = row_labels.drop_duplicates() - row_label_tuples = utils.index_as_tuples(row_labels) - if col_labels is None: result_index: pd.Index = pd.Index([None]) result_col_labels: Sequence[Tuple] = list([()]) @@ -1737,26 +1731,24 @@ def stack(self, how="left", levels: int = 1): result_col_labels = utils.index_as_tuples(result_index) # Get matching columns - unpivot_columns: List[Tuple[str, List[str]]] = [] + unpivot_columns: List[Tuple[Optional[str], ...]] = [] for val in result_col_labels: - col_id = guid.generate_guid("unpivot_") - input_columns, dtype = self._create_stack_column(val, row_label_tuples) - unpivot_columns.append((col_id, input_columns)) + input_columns, _ = self._create_stack_column(val, row_labels) + unpivot_columns.append(input_columns) - 
added_index_columns = [guid.generate_guid() for _ in range(row_labels.nlevels)] - unpivot_expr = self._expr.unpivot( - row_labels=row_label_tuples, + unpivot_expr, (added_index_columns, _, passthrough_cols) = unpivot( + self._expr, + row_labels=row_labels, passthrough_columns=self.index_columns, unpivot_columns=unpivot_columns, - index_col_ids=added_index_columns, join_side=how, ) new_index_level_names = self.column_labels.names[-levels:] if how == "left": - index_columns = [*self.index_columns, *added_index_columns] + index_columns = [*passthrough_cols, *added_index_columns] index_labels = [*self._index_labels, *new_index_level_names] else: - index_columns = [*added_index_columns, *self.index_columns] + index_columns = [*added_index_columns, *passthrough_cols] index_labels = [*new_index_level_names, *self._index_labels] return Block( @@ -1780,18 +1772,16 @@ def melt( Arguments correspond to pandas.melt arguments. """ # TODO: Implement col_level and ignore_index - unpivot_col_id = guid.generate_guid() - var_col_ids = tuple([guid.generate_guid() for _ in var_names]) - # single unpivot col - unpivot_col = (unpivot_col_id, tuple(value_vars)) - value_labels = [self.col_id_to_label[col_id] for col_id in value_vars] + value_labels: pd.Index = pd.Index( + [self.col_id_to_label[col_id] for col_id in value_vars] + ) id_labels = [self.col_id_to_label[col_id] for col_id in id_vars] - unpivot_expr = self._expr.unpivot( + unpivot_expr, (var_col_ids, unpivot_out, passthrough_cols) = unpivot( + self._expr, row_labels=value_labels, passthrough_columns=id_vars, - unpivot_columns=(unpivot_col,), - index_col_ids=var_col_ids, + unpivot_columns=(tuple(value_vars),), # single unpivot col join_side="right", ) @@ -1804,7 +1794,7 @@ def melt( # Need to reorder to get id_vars before var_col and unpivot_col unpivot_expr = unpivot_expr.select_columns( - [*index_cols, *id_vars, *var_col_ids, unpivot_col_id] + [*index_cols, *passthrough_cols, *var_col_ids, *unpivot_out] ) return Block( @@ -1859,6 +1849,7 @@ def transpose( value_vars=block.value_columns, create_offsets_index=False, ) + row_offset = stacked_block.value_columns[0] col_labels = stacked_block.value_columns[-2 - original_col_index.nlevels : -2] col_offset = stacked_block.value_columns[-2] # disambiguator we created earlier cell_values = stacked_block.value_columns[-1] @@ -1867,7 +1858,7 @@ def transpose( [*col_labels, col_offset] ) # col index is now row index result = stacked_block.pivot( - columns=[offsets], + columns=[row_offset], values=[cell_values], columns_unique_values=tuple(range(original_row_count)), ) @@ -1879,12 +1870,10 @@ def transpose( .with_transpose_cache(self) ) - def _create_stack_column( - self, col_label: typing.Tuple, stack_labels: typing.Sequence[typing.Tuple] - ): + def _create_stack_column(self, col_label: typing.Tuple, stack_labels: pd.Index): dtype = None input_columns: list[Optional[str]] = [] - for uvalue in stack_labels: + for uvalue in utils.index_as_tuples(stack_labels): label_to_match = (*col_label, *uvalue) label_to_match = ( label_to_match[0] if len(label_to_match) == 1 else label_to_match @@ -2013,38 +2002,16 @@ def merge( sort: bool, suffixes: tuple[str, str] = ("_x", "_y"), ) -> Block: - left_mappings = [ - join_defs.JoinColumnMapping( - source_table=join_defs.JoinSide.LEFT, - source_id=id, - destination_id=guid.generate_guid(), - ) - for id in self.expr.column_ids - ] - right_mappings = [ - join_defs.JoinColumnMapping( - source_table=join_defs.JoinSide.RIGHT, - source_id=id, - destination_id=guid.generate_guid(), - ) - 
for id in other.expr.column_ids - ] - - join_def = join_defs.JoinDefinition( - conditions=tuple( - join_defs.JoinCondition(left, right) - for left, right in zip(left_join_ids, right_join_ids) - ), - mappings=(*left_mappings, *right_mappings), - type=how, + conditions = tuple( + (lid, rid) for lid, rid in zip(left_join_ids, right_join_ids) + ) + joined_expr, (get_column_left, get_column_right) = self.expr.relational_join( + other.expr, type=how, conditions=conditions ) - joined_expr = self.expr.relational_join(other.expr, join_def=join_def) result_columns = [] matching_join_labels = [] coalesced_ids = [] - get_column_left = join_def.get_left_mapping() - get_column_right = join_def.get_right_mapping() for left_id, right_id in zip(left_join_ids, right_join_ids): coalesced_id = guid.generate_guid() joined_expr = joined_expr.project_to_id( @@ -2748,34 +2715,10 @@ def join_with_single_row( left_expr = left.expr # ignore index columns by dropping them right_expr = single_row_block.expr.select_columns(single_row_block.value_columns) - left_mappings = [ - join_defs.JoinColumnMapping( - source_table=join_defs.JoinSide.LEFT, - source_id=id, - destination_id=guid.generate_guid(), - ) - for id in left_expr.column_ids - ] - right_mappings = [ - join_defs.JoinColumnMapping( - source_table=join_defs.JoinSide.RIGHT, - source_id=id, - destination_id=guid.generate_guid(), - ) - for id in right_expr.column_ids # skip index column - ] - - join_def = join_defs.JoinDefinition( - conditions=(), - mappings=(*left_mappings, *right_mappings), - type="cross", - ) - combined_expr = left_expr.relational_join( + combined_expr, (get_column_left, get_column_right) = left_expr.relational_join( right_expr, - join_def=join_def, + type="cross", ) - get_column_left = join_def.get_left_mapping() - get_column_right = join_def.get_right_mapping() # Drop original indices from each side. and used the coalesced combination generated by the join. index_cols_post_join = [get_column_left[id] for id in left.index_columns] @@ -2800,38 +2743,15 @@ def join_mono_indexed( ) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]: left_expr = left.expr right_expr = right.expr - left_mappings = [ - join_defs.JoinColumnMapping( - source_table=join_defs.JoinSide.LEFT, - source_id=id, - destination_id=guid.generate_guid(), - ) - for id in left_expr.column_ids - ] - right_mappings = [ - join_defs.JoinColumnMapping( - source_table=join_defs.JoinSide.RIGHT, - source_id=id, - destination_id=guid.generate_guid(), - ) - for id in right_expr.column_ids - ] - join_def = join_defs.JoinDefinition( + combined_expr, (get_column_left, get_column_right) = left_expr.relational_join( + right_expr, + type=how, conditions=( join_defs.JoinCondition(left.index_columns[0], right.index_columns[0]), ), - mappings=(*left_mappings, *right_mappings), - type=how, ) - combined_expr = left_expr.relational_join( - right_expr, - join_def=join_def, - ) - - get_column_left = join_def.get_left_mapping() - get_column_right = join_def.get_right_mapping() left_index = get_column_left[left.index_columns[0]] right_index = get_column_right[right.index_columns[0]] # Drop original indices from each side. and used the coalesced combination generated by the join. 
@@ -2886,39 +2806,15 @@ def join_multi_indexed( left_expr = left.expr right_expr = right.expr - left_mappings = [ - join_defs.JoinColumnMapping( - source_table=join_defs.JoinSide.LEFT, - source_id=id, - destination_id=guid.generate_guid(), - ) - for id in left_expr.column_ids - ] - right_mappings = [ - join_defs.JoinColumnMapping( - source_table=join_defs.JoinSide.RIGHT, - source_id=id, - destination_id=guid.generate_guid(), - ) - for id in right_expr.column_ids - ] - - join_def = join_defs.JoinDefinition( + combined_expr, (get_column_left, get_column_right) = left_expr.relational_join( + right_expr, + type=how, conditions=tuple( join_defs.JoinCondition(left, right) for left, right in zip(left_join_ids, right_join_ids) ), - mappings=(*left_mappings, *right_mappings), - type=how, ) - combined_expr = left_expr.relational_join( - right_expr, - join_def=join_def, - ) - - get_column_left = join_def.get_left_mapping() - get_column_right = join_def.get_right_mapping() left_ids_post_join = [get_column_left[id] for id in left_join_ids] right_ids_post_join = [get_column_right[id] for id in right_join_ids] # Drop original indices from each side. and used the coalesced combination generated by the join. @@ -3114,3 +3010,94 @@ def _get_block_schema( for label, dtype in zip(block.column_labels, block.dtypes): result[label] = typing.cast(bigframes.dtypes.Dtype, dtype) return result + + +## Unpivot helpers +def unpivot( + array_value: core.ArrayValue, + row_labels: pd.Index, + unpivot_columns: Sequence[Tuple[Optional[str], ...]], + *, + passthrough_columns: typing.Sequence[str] = (), + join_side: Literal["left", "right"] = "left", +) -> Tuple[core.ArrayValue, Tuple[Tuple[str, ...], Tuple[str, ...], Tuple[str, ...]]]: + """ + Unpivot ArrayValue columns. + + Args: + row_labels: Identifies the source of the row. Must be equal to length to source column list in unpivot_columns argument. + unpivot_columns: Sequence of column ids tuples. Each tuple of columns will be combined into a single output column + passthrough_columns: Columns that will not be unpivoted. Column id will be preserved. + index_col_id (str): The column id to be used for the row labels. + + Returns: + ArrayValue, (index_cols, unpivot_cols, passthrough_cols): The unpivoted ArrayValue and resulting column ids. 
+ """ + # There will be N labels, used to disambiguate which of N source columns produced each output row + labels_array = _pd_index_to_array_value( + session=array_value.session, index=row_labels + ) + + # Unpivot creates N output rows for each input row, labels disambiguate these N rows + # Join_side is necessary to produce desired row ordering + if join_side == "left": + joined_array, (column_mapping, labels_mapping) = array_value.relational_join( + labels_array, type="cross" + ) + else: + joined_array, (labels_mapping, column_mapping) = labels_array.relational_join( + array_value, type="cross" + ) + new_passthrough_cols = [column_mapping[col] for col in passthrough_columns] + # Last column is offsets + index_col_ids = [labels_mapping[col] for col in labels_array.column_ids[:-1]] + explode_offsets_id = labels_mapping[labels_array.column_ids[-1]] + + # Build the output rows as a case statment that selects between the N input columns + unpivot_exprs: List[Tuple[ex.Expression, str]] = [] + # Supports producing multiple stacked ouput columns for stacking only part of hierarchical index + for input_ids in unpivot_columns: + # row explode offset used to choose the input column + # we use offset instead of label as labels are not necessarily unique + cases = itertools.chain( + *( + ( + ops.eq_op.as_expr(explode_offsets_id, ex.const(i)), + ex.free_var(column_mapping[id_or_null]) + if (id_or_null is not None) + else ex.const(None), + ) + for i, id_or_null in enumerate(input_ids) + ) + ) + col_expr = ops.case_when_op.as_expr(*cases) + unpivot_exprs.append((col_expr, guid.generate_guid())) + + unpivot_col_ids = [id for _, id in unpivot_exprs] + + return joined_array.compute_values(unpivot_exprs).select_columns( + [*index_col_ids, *unpivot_col_ids, *new_passthrough_cols] + ), (tuple(index_col_ids), tuple(unpivot_col_ids), tuple(new_passthrough_cols)) + + +def _pd_index_to_array_value( + session: core.Session, + index: pd.Index, +) -> core.ArrayValue: + """ + Create an ArrayValue from a list of label tuples. + The last column will be row offsets. + """ + rows = [] + labels_as_tuples = utils.index_as_tuples(index) + for row_offset in range(len(index)): + id_gen = bigframes.core.identifiers.standard_identifiers() + row_label = labels_as_tuples[row_offset] + row_label = (row_label,) if not isinstance(row_label, tuple) else row_label + row = {} + for label_part, id in zip(row_label, id_gen): + row[id] = label_part if pd.notnull(label_part) else None + row[next(id_gen)] = row_offset + rows.append(row) + + return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session) diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 9a9f598e89..38b8fb50e3 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -791,10 +791,10 @@ def promote_offsets(self, col_id: str) -> OrderedIR: if ordering.is_sequential and (ordering.total_order_col is not None): expr_builder = self.builder() expr_builder.columns = [ + *self.columns, self._compile_expression( ordering.total_order_col.scalar_expression ).name(col_id), - *self.columns, ] return expr_builder.build() # Cannot nest analytic expressions, so reproject to cte first if needed. 
diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index 80d5f5a893..950c2c2cc7 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -81,7 +81,8 @@ def compile_join(self, node: nodes.JoinNode, ordered: bool = True): return bigframes.core.compile.single_column.join_by_column_ordered( left=left_ordered, right=right_ordered, - join=node.join, + type=node.type, + conditions=node.conditions, ) else: left_unordered = self.compile_unordered_ir(node.left_child) @@ -89,7 +90,8 @@ def compile_join(self, node: nodes.JoinNode, ordered: bool = True): return bigframes.core.compile.single_column.join_by_column_unordered( left=left_unordered, right=right_unordered, - join=node.join, + type=node.type, + conditions=node.conditions, ) @_compile_node.register diff --git a/bigframes/core/compile/single_column.py b/bigframes/core/compile/single_column.py index 9b621c9c79..26af969b74 100644 --- a/bigframes/core/compile/single_column.py +++ b/bigframes/core/compile/single_column.py @@ -16,20 +16,23 @@ from __future__ import annotations +from typing import Literal, Tuple + import ibis import ibis.expr.datatypes as ibis_dtypes import ibis.expr.types as ibis_types import bigframes.core.compile.compiled as compiled import bigframes.core.guid as guids -import bigframes.core.join_def as join_defs +import bigframes.core.identifiers as ids import bigframes.core.ordering as orderings def join_by_column_ordered( left: compiled.OrderedIR, right: compiled.OrderedIR, - join: join_defs.JoinDefinition, + conditions: Tuple[Tuple[str, str], ...], + type: Literal["inner", "outer", "left", "right", "cross"], ) -> compiled.OrderedIR: """Join two expressions by column equality. @@ -48,6 +51,11 @@ def join_by_column_ordered( finally, all the right columns. """ + # Do not reset the generator + id_generator = ids.standard_identifiers() + l_value_mapping = dict(zip(left.column_ids, id_generator)) + r_value_mapping = dict(zip(right.column_ids, id_generator)) + l_hidden_mapping = { id: guids.generate_guid("hidden_") for id in left._hidden_column_ids } @@ -55,8 +63,8 @@ def join_by_column_ordered( id: guids.generate_guid("hidden_") for id in right._hidden_column_ids } - l_mapping = {**join.get_left_mapping(), **l_hidden_mapping} - r_mapping = {**join.get_right_mapping(), **r_hidden_mapping} + l_mapping = {**l_value_mapping, **l_hidden_mapping} + r_mapping = {**r_value_mapping, **r_hidden_mapping} left_table = left._to_ibis_expr( ordering_mode="unordered", @@ -71,14 +79,14 @@ def join_by_column_ordered( join_conditions = [ value_to_join_key(left_table[l_mapping[left_index]]) == value_to_join_key(right_table[r_mapping[right_index]]) - for left_index, right_index in join.conditions + for left_index, right_index in conditions ] combined_table = ibis.join( left_table, right_table, predicates=join_conditions, - how=join.type, # type: ignore + how=type, # type: ignore ) # Preserve ordering accross joins. 
@@ -87,7 +95,7 @@ def join_by_column_ordered( right._ordering, l_mapping, r_mapping, - left_order_dominates=(join.type != "right"), + left_order_dominates=(type != "right"), ) # We could filter out the original join columns, but predicates/ordering @@ -116,7 +124,8 @@ def join_by_column_ordered( def join_by_column_unordered( left: compiled.UnorderedIR, right: compiled.UnorderedIR, - join: join_defs.JoinDefinition, + conditions: Tuple[Tuple[str, str], ...], + type: Literal["inner", "outer", "left", "right", "cross"], ) -> compiled.UnorderedIR: """Join two expressions by column equality. @@ -134,9 +143,9 @@ def join_by_column_unordered( first the coalesced join keys, then, all the left columns, and finally, all the right columns. """ - # Value column mapping must use JOIN_NAME_REMAPPER to stay in sync with consumers of join result - l_mapping = join.get_left_mapping() - r_mapping = join.get_right_mapping() + id_generator = ids.standard_identifiers() + l_mapping = dict(zip(left.column_ids, id_generator)) + r_mapping = dict(zip(right.column_ids, id_generator)) left_table = left._to_ibis_expr( col_id_overrides=l_mapping, ) @@ -146,14 +155,14 @@ def join_by_column_unordered( join_conditions = [ value_to_join_key(left_table[l_mapping[left_index]]) == value_to_join_key(right_table[r_mapping[right_index]]) - for left_index, right_index in join.conditions + for left_index, right_index in conditions ] combined_table = ibis.join( left_table, right_table, predicates=join_conditions, - how=join.type, # type: ignore + how=type, # type: ignore ) # We could filter out the original join columns, but predicates/ordering # might still reference them in implicit joins. diff --git a/bigframes/core/guid.py b/bigframes/core/guid.py index 4eb6c7a9d6..8930d0760a 100644 --- a/bigframes/core/guid.py +++ b/bigframes/core/guid.py @@ -18,4 +18,4 @@ def generate_guid(prefix="col_"): global _GUID_COUNTER _GUID_COUNTER += 1 - return prefix + str(_GUID_COUNTER) + return f"bfuid_{prefix}{_GUID_COUNTER}" diff --git a/bigframes/core/identifiers.py b/bigframes/core/identifiers.py new file mode 100644 index 0000000000..9239c41248 --- /dev/null +++ b/bigframes/core/identifiers.py @@ -0,0 +1,26 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Later, plan on migrating ids to use integers to reduce memory usage allow use of bitmaps to represent column sets + +from typing import Generator + +ID_TYPE = str + + +def standard_identifiers() -> Generator[ID_TYPE, None, None]: + i = 0 + while True: + yield f"col_{i}" + i = i + 1 diff --git a/bigframes/core/join_def.py b/bigframes/core/join_def.py index 4079abc8fa..5b7b7e45dd 100644 --- a/bigframes/core/join_def.py +++ b/bigframes/core/join_def.py @@ -15,7 +15,9 @@ import dataclasses import enum -from typing import Literal, Mapping, NamedTuple, Tuple +from typing import Literal, NamedTuple + +import bigframes.core.identifiers as ids class JoinSide(enum.Enum): @@ -32,42 +34,21 @@ def inverse(self) -> JoinSide: class JoinCondition(NamedTuple): - left_id: str - right_id: str + left_id: ids.ID_TYPE + right_id: ids.ID_TYPE @dataclasses.dataclass(frozen=True) class JoinColumnMapping: source_table: JoinSide - source_id: str - destination_id: str + source_id: ids.ID_TYPE + destination_id: ids.ID_TYPE @dataclasses.dataclass(frozen=True) class CoalescedColumnMapping: """Special column mapping used only by implicit joiner only""" - left_source_id: str - right_source_id: str - destination_id: str - - -@dataclasses.dataclass(frozen=True) -class JoinDefinition: - conditions: Tuple[JoinCondition, ...] - mappings: Tuple[JoinColumnMapping, ...] - type: JoinType - - def get_left_mapping(self) -> Mapping[str, str]: - return { - i.source_id: i.destination_id - for i in self.mappings - if i.source_table == JoinSide.LEFT - } - - def get_right_mapping(self) -> Mapping[str, str]: - return { - i.source_id: i.destination_id - for i in self.mappings - if i.source_table == JoinSide.RIGHT - } + left_source_id: ids.ID_TYPE + right_source_id: ids.ID_TYPE + destination_id: ids.ID_TYPE diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 27e76c7910..e90ecd06b6 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -26,7 +26,7 @@ import bigframes.core.expression as ex import bigframes.core.guid -from bigframes.core.join_def import JoinColumnMapping, JoinDefinition, JoinSide +import bigframes.core.identifiers as bfet_ids from bigframes.core.ordering import OrderingExpression import bigframes.core.schema as schemata import bigframes.core.window_spec as window @@ -206,7 +206,8 @@ def order_ambiguous(self) -> bool: class JoinNode(BigFrameNode): left_child: BigFrameNode right_child: BigFrameNode - join: JoinDefinition + conditions: typing.Tuple[typing.Tuple[str, str], ...] 
+ type: typing.Literal["inner", "outer", "left", "right", "cross"] @property def row_preserving(self) -> bool: @@ -233,19 +234,14 @@ def __hash__(self): @functools.cached_property def schema(self) -> schemata.ArraySchema: - def join_mapping_to_schema_item(mapping: JoinColumnMapping): - result_id = mapping.destination_id - result_dtype = ( - self.left_child.schema.get_type(mapping.source_id) - if mapping.source_table == JoinSide.LEFT - else self.right_child.schema.get_type(mapping.source_id) - ) - return schemata.SchemaItem(result_id, result_dtype) - - items = tuple( - join_mapping_to_schema_item(mapping) for mapping in self.join.mappings + items = [] + schema_items = itertools.chain( + self.left_child.schema.items, self.right_child.schema.items ) - return schemata.ArraySchema(items) + identifiers = bfet_ids.standard_identifiers() + for id, item in zip(identifiers, schema_items): + items.append(schemata.SchemaItem(id, item.dtype)) + return schemata.ArraySchema(tuple(items)) @functools.cached_property def variables_introduced(self) -> int: @@ -545,7 +541,7 @@ def non_local(self) -> bool: @property def schema(self) -> schemata.ArraySchema: - return self.child.schema.prepend( + return self.child.schema.append( schemata.SchemaItem(self.col_id, bigframes.dtypes.INT_DTYPE) ) @@ -626,6 +622,10 @@ def relation_ops_created(self) -> int: class SelectionNode(UnaryNode): input_output_pairs: typing.Tuple[typing.Tuple[str, str], ...] + def __post_init__(self): + for input, _ in self.input_output_pairs: + assert input in self.child.schema.names + def __hash__(self): return self._node_hash diff --git a/bigframes/core/schema.py b/bigframes/core/schema.py index ee27c6ff30..03e4de8993 100644 --- a/bigframes/core/schema.py +++ b/bigframes/core/schema.py @@ -92,3 +92,6 @@ def update_dtype( def get_type(self, id: ColumnIdentifierType): return self._mapping[id] + + def __len__(self) -> int: + return len(self.items) From 40113d8079446452b87b9c7e794a86f2635d1ff2 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Wed, 11 Sep 2024 14:54:44 -0700 Subject: [PATCH 02/18] refactor: Switch explode node to use column offsets (#978) --- bigframes/core/__init__.py | 8 +++++--- bigframes/core/compile/compiled.py | 11 ++++++++--- bigframes/core/nodes.py | 9 ++++++--- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index b4074dd94f..a0563e6edd 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -382,9 +382,8 @@ def explode(self, column_ids: typing.Sequence[str]) -> ArrayValue: for column_id in column_ids: assert bigframes.dtypes.is_array_like(self.get_column_type(column_id)) - return ArrayValue( - nodes.ExplodeNode(child=self.node, column_ids=tuple(column_ids)) - ) + offsets = tuple(self.get_offset_for_name(id) for id in column_ids) + return ArrayValue(nodes.ExplodeNode(child=self.node, column_ids=offsets)) def _uniform_sampling(self, fraction: float) -> ArrayValue: """Sampling the table on given fraction. @@ -393,3 +392,6 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue: The row numbers of result is non-deterministic, avoid to use. 
""" return ArrayValue(nodes.RandomSampleNode(self.node, fraction)) + + def get_offset_for_name(self, name: str): + return self.schema.names.index(name) diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 38b8fb50e3..2a13ce0da0 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -401,8 +401,9 @@ def _uniform_sampling(self, fraction: float) -> UnorderedIR: columns=columns, ) - def explode(self, column_ids: typing.Sequence[str]) -> UnorderedIR: + def explode(self, offsets: typing.Sequence[int]) -> UnorderedIR: table = self._to_ibis_expr() + column_ids = tuple(table.columns[offset] for offset in offsets) # The offset array ensures null represents empty arrays after unnesting. offset_array_id = bigframes.core.guid.generate_guid("offset_array_") @@ -712,8 +713,9 @@ def _uniform_sampling(self, fraction: float) -> OrderedIR: ordering=self._ordering, ) - def explode(self, column_ids: typing.Sequence[str]) -> OrderedIR: + def explode(self, offsets: typing.Sequence[int]) -> OrderedIR: table = self._to_ibis_expr(ordering_mode="unordered", expose_hidden_cols=True) + column_ids = tuple(table.columns[offset] for offset in offsets) offset_array_id = bigframes.core.guid.generate_guid("offset_array_") offset_array = ( @@ -721,7 +723,10 @@ def explode(self, column_ids: typing.Sequence[str]) -> OrderedIR: ibis.greatest( 0, ibis.least( - *[table[column_id].length() - 1 for column_id in column_ids] + *[ + table[table.columns[offset]].length() - 1 + for offset in offsets + ] ), ) ) diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index e90ecd06b6..e11fcafff7 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -42,6 +42,9 @@ OVERHEAD_VARIABLES = 5 +COL_OFFSET = int + + @dataclass(frozen=True) class BigFrameNode: """ @@ -826,7 +829,7 @@ def variables_introduced(self) -> int: @dataclass(frozen=True) class ExplodeNode(UnaryNode): - column_ids: typing.Tuple[str, ...] + column_ids: typing.Tuple[COL_OFFSET, ...] @property def row_preserving(self) -> bool: @@ -844,9 +847,9 @@ def schema(self) -> schemata.ArraySchema: self.child.schema.get_type(name).pyarrow_dtype.value_type ), ) - if name in self.column_ids + if offset in self.column_ids else schemata.SchemaItem(name, self.child.schema.get_type(name)) - for name in self.child.schema.names + for offset, name in enumerate(self.child.schema.names) ) return schemata.ArraySchema(items) From c94ead996e3bfa98edd51ff678a3d43a10ee980f Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 12 Sep 2024 09:42:47 -0700 Subject: [PATCH 03/18] chore: apply `remote_function` on the original series without reprojecting (#874) * chore: apply `remote_function` on the original series This change tests application of remote function without reprojecting the original series. 
* add failing mask doctest as a system test for easier debugging * more comprehensive repr tests * more tests, move to small tests * rename "name" param * manipulate copy of the original udf * move the funciton copy after i/o types resolution * rename all params to avoid collisions, widely use bigframes_ prefix for consistency --- .../functions/_remote_function_session.py | 18 +++ bigframes/functions/remote_function.py | 11 +- bigframes/series.py | 8 +- tests/system/small/test_remote_function.py | 110 ++++++++++++++++++ 4 files changed, 138 insertions(+), 9 deletions(-) diff --git a/bigframes/functions/_remote_function_session.py b/bigframes/functions/_remote_function_session.py index 893b903aeb..c947fcdc63 100644 --- a/bigframes/functions/_remote_function_session.py +++ b/bigframes/functions/_remote_function_session.py @@ -22,6 +22,7 @@ from typing import Any, cast, Dict, Mapping, Optional, Sequence, TYPE_CHECKING, Union import warnings +import cloudpickle import google.api_core.exceptions from google.cloud import ( bigquery, @@ -458,6 +459,11 @@ def wrapper(func): session=session, # type: ignore ) + # To respect the user code/environment let's use a copy of the + # original udf, especially since we would be setting some properties + # on it + func = cloudpickle.loads(cloudpickle.dumps(func)) + # In the unlikely case where the user is trying to re-deploy the same # function, cleanup the attributes we add below, first. This prevents # the pickle from having dependencies that might not otherwise be @@ -499,6 +505,18 @@ def try_delattr(attr): cloud_function_memory_mib=cloud_function_memory_mib, ) + # TODO(shobs): Find a better way to support udfs with param named "name". + # This causes an issue in the ibis compilation. + func.__signature__ = inspect.signature(func).replace( # type: ignore + parameters=[ + inspect.Parameter( + f"bigframes_{param.name}", + param.kind, + ) + for param in inspect.signature(func).parameters.values() + ] + ) + # TODO: Move ibis logic to compiler step node = ibis.udf.scalar.builtin( func, diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index 39e3bfd8f0..b4c74e90d6 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -144,16 +144,21 @@ def read_gbq_function( # The name "args" conflicts with the Ibis operator, so we use # non-standard names for the arguments here. - def func(*ignored_args, **ignored_kwargs): + def func(*bigframes_args, **bigframes_kwargs): f"""Remote function {str(routine_ref)}.""" nonlocal node # type: ignore - expr = node(*ignored_args, **ignored_kwargs) # type: ignore + expr = node(*bigframes_args, **bigframes_kwargs) # type: ignore return ibis_client.execute(expr) func.__signature__ = inspect.signature(func).replace( # type: ignore parameters=[ - inspect.Parameter(name, inspect.Parameter.POSITIONAL_OR_KEYWORD) + # TODO(shobs): Find a better way to support functions with param + # named "name". This causes an issue in the ibis compilation. + inspect.Parameter( + f"bigframes_{name}", + inspect.Parameter.POSITIONAL_OR_KEYWORD, + ) for name in ibis_signature.parameter_names ] ) diff --git a/bigframes/series.py b/bigframes/series.py index 5192a9cf49..d9e3bb19dd 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1481,12 +1481,8 @@ def apply( ex.message += f"\n{_remote_function_recommendation_message}" raise - # We are working with remote function at this point. - # Reproject as workaround to applying filter too late. 
This forces the - # filter to be applied before passing data to remote function, - # protecting from bad inputs causing errors. - reprojected_series = Series(self._block._force_reproject()) - result_series = reprojected_series._apply_unary_op( + # We are working with remote function at this point + result_series = self._apply_unary_op( ops.RemoteFunctionOp(func=func, apply_on_null=True) ) diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py index b000354ed4..5ffda56f92 100644 --- a/tests/system/small/test_remote_function.py +++ b/tests/system/small/test_remote_function.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import inspect import re import google.api_core.exceptions @@ -972,3 +973,112 @@ def echo_len(row): bigframes.exceptions.PreviewWarning, match="axis=1 scenario is in preview." ): scalars_df[[column]].apply(echo_len_remote, axis=1) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_application_repr(session, dataset_id_permanent): + # This function deliberately has a param with name "name", this is to test + # a specific ibis' internal handling of object names + def should_mask(name: str) -> bool: + hash = 0 + for char_ in name: + hash += ord(char_) + return hash % 2 == 0 + + assert "name" in inspect.signature(should_mask).parameters + + should_mask = session.remote_function( + dataset=dataset_id_permanent, name=get_rf_name(should_mask) + )(should_mask) + + s = bigframes.series.Series(["Alice", "Bob", "Caroline"]) + + repr(s.apply(should_mask)) + repr(s.where(s.apply(should_mask))) + repr(s.where(~s.apply(should_mask))) + repr(s.mask(should_mask)) + repr(s.mask(should_mask, "REDACTED")) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_read_gbq_function_application_repr(session, dataset_id, scalars_df_index): + gbq_function = f"{dataset_id}.should_mask" + + # This function deliberately has a param with name "name", this is to test + # a specific ibis' internal handling of object names + session.bqclient.query_and_wait( + f"CREATE OR REPLACE FUNCTION `{gbq_function}`(name STRING) RETURNS BOOL AS (MOD(LENGTH(name), 2) = 1)" + ) + routine = session.bqclient.get_routine(gbq_function) + assert "name" in [arg.name for arg in routine.arguments] + + # read the function and apply to dataframe + should_mask = session.read_gbq_function(gbq_function) + + s = scalars_df_index["string_col"] + + repr(s.apply(should_mask)) + repr(s.where(s.apply(should_mask))) + repr(s.where(~s.apply(should_mask))) + repr(s.mask(should_mask)) + repr(s.mask(should_mask, "REDACTED")) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_apply_after_filter(session, dataset_id_permanent, scalars_dfs): + + # This function is deliberately written to not work with NA input + def plus_one(x: int) -> int: + return x + 1 + + scalars_df, scalars_pandas_df = scalars_dfs + int_col_name_with_nulls = "int64_col" + + # make sure there are NA values in the test column + assert any([pd.isna(val) for val in scalars_df[int_col_name_with_nulls]]) + + # create a remote function + plus_one_remote = session.remote_function( + dataset=dataset_id_permanent, name=get_rf_name(plus_one) + )(plus_one) + + # with nulls in the series the remote function application would fail + with pytest.raises( + google.api_core.exceptions.BadRequest, match="unsupported operand" + ): + scalars_df[int_col_name_with_nulls].apply(plus_one_remote).to_pandas() + + # after filtering out nulls the 
remote function application should works + # similar to pandas + pd_result = scalars_pandas_df[scalars_pandas_df[int_col_name_with_nulls].notnull()][ + int_col_name_with_nulls + ].apply(plus_one) + bf_result = ( + scalars_df[scalars_df[int_col_name_with_nulls].notnull()][ + int_col_name_with_nulls + ] + .apply(plus_one_remote) + .to_pandas() + ) + + # ignore pandas "int64" vs bigframes "Int64" dtype difference + pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_apply_assign_partial_ordering_mode(dataset_id_permanent): + session = bigframes.Session(bigframes.BigQueryOptions(ordering_mode="partial")) + + df = session.read_gbq("bigquery-public-data.baseball.schedules")[ + ["duration_minutes"] + ] + + def plus_one(x: int) -> int: + return x + 1 + + plus_one = session.remote_function( + dataset=dataset_id_permanent, name=get_rf_name(plus_one) + )(plus_one) + + df1 = df.assign(duration_cat=df["duration_minutes"].apply(plus_one)) + repr(df1) From 42b0724fc5859614a0771bcee6cb972056543436 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 12 Sep 2024 18:41:48 -0700 Subject: [PATCH 04/18] test: disable claude3 predict tests to protect bqml capacity (#983) --- tests/system/small/ml/test_llm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py index e3d2b51081..cd5d65c458 100644 --- a/tests/system/small/ml/test_llm.py +++ b/tests/system/small/ml/test_llm.py @@ -439,6 +439,7 @@ def test_claude3_text_generator_create_load( assert reloaded_model.model_name == model_name +@pytest.mark.skip("b/366290533 too many requests are exhausting bqml capacity") @pytest.mark.parametrize( "model_name", ("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"), @@ -458,6 +459,7 @@ def test_claude3_text_generator_predict_default_params_success( ) +@pytest.mark.skip("b/366290533 too many requests are exhausting bqml capacity") @pytest.mark.parametrize( "model_name", ("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"), From 30e11d901e72faf02c6d594eaa03c4eb24e69b2a Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Fri, 13 Sep 2024 10:58:55 -0700 Subject: [PATCH 05/18] refactor: Limit access to ArrayValue node field. 
(#977) --- bigframes/core/__init__.py | 15 +++++++++++++++ bigframes/core/blocks.py | 14 +++++--------- bigframes/core/validations.py | 2 +- bigframes/session/executor.py | 4 ++-- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index a0563e6edd..aa3aa63cc2 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -36,6 +36,7 @@ import bigframes.core.ordering as orderings import bigframes.core.rewrite import bigframes.core.schema as schemata +import bigframes.core.tree_properties import bigframes.core.utils from bigframes.core.window_spec import WindowSpec import bigframes.dtypes @@ -124,6 +125,20 @@ def schema(self) -> schemata.ArraySchema: def _compiled_schema(self) -> schemata.ArraySchema: return bigframes.core.compile.test_only_ibis_inferred_schema(self.node) + @property + def explicitly_ordered(self) -> bool: + # see BigFrameNode.explicitly_ordered + return self.node.explicitly_ordered + + @property + def order_ambiguous(self) -> bool: + # see BigFrameNode.order_ambiguous + return self.node.order_ambiguous + + @property + def supports_fast_peek(self) -> bool: + return bigframes.core.tree_properties.can_fast_peek(self.node) + def as_cached( self: ArrayValue, cache_table: google.cloud.bigquery.Table, diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 42b1a0aeb0..feeed78885 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -49,7 +49,6 @@ import bigframes.core.ordering as ordering import bigframes.core.schema as bf_schema import bigframes.core.sql as sql -import bigframes.core.tree_properties as tree_properties import bigframes.core.utils as utils import bigframes.core.window_spec as window_specs import bigframes.dtypes @@ -205,7 +204,7 @@ def shape(self) -> typing.Tuple[int, int]: row_count_expr = self.expr.row_count() # Support in-memory engines for hermetic unit tests. 
- if self.expr.node.session is None: + if self.expr.session is None: try: row_count = row_count_expr._try_evaluate_local().squeeze() return (row_count, len(self.value_columns)) @@ -283,7 +282,7 @@ def index_name_to_col_id(self) -> typing.Mapping[Label, typing.Sequence[str]]: @property def explicitly_ordered(self) -> bool: - return self.expr.node.explicitly_ordered + return self.expr.explicitly_ordered def cols_matching_label(self, partial_label: Label) -> typing.Sequence[str]: """ @@ -466,7 +465,7 @@ def _validate_result_schema( ): actual_schema = tuple(bq_result_schema) ibis_schema = self.expr._compiled_schema - internal_schema = self.expr.node.schema + internal_schema = self.expr.schema if not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable: return if internal_schema.to_bigquery() != actual_schema: @@ -561,7 +560,7 @@ def to_pandas( def try_peek( self, n: int = 20, force: bool = False ) -> typing.Optional[pd.DataFrame]: - if force or tree_properties.can_fast_peek(self.expr.node): + if force or self.expr.supports_fast_peek: iterator, _ = self.session._peek(self.expr, n) df = self._to_dataframe(iterator) self._copy_index_to_pandas(df) @@ -2365,10 +2364,7 @@ def cached(self, *, force: bool = False, session_aware: bool = False) -> None: if (not force) and self.session._executor._is_trivially_executable(self.expr): return elif session_aware: - bfet_roots = [obj._block._expr.node for obj in self.session.objects] - self.session._executor._cache_with_session_awareness( - self.expr, session_forest=bfet_roots - ) + self.session._executor._cache_with_session_awareness(self.expr) else: self.session._executor._cache_with_cluster_cols( self.expr, cluster_cols=self.index_columns diff --git a/bigframes/core/validations.py b/bigframes/core/validations.py index daa1252824..ca65445dab 100644 --- a/bigframes/core/validations.py +++ b/bigframes/core/validations.py @@ -66,7 +66,7 @@ def enforce_ordered( object: HasSession, opname: str, suggestion: Optional[str] = None ) -> None: session = object._session - if session._strictly_ordered or not object._block.expr.node.order_ambiguous: + if session._strictly_ordered or not object._block.expr.order_ambiguous: # No ambiguity for how to calculate ordering, so no error or warning return None if not session._allows_ambiguity: diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index 424e6d7dad..0512fa78e3 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -15,7 +15,7 @@ from __future__ import annotations import math -from typing import cast, Iterable, Literal, Mapping, Optional, Sequence, Tuple, Union +from typing import cast, Literal, Mapping, Optional, Sequence, Tuple, Union import warnings import weakref @@ -381,8 +381,8 @@ def _cache_with_offsets(self, array_value: bigframes.core.ArrayValue): def _cache_with_session_awareness( self, array_value: bigframes.core.ArrayValue, - session_forest: Iterable[nodes.BigFrameNode], ) -> None: + session_forest = [obj._block._expr.node for obj in array_value.session.objects] # These node types are cheap to re-compute target, cluster_cols = bigframes.session.planner.session_aware_cache_plan( array_value.node, list(session_forest) From 0f047b4fae2a10b2a465c506bea561f8bb8d4262 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Fri, 13 Sep 2024 13:03:34 -0700 Subject: [PATCH 06/18] fix: DataFrameGroupby.agg now works with unnamed tuples (#985) --- bigframes/core/groupby/__init__.py | 10 ++++----- tests/system/small/test_groupby.py | 35 
++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 2b80d0389e..a0105f4ef0 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -414,12 +414,10 @@ def _agg_named(self, **kwargs) -> df.DataFrame: raise NotImplementedError( f"Only string aggregate names supported. {constants.FEEDBACK_LINK}" ) - if not hasattr(v, "column") or not hasattr(v, "aggfunc"): - import bigframes.pandas as bpd - - raise TypeError(f"kwargs values must be {bpd.NamedAgg.__qualname__}") - col_id = self._resolve_label(v.column) - aggregations.append((col_id, agg_ops.lookup_agg_func(v.aggfunc))) + if not isinstance(v, tuple) or (len(v) != 2): + raise TypeError("kwargs values must be 2-tuples of column, aggfunc") + col_id = self._resolve_label(v[0]) + aggregations.append((col_id, agg_ops.lookup_agg_func(v[1]))) column_labels.append(k) agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 8e3baff4c2..8574860daa 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -247,6 +247,41 @@ def test_dataframe_groupby_agg_named(scalars_df_index, scalars_pandas_df_index): pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) +def test_dataframe_groupby_agg_kw_tuples(scalars_df_index, scalars_pandas_df_index): + col_names = ["int64_too", "float64_col", "int64_col", "bool_col", "string_col"] + bf_result = ( + scalars_df_index[col_names] + .groupby("string_col") + .agg( + agg1=("int64_too", "sum"), + agg2=("float64_col", "max"), + ) + ) + pd_result = ( + scalars_pandas_df_index[col_names] + .groupby("string_col") + .agg(agg1=("int64_too", "sum"), agg2=("float64_col", "max")) + ) + bf_result_computed = bf_result.to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) + + +@pytest.mark.parametrize( + ("kwargs"), + [ + ({"hello": "world"}), + ({"too_many_fields": ("one", "two", "three")}), + ], +) +def test_dataframe_groupby_agg_kw_error(scalars_df_index, kwargs): + col_names = ["int64_too", "float64_col", "int64_col", "bool_col", "string_col"] + with pytest.raises( + TypeError, match=r"kwargs values must be 2-tuples of column, aggfunc" + ): + (scalars_df_index[col_names].groupby("string_col").agg(**kwargs)) + + @pytest.mark.parametrize( ("as_index"), [ From 06c3120d5111cf9ecde4decc6af436cc3487ae23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 13 Sep 2024 15:05:23 -0500 Subject: [PATCH 07/18] refactor: remove circular dependencies preventing local doctest runs (#987) With this change I can once again run ``` pytest --doctest-modules third_party/bigframes_vendored/pandas/core/frame.py ``` Note: having multiple `version.py` files should be fine. release-please will update all such files it finds. 
--- bigframes/bigquery/__init__.py | 3 ++- bigframes/constants.py | 8 -------- bigframes/core/block_transforms.py | 5 +++-- bigframes/core/blocks.py | 2 +- bigframes/core/compile/aggregate_compiler.py | 2 +- bigframes/core/compile/ibis_types.py | 2 +- bigframes/core/compile/scalar_op_compiler.py | 2 +- bigframes/core/groupby/__init__.py | 2 +- bigframes/core/indexers.py | 2 +- bigframes/core/indexes/base.py | 2 +- bigframes/core/reshape/__init__.py | 2 +- bigframes/core/tools/datetimes.py | 2 +- bigframes/core/validations.py | 7 ++++--- bigframes/dataframe.py | 4 ++-- bigframes/dtypes.py | 3 +-- bigframes/formatting_helpers.py | 3 +-- bigframes/functions/_remote_function_client.py | 2 +- bigframes/functions/_remote_function_session.py | 3 ++- bigframes/functions/remote_function.py | 2 +- bigframes/ml/compose.py | 2 +- bigframes/ml/linear_model.py | 2 +- bigframes/ml/llm.py | 3 ++- bigframes/ml/loader.py | 2 +- bigframes/ml/metrics/_metrics.py | 2 +- bigframes/ml/pipeline.py | 2 +- bigframes/ml/sql.py | 3 +-- bigframes/ml/utils.py | 2 +- bigframes/operations/_matplotlib/core.py | 2 +- bigframes/operations/_matplotlib/hist.py | 2 +- bigframes/operations/base.py | 2 +- bigframes/operations/plotting.py | 2 +- bigframes/operations/strings.py | 2 +- bigframes/pandas/__init__.py | 2 +- bigframes/series.py | 2 +- bigframes/session/__init__.py | 2 +- bigframes/session/_io/bigquery/read_gbq_table.py | 4 ++-- bigframes/session/_io/pandas.py | 4 ++-- bigframes/session/loader.py | 6 ++++-- tests/unit/test_constants.py | 9 ++++++--- tests/unit/test_formatting_helpers.py | 5 +++-- third_party/bigframes_vendored/constants.py | 6 ++---- third_party/bigframes_vendored/version.py | 15 +++++++++++++++ 42 files changed, 77 insertions(+), 64 deletions(-) create mode 100644 third_party/bigframes_vendored/version.py diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 1e8e8d578d..303120b88a 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -23,7 +23,8 @@ import typing from typing import Literal, Optional, Union -import bigframes.constants as constants +import bigframes_vendored.constants as constants + import bigframes.core.groupby as groupby import bigframes.core.sql import bigframes.ml.utils as utils diff --git a/bigframes/constants.py b/bigframes/constants.py index d6fe699713..4d5b6b8eb3 100644 --- a/bigframes/constants.py +++ b/bigframes/constants.py @@ -19,14 +19,6 @@ import datetime -import bigframes_vendored.constants - -BF_VERSION = bigframes_vendored.constants.BF_VERSION -FEEDBACK_LINK = bigframes_vendored.constants.FEEDBACK_LINK -ABSTRACT_METHOD_ERROR_MESSAGE = ( - bigframes_vendored.constants.ABSTRACT_METHOD_ERROR_MESSAGE -) - DEFAULT_EXPIRATION = datetime.timedelta(days=7) # https://cloud.google.com/bigquery/docs/locations diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index eaee2e2cc0..eaac0dc785 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -17,9 +17,10 @@ import typing from typing import Sequence +import bigframes_vendored.constants as constants import pandas as pd -import bigframes.constants as constants +import bigframes.constants import bigframes.core as core import bigframes.core.blocks as blocks import bigframes.core.expression as ex @@ -117,7 +118,7 @@ def quantile( ) quantile_cols = [] labels = [] - if len(columns) * len(qs) > constants.MAX_COLUMNS: + if len(columns) * len(qs) > bigframes.constants.MAX_COLUMNS: raise NotImplementedError("Too many 
aggregates requested.") for col in columns: for q in qs: diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index feeed78885..3e97b1cb5e 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -32,13 +32,13 @@ from typing import Iterable, List, Literal, Mapping, Optional, Sequence, Tuple, Union import warnings +import bigframes_vendored.constants as constants import google.cloud.bigquery as bigquery import pandas as pd import pyarrow as pa import bigframes._config.sampling_options as sampling_options import bigframes.constants -import bigframes.constants as constants import bigframes.core as core import bigframes.core.compile.googlesql as googlesql import bigframes.core.expression as ex diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py index 58973b10eb..488acd63db 100644 --- a/bigframes/core/compile/aggregate_compiler.py +++ b/bigframes/core/compile/aggregate_compiler.py @@ -15,13 +15,13 @@ import typing from typing import cast, Optional +import bigframes_vendored.constants as constants import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops import ibis import ibis.expr.datatypes as ibis_dtypes import ibis.expr.types as ibis_types import pandas as pd -import bigframes.constants as constants import bigframes.core.compile.ibis_types as compile_ibis_types import bigframes.core.compile.scalar_op_compiler as scalar_compilers import bigframes.core.expression as ex diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py index f4ec295d5f..77bfb84425 100644 --- a/bigframes/core/compile/ibis_types.py +++ b/bigframes/core/compile/ibis_types.py @@ -17,6 +17,7 @@ from typing import Any, cast, Dict, Iterable, Optional, Tuple, Union import warnings +import bigframes_vendored.constants as constants import bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops import geopandas as gpd # type: ignore @@ -29,7 +30,6 @@ import pandas as pd import pyarrow as pa -import bigframes.constants as constants import bigframes.dtypes # Type hints for Ibis data types supported by BigQuery DataFrame diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 9e18b391d6..c0f12865d6 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -17,6 +17,7 @@ import functools import typing +import bigframes_vendored.constants as constants import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops import ibis import ibis.common.exceptions @@ -26,7 +27,6 @@ import numpy as np import pandas as pd -import bigframes.constants as constants import bigframes.core.compile.ibis_types import bigframes.core.expression as ex import bigframes.dtypes diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index a0105f4ef0..eda517964d 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -17,10 +17,10 @@ import typing from typing import Sequence, Union +import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby import pandas as pd -import bigframes.constants as constants from bigframes.core import log_adapter import bigframes.core as core import bigframes.core.block_transforms as block_ops diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index dae5eada70..06d9c4bbab 
100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -17,10 +17,10 @@ import typing from typing import Tuple, Union +import bigframes_vendored.constants as constants import ibis import pandas as pd -import bigframes.constants as constants import bigframes.core.blocks import bigframes.core.expression as ex import bigframes.core.guid as guid diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 0376e37f96..017702b85a 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -19,12 +19,12 @@ import typing from typing import Hashable, Optional, Sequence, Union +import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.indexes.base as vendored_pandas_index import google.cloud.bigquery as bigquery import numpy as np import pandas -import bigframes.constants as constants import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks import bigframes.core.expression as ex diff --git a/bigframes/core/reshape/__init__.py b/bigframes/core/reshape/__init__.py index a23461bdb9..49ecedcc87 100644 --- a/bigframes/core/reshape/__init__.py +++ b/bigframes/core/reshape/__init__.py @@ -16,9 +16,9 @@ import typing from typing import Iterable, Literal, Optional, Union +import bigframes_vendored.constants as constants import pandas as pd -import bigframes.constants as constants import bigframes.core.expression as ex import bigframes.core.ordering as order import bigframes.core.utils as utils diff --git a/bigframes/core/tools/datetimes.py b/bigframes/core/tools/datetimes.py index 5d8d8c9685..2abb86a2f3 100644 --- a/bigframes/core/tools/datetimes.py +++ b/bigframes/core/tools/datetimes.py @@ -16,10 +16,10 @@ from datetime import datetime from typing import Optional, Union +import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes import pandas as pd -import bigframes.constants as constants import bigframes.dataframe import bigframes.dtypes import bigframes.operations as ops diff --git a/bigframes/core/validations.py b/bigframes/core/validations.py index ca65445dab..701752c9fc 100644 --- a/bigframes/core/validations.py +++ b/bigframes/core/validations.py @@ -19,7 +19,8 @@ import functools from typing import Optional, Protocol, TYPE_CHECKING, Union -import bigframes.constants +import bigframes_vendored.constants as constants + import bigframes.exceptions if TYPE_CHECKING: @@ -72,9 +73,9 @@ def enforce_ordered( if not session._allows_ambiguity: suggestion_substr = suggestion + " " if suggestion else "" raise bigframes.exceptions.OrderRequiredError( - f"Op {opname} not supported when strict ordering is disabled. {suggestion_substr}{bigframes.constants.FEEDBACK_LINK}" + f"Op {opname} not supported when strict ordering is disabled. {suggestion_substr}{constants.FEEDBACK_LINK}" ) if not object._block.explicitly_ordered: raise bigframes.exceptions.OrderRequiredError( - f"Op {opname} requires an ordering. Use .sort_values or .sort_index to provide an ordering. {bigframes.constants.FEEDBACK_LINK}" + f"Op {opname} requires an ordering. Use .sort_values or .sort_index to provide an ordering. 
{constants.FEEDBACK_LINK}" ) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 2ae6aefe1b..d7bdd5529b 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -36,6 +36,7 @@ ) import warnings +import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.frame as vendored_pandas_frame import bigframes_vendored.pandas.pandas._typing as vendored_pandas_typing import google.api_core.exceptions @@ -49,7 +50,6 @@ import bigframes import bigframes._config.display_options as display_options import bigframes.constants -import bigframes.constants as constants import bigframes.core from bigframes.core import log_adapter import bigframes.core.block_transforms as block_ops @@ -3106,7 +3106,7 @@ def to_gbq( self._session.bqclient, temp_table_ref, datetime.datetime.now(datetime.timezone.utc) - + constants.DEFAULT_EXPIRATION, + + bigframes.constants.DEFAULT_EXPIRATION, ) if len(labels) != 0: diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index bfed783e1e..3cd2507231 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -20,14 +20,13 @@ import typing from typing import Dict, Literal, Union +import bigframes_vendored.constants as constants import geopandas as gpd # type: ignore import google.cloud.bigquery import numpy as np import pandas as pd import pyarrow as pa -import bigframes.constants as constants - # Type hints for Pandas dtypes supported by BigQuery DataFrame Dtype = Union[ pd.BooleanDtype, diff --git a/bigframes/formatting_helpers.py b/bigframes/formatting_helpers.py index 752aeb7a10..de0ae8cc68 100644 --- a/bigframes/formatting_helpers.py +++ b/bigframes/formatting_helpers.py @@ -19,6 +19,7 @@ import random from typing import Any, Optional, Union +import bigframes_vendored.constants as constants import google.api_core.exceptions as api_core_exceptions import google.cloud.bigquery as bigquery import humanize @@ -26,8 +27,6 @@ import IPython.display as display import ipywidgets as widgets -import bigframes.constants as constants - GenericJob = Union[ bigquery.LoadJob, bigquery.ExtractJob, bigquery.QueryJob, bigquery.CopyJob ] diff --git a/bigframes/functions/_remote_function_client.py b/bigframes/functions/_remote_function_client.py index 3698bda28b..75385f11a5 100644 --- a/bigframes/functions/_remote_function_client.py +++ b/bigframes/functions/_remote_function_client.py @@ -25,9 +25,9 @@ import tempfile from typing import cast, Tuple, TYPE_CHECKING +from bigframes_vendored import constants import requests -from bigframes import constants import bigframes.functions.remote_function_template if TYPE_CHECKING: diff --git a/bigframes/functions/_remote_function_session.py b/bigframes/functions/_remote_function_session.py index c947fcdc63..0510980178 100644 --- a/bigframes/functions/_remote_function_session.py +++ b/bigframes/functions/_remote_function_session.py @@ -22,6 +22,7 @@ from typing import Any, cast, Dict, Mapping, Optional, Sequence, TYPE_CHECKING, Union import warnings +import bigframes_vendored.constants as constants import cloudpickle import google.api_core.exceptions from google.cloud import ( @@ -31,7 +32,7 @@ resourcemanager_v3, ) -from bigframes import clients, constants +from bigframes import clients if TYPE_CHECKING: from bigframes.session import Session diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index b4c74e90d6..4dc6c1ad6b 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -24,12 +24,12 @@ if TYPE_CHECKING: from 
bigframes.session import Session +import bigframes_vendored.constants as constants import google.api_core.exceptions import google.api_core.retry from google.cloud import bigquery import google.iam.v1 -import bigframes.constants as constants import bigframes.core.compile.ibis_types import bigframes.dtypes import bigframes.functions.remote_function_template diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index 4ea63d2e81..3cfa1851f5 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -23,10 +23,10 @@ import typing from typing import cast, Iterable, List, Optional, Set, Tuple, Union +from bigframes_vendored import constants import bigframes_vendored.sklearn.compose._column_transformer from google.cloud import bigquery -from bigframes import constants from bigframes.core import log_adapter from bigframes.ml import base, core, globals, impute, preprocessing, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index 0816ef9b24..8fe1d6ec27 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -19,12 +19,12 @@ from typing import Dict, List, Literal, Optional, Union +import bigframes_vendored.constants as constants import bigframes_vendored.sklearn.linear_model._base import bigframes_vendored.sklearn.linear_model._logistic from google.cloud import bigquery import bigframes -import bigframes.constants as constants from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index a3cd065a55..53a9d40c6e 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -19,10 +19,11 @@ from typing import cast, Literal, Optional, Union import warnings +import bigframes_vendored.constants as constants from google.cloud import bigquery import bigframes -from bigframes import clients, constants +from bigframes import clients from bigframes.core import blocks, log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/loader.py b/bigframes/ml/loader.py index 4e7e808260..de9681660e 100644 --- a/bigframes/ml/loader.py +++ b/bigframes/ml/loader.py @@ -17,10 +17,10 @@ from types import MappingProxyType from typing import Union +import bigframes_vendored.constants as constants from google.cloud import bigquery import bigframes -import bigframes.constants as constants from bigframes.ml import ( cluster, compose, diff --git a/bigframes/ml/metrics/_metrics.py b/bigframes/ml/metrics/_metrics.py index a40c175000..3c2d6514ae 100644 --- a/bigframes/ml/metrics/_metrics.py +++ b/bigframes/ml/metrics/_metrics.py @@ -19,6 +19,7 @@ import typing from typing import Tuple, Union +import bigframes_vendored.constants as constants import bigframes_vendored.sklearn.metrics._classification as vendored_metrics_classification import bigframes_vendored.sklearn.metrics._ranking as vendored_metrics_ranking import bigframes_vendored.sklearn.metrics._regression as vendored_metrics_regression @@ -26,7 +27,6 @@ import pandas as pd import sklearn.metrics as sklearn_metrics # type: ignore -import bigframes.constants as constants from bigframes.ml import utils import bigframes.pandas as bpd diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py index 4cd60c5836..dc3bd1f3f4 100644 --- a/bigframes/ml/pipeline.py +++ b/bigframes/ml/pipeline.py @@ -20,11 +20,11 @@ from typing import List, Optional, Tuple, Union +import bigframes_vendored.constants 
as constants import bigframes_vendored.sklearn.pipeline from google.cloud import bigquery import bigframes -import bigframes.constants as constants from bigframes.core import log_adapter from bigframes.ml import ( base, diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index d14627f590..7120a5a5fd 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -18,10 +18,9 @@ from typing import Iterable, Literal, Mapping, Optional, Union +import bigframes_vendored.constants as constants import google.cloud.bigquery -import bigframes.constants as constants - # TODO: Add proper escaping logic from core/compile module class BaseSqlGenerator: diff --git a/bigframes/ml/utils.py b/bigframes/ml/utils.py index 75dfb916f6..d754b1d002 100644 --- a/bigframes/ml/utils.py +++ b/bigframes/ml/utils.py @@ -15,9 +15,9 @@ import typing from typing import Any, Iterable, Literal, Mapping, Optional, Union +import bigframes_vendored.constants as constants from google.cloud import bigquery -import bigframes.constants as constants from bigframes.core import blocks import bigframes.pandas as bpd diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index ff8dd86cff..9e59e09877 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -15,9 +15,9 @@ import abc import typing +import bigframes_vendored.constants as constants import pandas as pd -import bigframes.constants as constants import bigframes.dtypes as dtypes DEFAULT_SAMPLING_N = 1000 diff --git a/bigframes/operations/_matplotlib/hist.py b/bigframes/operations/_matplotlib/hist.py index 720b94d7da..213e2abd77 100644 --- a/bigframes/operations/_matplotlib/hist.py +++ b/bigframes/operations/_matplotlib/hist.py @@ -15,10 +15,10 @@ import itertools from typing import Literal +import bigframes_vendored.constants as constants import numpy as np import pandas as pd -import bigframes.constants as constants import bigframes.operations._matplotlib.core as bfplt diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index 2f87045415..68f46baded 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -17,10 +17,10 @@ import typing from typing import List, Sequence +import bigframes_vendored.constants as constants import bigframes_vendored.pandas.pandas._typing as vendored_pandas_typing import pandas as pd -import bigframes.constants as constants import bigframes.core.blocks as blocks import bigframes.core.convert import bigframes.core.expression as ex diff --git a/bigframes/operations/plotting.py b/bigframes/operations/plotting.py index ff74806993..a45b825354 100644 --- a/bigframes/operations/plotting.py +++ b/bigframes/operations/plotting.py @@ -14,9 +14,9 @@ import typing +import bigframes_vendored.constants as constants import bigframes_vendored.pandas.plotting._core as vendordt -import bigframes.constants as constants import bigframes.operations._matplotlib as bfplt diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index 4af142e0d5..2e40115985 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -17,9 +17,9 @@ import re from typing import cast, Literal, Optional, Union +import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.strings.accessor as vendorstr -import bigframes.constants as constants from bigframes.core import log_adapter import bigframes.dataframe as df import bigframes.operations as ops diff --git a/bigframes/pandas/__init__.py 
b/bigframes/pandas/__init__.py index 3809384c95..94ea6becab 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -36,6 +36,7 @@ Union, ) +import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.reshape.concat as vendored_pandas_concat import bigframes_vendored.pandas.core.reshape.encoding as vendored_pandas_encoding import bigframes_vendored.pandas.core.reshape.merge as vendored_pandas_merge @@ -53,7 +54,6 @@ ) import bigframes._config as config -import bigframes.constants as constants import bigframes.core.blocks import bigframes.core.expression as ex import bigframes.core.global_session as global_session diff --git a/bigframes/series.py b/bigframes/series.py index d9e3bb19dd..d4cb1d3700 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -24,6 +24,7 @@ import typing from typing import Any, cast, Literal, Mapping, Optional, Sequence, Tuple, Union +import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.series as vendored_pandas_series import google.cloud.bigquery as bigquery import numpy @@ -31,7 +32,6 @@ import pandas.core.dtypes.common import typing_extensions -import bigframes.constants as constants import bigframes.core from bigframes.core import log_adapter import bigframes.core.block_transforms as block_ops diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 045483bd53..7d0cfaee5c 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -37,6 +37,7 @@ import warnings import weakref +import bigframes_vendored.constants as constants import bigframes_vendored.ibis.backends.bigquery # noqa import bigframes_vendored.pandas.io.gbq as third_party_pandas_gbq import bigframes_vendored.pandas.io.parquet as third_party_pandas_parquet @@ -58,7 +59,6 @@ import bigframes._config.bigquery_options as bigquery_options import bigframes.clients -import bigframes.constants as constants import bigframes.core as core import bigframes.core.blocks as blocks import bigframes.core.compile diff --git a/bigframes/session/_io/bigquery/read_gbq_table.py b/bigframes/session/_io/bigquery/read_gbq_table.py index 03b26f9460..7585dd3f45 100644 --- a/bigframes/session/_io/bigquery/read_gbq_table.py +++ b/bigframes/session/_io/bigquery/read_gbq_table.py @@ -23,12 +23,12 @@ from typing import Dict, Iterable, List, Optional, Sequence, Tuple import warnings +import bigframes_vendored.constants as constants import google.api_core.exceptions import google.cloud.bigquery as bigquery import bigframes import bigframes.clients -import bigframes.constants import bigframes.core.compile import bigframes.core.compile.default_ordering import bigframes.core.sql @@ -241,7 +241,7 @@ def get_index_cols( # test, as it's not possible to subclass enums in Python. See: # https://stackoverflow.com/a/33680021/101923 raise NotImplementedError( - f"Got unexpected index_col {repr(index_col)}. {bigframes.constants.FEEDBACK_LINK}" + f"Got unexpected index_col {repr(index_col)}. 
{constants.FEEDBACK_LINK}" ) elif isinstance(index_col, str): index_cols: List[str] = [index_col] diff --git a/bigframes/session/_io/pandas.py b/bigframes/session/_io/pandas.py index 789426a6e3..83e30fd900 100644 --- a/bigframes/session/_io/pandas.py +++ b/bigframes/session/_io/pandas.py @@ -14,6 +14,7 @@ from typing import Dict, Union +import bigframes_vendored.constants as constants import geopandas # type: ignore import pandas import pandas.arrays @@ -21,7 +22,6 @@ import pyarrow.compute # type: ignore import pyarrow.types # type: ignore -import bigframes.constants import bigframes.features @@ -54,7 +54,7 @@ def arrow_to_pandas( if len(dtypes) != arrow_table.num_columns: raise ValueError( f"Number of types {len(dtypes)} doesn't match number of columns " - f"{arrow_table.num_columns}. {bigframes.constants.FEEDBACK_LINK}" + f"{arrow_table.num_columns}. {constants.FEEDBACK_LINK}" ) serieses = {} diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index 924fddce12..ce9874e35f 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -22,6 +22,7 @@ import typing from typing import Dict, Hashable, IO, Iterable, List, Optional, Sequence, Tuple, Union +import bigframes_vendored.constants as constants import bigframes_vendored.pandas.io.gbq as third_party_pandas_gbq import google.api_core.exceptions import google.auth.credentials @@ -36,7 +37,7 @@ import pandas import bigframes.clients -import bigframes.constants as constants +import bigframes.constants import bigframes.core as core import bigframes.core.blocks as blocks import bigframes.core.compile @@ -444,7 +445,8 @@ def _read_bigquery_load_job( # hours of the anonymous dataset. table_expiration = bigquery.Table(table_id) table_expiration.expires = ( - datetime.datetime.now(datetime.timezone.utc) + constants.DEFAULT_EXPIRATION + datetime.datetime.now(datetime.timezone.utc) + + bigframes.constants.DEFAULT_EXPIRATION ) self._bqclient.update_table(table_expiration, ["expires"]) diff --git a/tests/unit/test_constants.py b/tests/unit/test_constants.py index aabc09c388..4e11419077 100644 --- a/tests/unit/test_constants.py +++ b/tests/unit/test_constants.py @@ -12,9 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import bigframes.constants as constants +import bigframes_vendored.constants + +import bigframes.version def test_feedback_link_includes_version(): - assert len(constants.BF_VERSION) > 0 - assert constants.BF_VERSION in constants.FEEDBACK_LINK + version = bigframes.version.__version__ + assert len(version) > 0 + assert version in bigframes_vendored.constants.FEEDBACK_LINK diff --git a/tests/unit/test_formatting_helpers.py b/tests/unit/test_formatting_helpers.py index 3c966752c9..d4fe039484 100644 --- a/tests/unit/test_formatting_helpers.py +++ b/tests/unit/test_formatting_helpers.py @@ -14,12 +14,13 @@ import unittest.mock as mock +import bigframes_vendored.constants as constants import google.api_core.exceptions as api_core_exceptions import google.cloud.bigquery as bigquery import pytest -import bigframes.constants as constants import bigframes.formatting_helpers as formatting_helpers +import bigframes.version def test_wait_for_query_job_error_includes_feedback_link(): @@ -54,4 +55,4 @@ def test_wait_for_job_error_includes_version(): formatting_helpers.wait_for_job(mock_job) cap_exc.match("Test message 123.") - cap_exc.match(constants.BF_VERSION) + cap_exc.match(bigframes.version.__version__) diff --git a/third_party/bigframes_vendored/constants.py b/third_party/bigframes_vendored/constants.py index 91084b38f9..1effdffcbe 100644 --- a/third_party/bigframes_vendored/constants.py +++ b/third_party/bigframes_vendored/constants.py @@ -16,14 +16,12 @@ This module should not depend on any others in the package. """ -import bigframes.version - -BF_VERSION = bigframes.version.__version__ +import bigframes_vendored.version FEEDBACK_LINK = ( "Share your usecase with the BigQuery DataFrames team at the " "https://bit.ly/bigframes-feedback survey." - f"You are currently running BigFrames version {BF_VERSION}" + f"You are currently running BigFrames version {bigframes_vendored.version.__version__}" ) ABSTRACT_METHOD_ERROR_MESSAGE = ( diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py new file mode 100644 index 0000000000..2c0c6e4d3a --- /dev/null +++ b/third_party/bigframes_vendored/version.py @@ -0,0 +1,15 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__version__ = "1.17.0" From 89ea44fb66314b134fc0a10d816c1659978d4182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 13 Sep 2024 17:22:12 -0500 Subject: [PATCH 08/18] deps: update to ibis-framework 9.x and newer sqlglot (#827) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * deps: update to ibis-framework 9.x and newer sqlglot * update sqlglot and ibis * bump minimum pandas * bump pyarrow * fix bfill and ffill * nearly implement describe * remove remaining reference to vendored_ibis_ops.ApproximateMultiQuantile * support ToJsonString * partial support for quantile * fix inmemorytable * fixed Series.explode * nearly fix to_datetime * remove tests I added * patch for python 3.9 support * fix unit tests * fix explode with time type * fix array_agg * fix array_agg for asc order * actually fix array_agg * fix remote function * fix in-memory nullable integer compilation * fix test_df_construct_pandas_default on Python 3.9 * fix ShiftOp windows * fix inf to SQL by treating values as literal in in memory table * fix unit tests for ibis-framework 9.2.0 * fix Python 3.10 unit tests by syncing deps * fixing remote function after merge * fix visit_NonNullLiteral for int types * visit_WindowFunction to fix s.median() method * fix lint * fix s.diff with window * fix mypy * patch visit_And to fix is_monotonic methods * fix mypy and fillna warning * undo window changes for test_series_autocorr * undo fill_null because it was missed at 9.0 version * vendor more of ibis for python 3.9 compatibility * add default arg for nulls_first for python 3.9 support * restore integer conversion * fix window tests: diff, duplicated, shift * fixing ibis parenthesize_inputs bugs and related tests * fixing lint * disable test_query_complexity_error * fix doctest np.int64(0) upgrades * fix doctest np.int64(0) upgrades more * fix groupby diff * addressing system-3.12/doctest issues related to numpy 2.1.1 * fix test_df_apply_axis_1_complex * address compiler errors after merge * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix unit-test compile errors * remove unused ibis codes * fix fillna deprecated warning * add _remove_null_ordering_from_unsupported_window back to fix test_precision_score etc ml tests * fix is_monotonic_decreasing test * fix explode after merge * fix numpy on remote function test * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * ml numpy sql generations --------- Co-authored-by: Chelsea Lin <124939984+chelsea-lin@users.noreply.github.com> Co-authored-by: Chelsea Lin Co-authored-by: Owl Bot --- bigframes/core/block_transforms.py | 7 +- bigframes/core/blocks.py | 15 +- bigframes/core/compile/aggregate_compiler.py | 38 +- bigframes/core/compile/compiled.py | 55 +- bigframes/core/compile/default_ordering.py | 7 +- bigframes/core/compile/scalar_op_compiler.py | 17 +- bigframes/core/compile/single_column.py | 6 +- bigframes/core/groupby/__init__.py | 13 +- bigframes/core/window_spec.py | 1 - bigframes/dataframe.py | 34 +- .../functions/_remote_function_session.py | 3 +- bigframes/functions/remote_function.py | 3 +- bigframes/ml/preprocessing.py | 1 + bigframes/ml/sql.py | 7 +- bigframes/operations/__init__.py | 4 +- bigframes/series.py | 48 +- noxfile.py | 3 +- setup.py | 4 +- testing/constraints-3.11.txt | 3 + testing/constraints-3.12.txt | 3 + 
testing/constraints-3.9.txt | 4 +- tests/system/conftest.py | 4 +- tests/system/large/test_remote_function.py | 6 +- tests/system/small/test_dataframe.py | 3 + tests/system/small/test_ibis.py | 44 - tests/system/small/test_numpy.py | 7 + tests/system/small/test_series.py | 9 +- .../ibis/backends/bigquery/__init__.py | 3 - .../ibis/backends/bigquery/backend.py | 85 +- .../ibis/backends/bigquery/compiler.py | 59 -- .../ibis/backends/bigquery/registry.py | 72 -- .../ibis/backends/sql/compilers/base.py | 22 +- .../sql/compilers/bigquery/__init__.py | 221 ++--- .../ibis/backends/sql/rewrites.py | 779 +++++++++++------- .../ibis/expr/operations/__init__.py | 1 - .../ibis/expr/operations/arrays.py | 18 - .../ibis/expr/operations/json.py | 2 + .../ibis/expr/operations/reductions.py | 13 +- .../bigframes_vendored/pandas/core/frame.py | 14 +- .../bigframes_vendored/pandas/core/series.py | 60 +- .../sklearn/metrics/_classification.py | 4 +- .../sklearn/metrics/_ranking.py | 8 +- .../sklearn/metrics/_regression.py | 4 +- 43 files changed, 813 insertions(+), 901 deletions(-) delete mode 100644 tests/system/small/test_ibis.py delete mode 100644 third_party/bigframes_vendored/ibis/backends/bigquery/compiler.py delete mode 100644 third_party/bigframes_vendored/ibis/backends/bigquery/registry.py delete mode 100644 third_party/bigframes_vendored/ibis/expr/operations/arrays.py diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index eaac0dc785..1990647e0a 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -387,10 +387,9 @@ def value_counts( def pct_change(block: blocks.Block, periods: int = 1) -> blocks.Block: column_labels = block.column_labels - window_spec = windows.rows( - preceding=periods if periods > 0 else None, - following=-periods if periods < 0 else None, - ) + + # Window framing clause is not allowed for analytic function lag. 
+ window_spec = windows.unbound() original_columns = block.value_columns block, shift_columns = block.multi_apply_window_op( diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 3e97b1cb5e..39cfed588f 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -50,7 +50,7 @@ import bigframes.core.schema as bf_schema import bigframes.core.sql as sql import bigframes.core.utils as utils -import bigframes.core.window_spec as window_specs +import bigframes.core.window_spec as windows import bigframes.dtypes import bigframes.exceptions import bigframes.features @@ -900,7 +900,7 @@ def multi_apply_window_op( self, columns: typing.Sequence[str], op: agg_ops.WindowOp, - window_spec: window_specs.WindowSpec, + window_spec: windows.WindowSpec, *, skip_null_groups: bool = False, never_skip_nulls: bool = False, @@ -959,7 +959,7 @@ def apply_window_op( self, column: str, op: agg_ops.WindowOp, - window_spec: window_specs.WindowSpec, + window_spec: windows.WindowSpec, *, result_label: Label = None, skip_null_groups: bool = False, @@ -1475,7 +1475,7 @@ def grouped_head( value_columns: typing.Sequence[str], n: int, ): - window_spec = window_specs.cumulative_rows(grouping_keys=tuple(by_column_ids)) + window_spec = windows.cumulative_rows(grouping_keys=tuple(by_column_ids)) block, result_id = self.apply_window_op( value_columns[0], @@ -2383,10 +2383,7 @@ def _is_monotonic( return self._stats_cache[column_name][op_name] period = 1 - window = window_specs.rows( - preceding=period, - following=None, - ) + window_spec = windows.rows() # any NaN value means not monotonic block, last_notna_id = self.apply_unary_op(column_ids[0], ops.notnull_op) @@ -2402,7 +2399,7 @@ def _is_monotonic( last_result_id = None for column_id in column_ids[::-1]: block, lag_result_id = block.apply_window_op( - column_id, agg_ops.ShiftOp(period), window + column_id, agg_ops.ShiftOp(period), window_spec ) block, strict_monotonic_id = block.apply_binary_op( column_id, lag_result_id, ops.gt_op if increasing else ops.lt_op diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py index 488acd63db..91a3045efb 100644 --- a/bigframes/core/compile/aggregate_compiler.py +++ b/bigframes/core/compile/aggregate_compiler.py @@ -13,7 +13,7 @@ # limitations under the License. import functools import typing -from typing import cast, Optional +from typing import cast, List, Optional import bigframes_vendored.constants as constants import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops @@ -31,6 +31,17 @@ scalar_compiler = scalar_compilers.scalar_op_compiler +# TODO(swast): We can remove this if ibis adds general approx_quantile +# See: https://github.com/ibis-project/ibis/issues/9541 +@ibis.udf.agg.builtin +def approx_quantiles(expression: float, number) -> List[float]: + """APPROX_QUANTILES + + https://cloud.google.com/bigquery/docs/reference/standard-sql/approximate_aggregate_functions#approx_quantiles + """ + return [] # pragma: NO COVER + + def compile_aggregate( aggregate: ex.Aggregation, bindings: typing.Dict[str, ibis_types.Value], @@ -176,15 +187,12 @@ def _( column: ibis_types.NumericColumn, window=None, ) -> ibis_types.NumericValue: - # PERCENTILE_CONT has very few allowed windows. For example, "window - # framing clause is not allowed for analytic function percentile_cont". + # APPROX_QUANTILES has very few allowed windows. if window is not None: raise NotImplementedError( f"Approx Quartiles with windowing is not supported. 
{constants.FEEDBACK_LINK}" ) - value = vendored_ibis_ops.ApproximateMultiQuantile( - column, num_bins=4 # type: ignore - ).to_expr()[op.quartile] + value = approx_quantiles(column, 4)[op.quartile] # type: ignore return cast(ibis_types.NumericValue, value) @@ -513,11 +521,15 @@ def _( column: ibis_types.Column, window=None, ) -> ibis_types.BooleanValue: - # BQ will return null for empty column, result would be true in pandas. - result = _is_true(column).all() + # BQ will return null for empty column, result would be false in pandas. + result = _apply_window_if_present(_is_true(column).all(), window) + literal = ibis_types.literal(True) + return cast( ibis_types.BooleanScalar, - _apply_window_if_present(result, window).fillna(ibis_types.literal(True)), + result.fill_null(literal) + if hasattr(result, "fill_null") + else result.fillna(literal), ) @@ -528,10 +540,14 @@ def _( window=None, ) -> ibis_types.BooleanValue: # BQ will return null for empty column, result would be false in pandas. - result = _is_true(column).any() + result = _apply_window_if_present(_is_true(column).any(), window) + literal = ibis_types.literal(False) + return cast( ibis_types.BooleanScalar, - _apply_window_if_present(result, window).fillna(ibis_types.literal(False)), + result.fill_null(literal) + if hasattr(result, "fill_null") + else result.fillna(literal), ) diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 2a13ce0da0..cd00c98381 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -19,10 +19,9 @@ import typing from typing import Collection, Literal, Optional, Sequence -import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops +import bigframes_vendored.ibis.backends.bigquery.backend as ibis_bigquery import google.cloud.bigquery import ibis -import ibis.backends.bigquery as ibis_bigquery import ibis.backends.bigquery.datatypes import ibis.common.deferred # type: ignore import ibis.expr.datatypes as ibis_dtypes @@ -407,18 +406,13 @@ def explode(self, offsets: typing.Sequence[int]) -> UnorderedIR: # The offset array ensures null represents empty arrays after unnesting. offset_array_id = bigframes.core.guid.generate_guid("offset_array_") - offset_array = ( - vendored_ibis_ops.GenerateArray( - ibis.greatest( - 0, - ibis.least( - *[table[column_id].length() - 1 for column_id in column_ids] - ), - ) - ) - .to_expr() - .name(offset_array_id), - ) + offset_array = ibis.range( + 0, + ibis.greatest( + 1, # We always want at least 1 element to fill in NULLs for empty arrays. + ibis.least(*[table[column_id].length() for column_id in column_ids]), + ), + ).name(offset_array_id) table_w_offset_array = table.select( offset_array, *self._column_names, @@ -718,21 +712,13 @@ def explode(self, offsets: typing.Sequence[int]) -> OrderedIR: column_ids = tuple(table.columns[offset] for offset in offsets) offset_array_id = bigframes.core.guid.generate_guid("offset_array_") - offset_array = ( - vendored_ibis_ops.GenerateArray( - ibis.greatest( - 0, - ibis.least( - *[ - table[table.columns[offset]].length() - 1 - for offset in offsets - ] - ), - ) - ) - .to_expr() - .name(offset_array_id), - ) + offset_array = ibis.range( + 0, + ibis.greatest( + 1, # We always want at least 1 element to fill in NULLs for empty arrays. 
+ ibis.least(*[table[column_id].length() for column_id in column_ids]), + ), + ).name(offset_array_id) table_w_offset_array = table.select( offset_array, *self._column_names, @@ -870,7 +856,7 @@ def project_window_op( clauses = [] if op.skips_nulls and not never_skip_nulls: - clauses.append((column.isnull(), ibis.NA)) + clauses.append((column.isnull(), ibis.null())) if window_spec.min_periods: if op.skips_nulls: # Most operations do not count NULL values towards min_periods @@ -891,7 +877,7 @@ def project_window_op( clauses.append( ( observation_count < ibis_types.literal(window_spec.min_periods), - ibis.NA, + ibis.null(), ) ) if clauses: @@ -1322,9 +1308,10 @@ def _ibis_window_from_spec( bounds.preceding, bounds.following, how="range" ) if isinstance(bounds, RowsWindowBounds): - window = window.preceding_following( - bounds.preceding, bounds.following, how="rows" - ) + if bounds.preceding is not None or bounds.following is not None: + window = window.preceding_following( + bounds.preceding, bounds.following, how="rows" + ) else: raise ValueError(f"unrecognized window bounds {bounds}") return window diff --git a/bigframes/core/compile/default_ordering.py b/bigframes/core/compile/default_ordering.py index 7d7a41f742..a6b625caca 100644 --- a/bigframes/core/compile/default_ordering.py +++ b/bigframes/core/compile/default_ordering.py @@ -49,7 +49,12 @@ def _convert_to_nonnull_string(column: ibis_types.Column) -> ibis_types.StringVa # Needed for JSON, STRUCT and ARRAY datatypes result = vendored_ibis_ops.ToJsonString(column).to_expr() # type: ignore # Escape backslashes and use backslash as delineator - escaped = cast(ibis_types.StringColumn, result.fillna("")).replace("\\", "\\\\") # type: ignore + escaped = cast( + ibis_types.StringColumn, + result.fill_null("") if hasattr(result, "fill_null") else result.fillna(""), + ).replace( + "\\", "\\\\" + ) # type: ignore return cast(ibis_types.StringColumn, ibis.literal("\\")).concat(escaped) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index c0f12865d6..799a408d5b 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -842,7 +842,7 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp): @scalar_op_compiler.register_unary_op(ops.ToDatetimeOp, pass_op=True) def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp): if x.type() == ibis_dtypes.str: - return vendored_ibis_ops.SafeCastToDatetime(x).to_expr() + return x.try_cast(ibis_dtypes.Timestamp(None)) else: # Numerical inputs. if op.format: @@ -995,8 +995,14 @@ def eq_nulls_match_op( y: ibis_types.Value, ): """Variant of eq_op where nulls match each other. 
Only use where dtypes are known to be same.""" - left = x.cast(ibis_dtypes.str).fillna(ibis_types.literal("$NULL_SENTINEL$")) - right = y.cast(ibis_dtypes.str).fillna(ibis_types.literal("$NULL_SENTINEL$")) + literal = ibis_types.literal("$NULL_SENTINEL$") + if hasattr(x, "fill_null"): + left = x.cast(ibis_dtypes.str).fill_null(literal) + right = y.cast(ibis_dtypes.str).fill_null(literal) + else: + left = x.cast(ibis_dtypes.str).fillna(literal) + right = y.cast(ibis_dtypes.str).fillna(literal) + return left == right @@ -1379,7 +1385,10 @@ def fillna_op( x: ibis_types.Value, y: ibis_types.Value, ): - return x.fillna(typing.cast(ibis_types.Scalar, y)) + if hasattr(x, "fill_null"): + return x.fill_null(typing.cast(ibis_types.Scalar, y)) + else: + return x.fillna(typing.cast(ibis_types.Scalar, y)) @scalar_op_compiler.register_binary_op(ops.round_op) diff --git a/bigframes/core/compile/single_column.py b/bigframes/core/compile/single_column.py index 26af969b74..325df8e180 100644 --- a/bigframes/core/compile/single_column.py +++ b/bigframes/core/compile/single_column.py @@ -179,4 +179,8 @@ def value_to_join_key(value: ibis_types.Value): """Converts nullable values to non-null string SQL will not match null keys together - but pandas does.""" if not value.type().is_string(): value = value.cast(ibis_dtypes.str) - return value.fillna(ibis_types.literal("$NULL_SENTINEL$")) + return ( + value.fill_null(ibis_types.literal("$NULL_SENTINEL$")) + if hasattr(value, "fill_null") + else value.fillna(ibis_types.literal("$NULL_SENTINEL$")) + ) diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index eda517964d..5cb0e65729 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -255,19 +255,17 @@ def cumprod(self, *args, **kwargs) -> df.DataFrame: @validations.requires_ordering() def shift(self, periods=1) -> series.Series: - window = window_specs.rows( + # Window framing clause is not allowed for analytic function lag. + window = window_specs.unbound( grouping_keys=tuple(self._by_col_ids), - preceding=periods if periods > 0 else None, - following=-periods if periods < 0 else None, ) return self._apply_window_op(agg_ops.ShiftOp(periods), window=window) @validations.requires_ordering() def diff(self, periods=1) -> series.Series: + # Window framing clause is not allowed for analytic function lag. window = window_specs.rows( grouping_keys=tuple(self._by_col_ids), - preceding=periods if periods > 0 else None, - following=-periods if periods < 0 else None, ) return self._apply_window_op(agg_ops.DiffOp(periods), window=window) @@ -685,10 +683,9 @@ def cumcount(self, *args, **kwargs) -> series.Series: @validations.requires_ordering() def shift(self, periods=1) -> series.Series: """Shift index by desired number of periods.""" + # Window framing clause is not allowed for analytic function lag. 
window = window_specs.rows( grouping_keys=tuple(self._by_col_ids), - preceding=periods if periods > 0 else None, - following=-periods if periods < 0 else None, ) return self._apply_window_op(agg_ops.ShiftOp(periods), window=window) @@ -696,8 +693,6 @@ def shift(self, periods=1) -> series.Series: def diff(self, periods=1) -> series.Series: window = window_specs.rows( grouping_keys=tuple(self._by_col_ids), - preceding=periods if periods > 0 else None, - following=-periods if periods < 0 else None, ) return self._apply_window_op(agg_ops.DiffOp(periods), window=window) diff --git a/bigframes/core/window_spec.py b/bigframes/core/window_spec.py index f011e2848d..3d80afea5a 100644 --- a/bigframes/core/window_spec.py +++ b/bigframes/core/window_spec.py @@ -70,7 +70,6 @@ def rows( Returns: WindowSpec """ - assert (preceding is not None) or (following is not None) bounds = RowsWindowBounds(preceding=preceding, following=following) return WindowSpec( grouping_keys=grouping_keys, diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index d7bdd5529b..d7738a569d 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -65,7 +65,7 @@ import bigframes.core.utils as utils import bigframes.core.validations as validations import bigframes.core.window -import bigframes.core.window_spec as window_spec +import bigframes.core.window_spec as windows import bigframes.dtypes import bigframes.exceptions import bigframes.formatting_helpers as formatter @@ -1958,12 +1958,12 @@ def replace( @validations.requires_ordering() def ffill(self, *, limit: typing.Optional[int] = None) -> DataFrame: - window = window_spec.rows(preceding=limit, following=0) + window = windows.rows(preceding=limit, following=0) return self._apply_window_op(agg_ops.LastNonNullOp(), window) @validations.requires_ordering() def bfill(self, *, limit: typing.Optional[int] = None) -> DataFrame: - window = window_spec.rows(preceding=0, following=limit) + window = windows.rows(preceding=0, following=limit) return self._apply_window_op(agg_ops.FirstNonNullOp(), window) def isin(self, values) -> DataFrame: @@ -2670,7 +2670,7 @@ def _perform_join_by_index( @validations.requires_ordering() def rolling(self, window: int, min_periods=None) -> bigframes.core.window.Window: # To get n size window, need current row and n-1 preceding rows. 
- window_def = window_spec.rows( + window_def = windows.rows( preceding=window - 1, following=0, min_periods=min_periods or window ) return bigframes.core.window.Window( @@ -2679,7 +2679,7 @@ def rolling(self, window: int, min_periods=None) -> bigframes.core.window.Window @validations.requires_ordering() def expanding(self, min_periods: int = 1) -> bigframes.core.window.Window: - window = window_spec.cumulative_rows(min_periods=min_periods) + window = windows.cumulative_rows(min_periods=min_periods) return bigframes.core.window.Window( self._block, window, self._block.value_columns ) @@ -2790,7 +2790,7 @@ def cumsum(self): raise ValueError("All values must be numeric to apply cumsum.") return self._apply_window_op( agg_ops.sum_op, - window_spec.cumulative_rows(), + windows.cumulative_rows(), ) @validations.requires_ordering() @@ -2803,38 +2803,32 @@ def cumprod(self) -> DataFrame: raise ValueError("All values must be numeric to apply cumsum.") return self._apply_window_op( agg_ops.product_op, - window_spec.cumulative_rows(), + windows.cumulative_rows(), ) @validations.requires_ordering() def cummin(self) -> DataFrame: return self._apply_window_op( agg_ops.min_op, - window_spec.cumulative_rows(), + windows.cumulative_rows(), ) @validations.requires_ordering() def cummax(self) -> DataFrame: return self._apply_window_op( agg_ops.max_op, - window_spec.cumulative_rows(), + windows.cumulative_rows(), ) @validations.requires_ordering() def shift(self, periods: int = 1) -> DataFrame: - window = window_spec.rows( - preceding=periods if periods > 0 else None, - following=-periods if periods < 0 else None, - ) - return self._apply_window_op(agg_ops.ShiftOp(periods), window) + window_spec = windows.rows() + return self._apply_window_op(agg_ops.ShiftOp(periods), window_spec) @validations.requires_ordering() def diff(self, periods: int = 1) -> DataFrame: - window = window_spec.rows( - preceding=periods if periods > 0 else None, - following=-periods if periods < 0 else None, - ) - return self._apply_window_op(agg_ops.DiffOp(periods), window) + window_spec = windows.rows() + return self._apply_window_op(agg_ops.DiffOp(periods), window_spec) @validations.requires_ordering() def pct_change(self, periods: int = 1) -> DataFrame: @@ -2845,7 +2839,7 @@ def pct_change(self, periods: int = 1) -> DataFrame: def _apply_window_op( self, op: agg_ops.WindowOp, - window_spec: window_spec.WindowSpec, + window_spec: windows.WindowSpec, ): block, result_ids = self._block.multi_apply_window_op( self._block.value_columns, diff --git a/bigframes/functions/_remote_function_session.py b/bigframes/functions/_remote_function_session.py index 0510980178..6bc7a4b079 100644 --- a/bigframes/functions/_remote_function_session.py +++ b/bigframes/functions/_remote_function_session.py @@ -522,7 +522,8 @@ def try_delattr(attr): node = ibis.udf.scalar.builtin( func, name=rf_name, - schema=f"{dataset_ref.project}.{dataset_ref.dataset_id}", + catalog=dataset_ref.project, + database=dataset_ref.dataset_id, signature=(ibis_signature.input_types, ibis_signature.output_type), ) func.bigframes_cloud_function = ( diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index 4dc6c1ad6b..1f4fdaad7b 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -170,7 +170,8 @@ def func(*bigframes_args, **bigframes_kwargs): node = ibis.udf.scalar.builtin( func, name=routine_ref.routine_id, - schema=f"{routine_ref.project}.{routine_ref.dataset_id}", + 
catalog=routine_ref.project, + database=routine_ref.dataset_id, signature=(ibis_signature.input_types, ibis_signature.output_type), ) func.bigframes_remote_function = str(routine_ref) # type: ignore diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index 13d2041ef3..2c327f63f8 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -307,6 +307,7 @@ def _compile_to_sql( for column in columns: min_value = X[column].min() max_value = X[column].max() + bin_size = (max_value - min_value) / self.n_bins array_split_points[column] = [ min_value + i * bin_size for i in range(self.n_bins - 1) diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index 7120a5a5fd..1cb327f19c 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -123,7 +123,12 @@ def ml_bucketize( name: str, ) -> str: """Encode ML.BUCKETIZE for BQML""" - return f"""ML.BUCKETIZE({numeric_expr_sql}, {array_split_points}, FALSE) AS {name}""" + # Use Python value rather than Numpy value to serialization. + points = [ + point.item() if hasattr(point, "item") else point + for point in array_split_points + ] + return f"""ML.BUCKETIZE({numeric_expr_sql}, {points}, FALSE) AS {name}""" def ml_quantile_bucketize( self, diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 51a962b13b..55b8fa1802 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -892,7 +892,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT # Just parameterless unary ops for now # TODO: Parameter mappings -NUMPY_TO_OP: typing.Final = { +NUMPY_TO_OP: dict[np.ufunc, UnaryOp] = { np.sin: sin_op, np.cos: cos_op, np.tan: tan_op, @@ -917,7 +917,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT } -NUMPY_TO_BINOP: typing.Final = { +NUMPY_TO_BINOP: dict[np.ufunc, BinaryOp] = { np.add: add_op, np.subtract: sub_op, np.multiply: mul_op, diff --git a/bigframes/series.py b/bigframes/series.py index d4cb1d3700..3a75ab9ccc 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -45,7 +45,7 @@ import bigframes.core.utils as utils import bigframes.core.validations as validations import bigframes.core.window -import bigframes.core.window_spec +import bigframes.core.window_spec as windows import bigframes.dataframe import bigframes.dtypes import bigframes.formatting_helpers as formatter @@ -465,13 +465,11 @@ def case_when(self, caselist) -> Series: @validations.requires_ordering() def cumsum(self) -> Series: - return self._apply_window_op( - agg_ops.sum_op, bigframes.core.window_spec.cumulative_rows() - ) + return self._apply_window_op(agg_ops.sum_op, windows.cumulative_rows()) @validations.requires_ordering() def ffill(self, *, limit: typing.Optional[int] = None) -> Series: - window = bigframes.core.window_spec.rows(preceding=limit, following=0) + window = windows.rows(preceding=limit, following=0) return self._apply_window_op(agg_ops.LastNonNullOp(), window) pad = ffill @@ -479,42 +477,30 @@ def ffill(self, *, limit: typing.Optional[int] = None) -> Series: @validations.requires_ordering() def bfill(self, *, limit: typing.Optional[int] = None) -> Series: - window = bigframes.core.window_spec.rows(preceding=0, following=limit) + window = windows.rows(preceding=0, following=limit) return self._apply_window_op(agg_ops.FirstNonNullOp(), window) @validations.requires_ordering() def cummax(self) -> Series: - return self._apply_window_op( - agg_ops.max_op, bigframes.core.window_spec.cumulative_rows() - 
) + return self._apply_window_op(agg_ops.max_op, windows.cumulative_rows()) @validations.requires_ordering() def cummin(self) -> Series: - return self._apply_window_op( - agg_ops.min_op, bigframes.core.window_spec.cumulative_rows() - ) + return self._apply_window_op(agg_ops.min_op, windows.cumulative_rows()) @validations.requires_ordering() def cumprod(self) -> Series: - return self._apply_window_op( - agg_ops.product_op, bigframes.core.window_spec.cumulative_rows() - ) + return self._apply_window_op(agg_ops.product_op, windows.cumulative_rows()) @validations.requires_ordering() def shift(self, periods: int = 1) -> Series: - window = bigframes.core.window_spec.rows( - preceding=periods if periods > 0 else None, - following=-periods if periods < 0 else None, - ) - return self._apply_window_op(agg_ops.ShiftOp(periods), window) + window_spec = windows.rows() + return self._apply_window_op(agg_ops.ShiftOp(periods), window_spec) @validations.requires_ordering() def diff(self, periods: int = 1) -> Series: - window = bigframes.core.window_spec.rows( - preceding=periods if periods > 0 else None, - following=-periods if periods < 0 else None, - ) - return self._apply_window_op(agg_ops.DiffOp(periods), window) + window_spec = windows.rows() + return self._apply_window_op(agg_ops.DiffOp(periods), window_spec) @validations.requires_ordering() def pct_change(self, periods: int = 1) -> Series: @@ -1053,7 +1039,7 @@ def mode(self) -> Series: block, max_value_count_col_id = block.apply_window_op( value_count_col_id, agg_ops.max_op, - window_spec=bigframes.core.window_spec.unbound(), + window_spec=windows.unbound(), ) block, is_mode_col_id = block.apply_binary_op( value_count_col_id, @@ -1286,9 +1272,7 @@ def _apply_aggregation( ) -> Any: return self._block.get_stat(self._value_column, op) - def _apply_window_op( - self, op: agg_ops.WindowOp, window_spec: bigframes.core.window_spec.WindowSpec - ): + def _apply_window_op(self, op: agg_ops.WindowOp, window_spec: windows.WindowSpec): block = self._block block, result_id = block.apply_window_op( self._value_column, op, window_spec=window_spec, result_label=self.name @@ -1345,7 +1329,7 @@ def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series: @validations.requires_ordering() def rolling(self, window: int, min_periods=None) -> bigframes.core.window.Window: # To get n size window, need current row and n-1 preceding rows. - window_spec = bigframes.core.window_spec.rows( + window_spec = windows.rows( preceding=window - 1, following=0, min_periods=min_periods or window ) return bigframes.core.window.Window( @@ -1354,9 +1338,7 @@ def rolling(self, window: int, min_periods=None) -> bigframes.core.window.Window @validations.requires_ordering() def expanding(self, min_periods: int = 1) -> bigframes.core.window.Window: - window_spec = bigframes.core.window_spec.cumulative_rows( - min_periods=min_periods - ) + window_spec = windows.cumulative_rows(min_periods=min_periods) return bigframes.core.window.Window( self._block, window_spec, self._block.value_columns, is_series=True ) diff --git a/noxfile.py b/noxfile.py index 5dbcdea583..f4142af21b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -62,8 +62,9 @@ UNIT_TEST_EXTRAS: List[str] = [] UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {} +# There are 4 different ibis-framework 9.x versions we want to test against. # 3.10 is needed for Windows tests. 
-SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.12"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12"] SYSTEM_TEST_STANDARD_DEPENDENCIES = [ "jinja2", "mock", diff --git a/setup.py b/setup.py index 2ae61a44f3..908a3c244f 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "google-cloud-iam >=2.12.1", "google-cloud-resource-manager >=1.10.3", "google-cloud-storage >=2.0.0", - "ibis-framework[bigquery] >=8.0.0,<9.0.0dev", + "ibis-framework[bigquery] >=9.0.0,<=9.3.0", "jellyfish >=0.8.9", "numpy >=1.24.0", "pandas >=1.5.3", @@ -59,7 +59,7 @@ # Keep sqlglot versions in sync with ibis-framework. This avoids problems # where the incorrect version of sqlglot is installed, such as # https://github.com/googleapis/python-bigquery-dataframes/issues/315 - "sqlglot >=20.8.0,<=20.11", + "sqlglot >=23.6.3,<25.2", "tabulate >= 0.9", "ipywidgets >=7.7.1", "humanize >= 4.6.0", diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt index e69de29bb2..60ac0af60f 100644 --- a/testing/constraints-3.11.txt +++ b/testing/constraints-3.11.txt @@ -0,0 +1,3 @@ +# Some internal modules have moved, +# so make sure we test on all ibis-framework 9.x versions. +ibis-framework==9.1.0 diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt index e69de29bb2..dbbb5a2d88 100644 --- a/testing/constraints-3.12.txt +++ b/testing/constraints-3.12.txt @@ -0,0 +1,3 @@ +# Some internal modules have moved, +# so make sure we test on all ibis-framework 9.x versions. +ibis-framework==9.2.0 diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index a446a4bfa7..4a9d1ae281 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -12,7 +12,7 @@ google-cloud-bigquery-connection==1.12.0 google-cloud-iam==2.12.1 google-cloud-resource-manager==1.10.3 google-cloud-storage==2.0.0 -ibis-framework==8.0.0 +ibis-framework==9.0.0 jellyfish==0.8.9 numpy==1.24.0 pandas==1.5.3 @@ -21,7 +21,7 @@ pydata-google-auth==1.8.2 requests==2.27.1 scikit-learn==1.2.2 sqlalchemy==1.4 -sqlglot==20.8.0 +sqlglot==23.6.3 tabulate==0.9 ipywidgets==7.7.1 humanize==4.6.0 diff --git a/tests/system/conftest.py b/tests/system/conftest.py index b382a5593c..49cd887cfd 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -29,7 +29,7 @@ import google.cloud.functions_v2 as functions_v2 import google.cloud.resourcemanager_v3 as resourcemanager_v3 import google.cloud.storage as storage # type: ignore -import ibis.backends.base +import ibis.backends import numpy as np import pandas as pd import pytest @@ -105,7 +105,7 @@ def bigquery_client_tokyo(session_tokyo: bigframes.Session) -> bigquery.Client: @pytest.fixture(scope="session") -def ibis_client(session: bigframes.Session) -> ibis.backends.base.BaseBackend: +def ibis_client(session: bigframes.Session) -> ibis.backends.BaseBackend: return session.ibis_client diff --git a/tests/system/large/test_remote_function.py b/tests/system/large/test_remote_function.py index 77ea4627ec..e224f65a01 100644 --- a/tests/system/large/test_remote_function.py +++ b/tests/system/large/test_remote_function.py @@ -1726,8 +1726,10 @@ def test_df_apply_axis_1_complex(session, pd_df): def serialize_row(row): custom = { - "name": row.name, - "index": [idx for idx in row.index], + "name": row.name.item() if hasattr(row.name, "item") else row.name, + "index": [ + idx.item() if hasattr(idx, "item") else idx for idx in row.index + ], "values": [ val.item() if hasattr(val, "item") else val for val in row.values ], diff --git 
a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index f51b597650..9e046dc62e 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -4568,6 +4568,9 @@ def test_recursion_limit(scalars_df_index): scalars_df_index.to_pandas() +@pytest.mark.skipif( + reason="b/366477265: Skip until query complexity error can be reliably triggered." +) def test_query_complexity_error(scalars_df_index): # This test requires automatic caching/query decomposition to be turned off bf_df = scalars_df_index diff --git a/tests/system/small/test_ibis.py b/tests/system/small/test_ibis.py deleted file mode 100644 index e2648d1eba..0000000000 --- a/tests/system/small/test_ibis.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for monkeypatched ibis code.""" - -import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops -import ibis.expr.types as ibis_types - -import bigframes - - -def test_approximate_quantiles(session: bigframes.Session, scalars_table_id: str): - num_bins = 3 - ibis_client = session.ibis_client - project, dataset, table_id = scalars_table_id.split(".") - ibis_table: ibis_types.Table = ibis_client.table( # type: ignore - table_id, - schema=dataset, - database=project, - ) - ibis_column: ibis_types.NumericColumn = ibis_table["int64_col"] - quantiles: ibis_types.ArrayScalar = vendored_ibis_ops.ApproximateMultiQuantile( - ibis_column, # type: ignore - num_bins=num_bins, # type: ignore - ).to_expr() - value = quantiles[1] - num_edges = quantiles.length() - - sql = ibis_client.compile(value) - num_edges_result = num_edges.to_pandas() - - assert "APPROX_QUANTILES" in sql - assert num_edges_result == num_bins + 1 diff --git a/tests/system/small/test_numpy.py b/tests/system/small/test_numpy.py index 8f62d9628c..37a707b9d0 100644 --- a/tests/system/small/test_numpy.py +++ b/tests/system/small/test_numpy.py @@ -70,6 +70,13 @@ def test_df_ufuncs(scalars_dfs, opname): ).to_pandas() pd_result = getattr(np, opname)(scalars_pandas_df[["float64_col", "int64_col"]]) + # In NumPy versions 2 and later, `np.floor` and `np.ceil` now produce integer + # outputs for the "int64_col" column. + if opname in ["floor", "ceil"] and isinstance( + pd_result["int64_col"].dtypes, pd.Int64Dtype + ): + pd_result["int64_col"] = pd_result["int64_col"].astype(pd.Float64Dtype()) + pd.testing.assert_frame_equal(bf_result, pd_result) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 9a6783ee5c..b8f7926aec 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -2346,8 +2346,13 @@ def test_value_counts(scalars_dfs, kwargs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "int64_too" - bf_result = scalars_df[col_name].value_counts(**kwargs).to_pandas() - pd_result = scalars_pandas_df[col_name].value_counts(**kwargs) + # Pandas `value_counts` can produce non-deterministic results with tied counts. 
+ # Remove duplicates to enforce a consistent output. + s = scalars_df[col_name].drop(0) + pd_s = scalars_pandas_df[col_name].drop(0) + + bf_result = s.value_counts(**kwargs).to_pandas() + pd_result = pd_s.value_counts(**kwargs) pd.testing.assert_series_equal( bf_result, diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py b/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py index 1d2d05a741..e69de29bb2 100644 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py +++ b/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py @@ -1,3 +0,0 @@ -# Import all sub-modules to monkeypatch everything. -import bigframes_vendored.ibis.backends.bigquery.compiler # noqa -import bigframes_vendored.ibis.backends.bigquery.registry # noqa diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/backend.py b/third_party/bigframes_vendored/ibis/backends/bigquery/backend.py index f917ef950d..d4d5156572 100644 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/backend.py +++ b/third_party/bigframes_vendored/ibis/backends/bigquery/backend.py @@ -8,10 +8,10 @@ import contextlib import glob import os -import re from typing import Any, Optional, TYPE_CHECKING from bigframes_vendored.ibis.backends.bigquery.datatypes import BigQueryType +import bigframes_vendored.ibis.backends.sql.compilers as sc import google.api_core.exceptions import google.auth.credentials import google.cloud.bigquery as bq @@ -27,7 +27,6 @@ ) from ibis.backends.bigquery.datatypes import BigQuerySchema from ibis.backends.sql import SQLBackend -import ibis.backends.sql.compilers as sc import ibis.common.exceptions as com import ibis.expr.operations as ops import ibis.expr.schema as sch @@ -81,52 +80,6 @@ def _create_client_info_gapic(application_name): return ClientInfo(user_agent=_create_user_agent(application_name)) -_MEMTABLE_PATTERN = re.compile( - r"^_?ibis_(?:[A-Za-z_][A-Za-z_0-9]*)_memtable_[a-z0-9]{26}$" -) - - -def _qualify_memtable( - node: sge.Expression, *, dataset: str | None, project: str | None -) -> sge.Expression: - """Add a BigQuery dataset and project to memtable references.""" - if isinstance(node, sge.Table) and _MEMTABLE_PATTERN.match(node.name) is not None: - node.args["db"] = dataset - node.args["catalog"] = project - # make sure to quote table location - node = _force_quote_table(node) - return node - - -def _remove_null_ordering_from_unsupported_window( - node: sge.Expression, -) -> sge.Expression: - """Remove null ordering in window frame clauses not supported by BigQuery. - - BigQuery has only partial support for NULL FIRST/LAST in RANGE windows so - we remove it from any window frame clause that doesn't support it. - - Here's the support matrix: - - ✅ sum(x) over (order by y desc nulls last) - 🚫 sum(x) over (order by y asc nulls last) - ✅ sum(x) over (order by y asc nulls first) - 🚫 sum(x) over (order by y desc nulls first) - """ - if isinstance(node, sge.Window): - order = node.args.get("order") - if order is not None: - for key in order.args["expressions"]: - kargs = key.args - if kargs.get("desc") is True and kargs.get("nulls_first", False): - kargs["nulls_first"] = False - elif kargs.get("desc") is False and not kargs.setdefault( - "nulls_first", True - ): - kargs["nulls_first"] = True - return node - - def _force_quote_table(table: sge.Table) -> sge.Table: """Force quote all the parts of a bigquery path. 
@@ -156,6 +109,10 @@ class Backend(SQLBackend, CanCreateDatabase, CanCreateSchema): def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.__session_dataset: bq.DatasetReference | None = None + self._query_cache.lookup = lambda name: self.table( + name, + database=(self._session_dataset.project, self._session_dataset.dataset_id), + ).op() @property def _session_dataset(self): @@ -163,32 +120,6 @@ def _session_dataset(self): self.__session_dataset = self._make_session() return self.__session_dataset - def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: - raw_name = op.name - - session_dataset = self._session_dataset - project = session_dataset.project - dataset = session_dataset.dataset_id - - table_ref = bq.TableReference(session_dataset, raw_name) - try: - self.client.get_table(table_ref) - except google.api_core.exceptions.NotFound: - table_id = sg.table( - raw_name, db=dataset, catalog=project, quoted=False - ).sql(dialect=self.name) - bq_schema = BigQuerySchema.from_ibis(op.schema) - load_job = self.client.load_table_from_dataframe( - op.data.to_frame(), - table_id, - job_config=bq.LoadJobConfig( - # fail if the table already exists and contains data - write_disposition=bq.WriteDisposition.WRITE_EMPTY, - schema=bq_schema, - ), - ) - load_job.result() - def _read_file( self, path: str | Path, @@ -793,7 +724,6 @@ def to_pyarrow( **kwargs: Any, ) -> pa.Table: self._import_pyarrow() - self._register_in_memory_tables(expr) sql = self.compile(expr, limit=limit, params=params, **kwargs) self._log(sql) query = self.raw_sql(sql, params=params, **kwargs) @@ -816,7 +746,6 @@ def to_pyarrow_batches( schema = expr.as_table().schema() - self._register_in_memory_tables(expr) sql = self.compile(expr, limit=limit, params=params, **kwargs) self._log(sql) query = self.raw_sql(sql, params=params, page_size=chunk_size, **kwargs) @@ -1009,9 +938,6 @@ def create_table( if obj is not None and not isinstance(obj, ir.Table): obj = ibis.memtable(obj, schema=schema) - if obj is not None: - self._register_in_memory_tables(obj) - if temp: dataset = self._session_dataset.dataset_id if database is not None: @@ -1107,7 +1033,6 @@ def create_view( expression=self.compile(obj), replace=overwrite, ) - self._register_in_memory_tables(obj) self.raw_sql(stmt.sql(self.name)) return self.table(name, database=(catalog, database)) diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/compiler.py b/third_party/bigframes_vendored/ibis/backends/bigquery/compiler.py deleted file mode 100644 index 414f0a7c81..0000000000 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/compiler.py +++ /dev/null @@ -1,59 +0,0 @@ -# Contains code from https://github.com/ibis-project/ibis/blob/master/ibis/backends/bigquery/compiler.py -"""Module to convert from Ibis expression to SQL string.""" - -from __future__ import annotations - -import re - -from ibis.backends.base.sql import compiler as sql_compiler -import ibis.backends.bigquery.compiler -from ibis.backends.bigquery.datatypes import BigQueryType -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops - -_NAME_REGEX = re.compile(r'[^!"$()*,./;?@[\\\]^`{}~\n]+') -_EXACT_NAME_REGEX = re.compile(f"^{_NAME_REGEX.pattern}$") - - -class BigQueryTableSetFormatter(sql_compiler.TableSetFormatter): - def _quote_identifier(self, name): - """Restore 6.x version of identifier quoting. - - 7.x uses sqlglot which as of December 2023 doesn't know about the - extended unicode names for BigQuery yet. 
- """ - if _EXACT_NAME_REGEX.match(name) is not None: - return name - return f"`{name}`" - - def _format_in_memory_table(self, op): - """Restore 6.x version of InMemoryTable. - - BigQuery DataFrames explicitly uses InMemoryTable only when we know - the data is small enough to embed in SQL. - """ - schema = op.schema - names = schema.names - types = schema.types - - raw_rows = [] - for row in op.data.to_frame().itertuples(index=False): - raw_row = ", ".join( - f"{self._translate(lit)} AS {name}" - for lit, name in zip( - map(ops.Literal, row, types), map(self._quote_identifier, names) - ) - ) - raw_rows.append(f"STRUCT({raw_row})") - array_type = BigQueryType.from_ibis(dt.Array(op.schema.as_struct())) - - return f"UNNEST({array_type}[{', '.join(raw_rows)}])" - - -# Override implementation. -ibis.backends.bigquery.compiler.BigQueryTableSetFormatter._quote_identifier = ( - BigQueryTableSetFormatter._quote_identifier -) -ibis.backends.bigquery.compiler.BigQueryTableSetFormatter._format_in_memory_table = ( - BigQueryTableSetFormatter._format_in_memory_table -) diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py deleted file mode 100644 index ecef2115e5..0000000000 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py +++ /dev/null @@ -1,72 +0,0 @@ -# Contains code from https://github.com/ibis-project/ibis/blob/master/ibis/backends/bigquery/registry.py -"""Module to convert from Ibis expression to SQL string.""" - -import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops -from ibis.backends.bigquery.registry import OPERATION_REGISTRY -import ibis.expr.operations.reductions as ibis_reductions - - -def _approx_quantiles(translator, op: vendored_ibis_ops.ApproximateMultiQuantile): - arg = translator.translate(op.arg) - num_bins = translator.translate(op.num_bins) - return f"APPROX_QUANTILES({arg}, {num_bins})" - - -def _first_non_null_value(translator, op: vendored_ibis_ops.FirstNonNullValue): - arg = translator.translate(op.arg) - return f"FIRST_VALUE({arg} IGNORE NULLS)" - - -def _last_non_null_value(translator, op: vendored_ibis_ops.LastNonNullValue): - arg = translator.translate(op.arg) - return f"LAST_VALUE({arg} IGNORE NULLS)" - - -def _to_json_string(translator, op: vendored_ibis_ops.ToJsonString): - arg = translator.translate(op.arg) - return f"TO_JSON_STRING({arg})" - - -def _generate_array(translator, op: vendored_ibis_ops.GenerateArray): - arg = translator.translate(op.arg) - return f"GENERATE_ARRAY(0, {arg})" - - -def _safe_cast_to_datetime(translator, op: vendored_ibis_ops.SafeCastToDatetime): - arg = translator.translate(op.arg) - return f"SAFE_CAST({arg} AS DATETIME)" - - -def _quantile(translator, op: ibis_reductions.Quantile): - arg = translator.translate(op.arg) - quantile = translator.translate(op.quantile) - return f"PERCENTILE_CONT({arg}, {quantile})" - - -def _array_aggregate(translator, op: vendored_ibis_ops.ArrayAggregate): - """This method provides the same functionality as the collect() method in Ibis, with - the added capability of ordering the results using order_by. 
- https://github.com/ibis-project/ibis/issues/9170 - """ - arg = translator.translate(op.arg) - - order_by_sql = "" - if len(op.order_by) > 0: - order_by = ", ".join([translator.translate(column) for column in op.order_by]) - order_by_sql = f"ORDER BY {order_by}" - - return f"ARRAY_AGG({arg} IGNORE NULLS {order_by_sql})" - - -patched_ops = { - vendored_ibis_ops.ApproximateMultiQuantile: _approx_quantiles, # type:ignore - vendored_ibis_ops.FirstNonNullValue: _first_non_null_value, # type:ignore - vendored_ibis_ops.LastNonNullValue: _last_non_null_value, # type:ignore - vendored_ibis_ops.ToJsonString: _to_json_string, # type:ignore - vendored_ibis_ops.GenerateArray: _generate_array, # type:ignore - vendored_ibis_ops.SafeCastToDatetime: _safe_cast_to_datetime, # type:ignore - ibis_reductions.Quantile: _quantile, # type:ignore - vendored_ibis_ops.ArrayAggregate: _array_aggregate, # type:ignore -} - -OPERATION_REGISTRY.update(patched_ops) diff --git a/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py b/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py index c74de82099..cbd8e4e2d9 100644 --- a/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py +++ b/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py @@ -610,7 +610,6 @@ def translate(self, op, *, params: Mapping[ir.Value, Any]) -> sge.Expression: op, params=params, rewrites=self.rewrites, - post_rewrites=self.post_rewrites, fuse_selects=options.sql.fuse_selects, ) @@ -1125,7 +1124,7 @@ def visit_Coalesce(self, op, *, arg): ### Ordering and window functions - def visit_SortKey(self, op, *, expr, ascending: bool, nulls_first: bool): + def visit_SortKey(self, op, *, expr, ascending: bool, nulls_first: bool = False): return sge.Ordered(this=expr, desc=not ascending, nulls_first=nulls_first) def visit_ApproxMedian(self, op, *, arg, where): @@ -1262,11 +1261,9 @@ def _cleanup_names(self, exprs: Mapping[str, sge.Expression]): else: yield value.as_(name, quoted=self.quoted, copy=False) - def visit_Select( - self, op, *, parent, selections, predicates, qualified, sort_keys, distinct - ): + def visit_Select(self, op, *, parent, selections, predicates, qualified, sort_keys): # if we've constructed a useless projection return the parent relation - if not (selections or predicates or qualified or sort_keys or distinct): + if not (selections or predicates or qualified or sort_keys): return parent result = parent @@ -1293,9 +1290,6 @@ def visit_Select( if sort_keys: result = result.order_by(*sort_keys, copy=False) - if distinct: - result = result.distinct() - return result def visit_DummyTable(self, op, *, values): @@ -1392,7 +1386,10 @@ def visit_Aggregate(self, op, *, parent, groups, metrics): @classmethod def _add_parens(cls, op, sg_expr): - if isinstance(op, cls.NEEDS_PARENS): + # Patch for https://github.com/ibis-project/ibis/issues/9975 + if isinstance(op, cls.NEEDS_PARENS) or ( + isinstance(op, ops.Alias) and isinstance(op.arg, cls.NEEDS_PARENS) + ): return sge.paren(sg_expr, copy=False) return sg_expr @@ -1480,6 +1477,11 @@ def visit_Limit(self, op, *, parent, n, offset): return result.subquery(alias, copy=False) return result + def visit_Distinct(self, op, *, parent): + return ( + sg.select(STAR, copy=False).distinct(copy=False).from_(parent, copy=False) + ) + def visit_CTE(self, op, *, parent): return sg.table(parent.alias_or_name, quoted=self.quoted) diff --git a/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py 
b/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py index fc8d93a433..3015991a26 100644 --- a/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py +++ b/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py @@ -1,5 +1,4 @@ # Contains code from https://github.com/ibis-project/ibis/blob/main/ibis/backends/sql/compilers/bigquery/__init__.py - """Module to convert from Ibis expression to SQL string.""" from __future__ import annotations @@ -9,28 +8,25 @@ import re from typing import Any, TYPE_CHECKING -from bigframes_vendored.ibis.backends.bigquery.datatypes import ( - BigQueryType, - BigQueryUDFType, -) +import bigframes_vendored.ibis.backends.bigquery.datatypes as bq_datatypes from bigframes_vendored.ibis.backends.sql.compilers.base import ( AggGen, NULL, SQLGlotCompiler, STAR, ) -from bigframes_vendored.ibis.backends.sql.rewrites import ( +from ibis import util +from ibis.backends.sql.datatypes import BigQueryType, BigQueryUDFType +from ibis.backends.sql.rewrites import ( exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, - split_select_distinct_with_order_by, ) -from ibis import util -from ibis.backends.sql.compilers.bigquery.udf.core import PythonToJavaScriptTranslator import ibis.common.exceptions as com from ibis.common.temporal import DateUnit, IntervalUnit, TimestampUnit, TimeUnit import ibis.expr.datatypes as dt import ibis.expr.operations as ops +import numpy as np import sqlglot as sg from sqlglot.dialects import BigQuery import sqlglot.expressions as sge @@ -40,6 +36,7 @@ import ibis.expr.types as ir + _NAME_REGEX = re.compile(r'[^!"$()*,./;?@[\\\]^`{}~\n]+') @@ -55,8 +52,6 @@ def _qualify_memtable( if isinstance(node, sge.Table) and _MEMTABLE_PATTERN.match(node.name) is not None: node.args["db"] = dataset node.args["catalog"] = project - # make sure to quote table location - node = _force_quote_table(node) return node @@ -64,12 +59,9 @@ def _remove_null_ordering_from_unsupported_window( node: sge.Expression, ) -> sge.Expression: """Remove null ordering in window frame clauses not supported by BigQuery. - BigQuery has only partial support for NULL FIRST/LAST in RANGE windows so we remove it from any window frame clause that doesn't support it. - Here's the support matrix: - ✅ sum(x) over (order by y desc nulls last) 🚫 sum(x) over (order by y asc nulls last) ✅ sum(x) over (order by y asc nulls first) @@ -89,27 +81,6 @@ def _remove_null_ordering_from_unsupported_window( return node -def _force_quote_table(table: sge.Table) -> sge.Table: - """Force quote all the parts of a bigquery path. - - The BigQuery identifier quoting semantics are bonkers - https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers - - my-table is OK, but not mydataset.my-table - - mytable-287 is OK, but not mytable-287a - - Just quote everything. 
- """ - for key in ("this", "db", "catalog"): - if (val := table.args[key]) is not None: - if isinstance(val, sg.exp.Identifier) and not val.quoted: - val.args["quoted"] = True - else: - table.args[key] = sg.to_identifier(val, quoted=True) - return table - - class BigQueryCompiler(SQLGlotCompiler): dialect = BigQuery type_mapper = BigQueryType @@ -123,7 +94,6 @@ class BigQueryCompiler(SQLGlotCompiler): exclude_unsupported_window_frame_from_rank, *SQLGlotCompiler.rewrites, ) - post_rewrites = (split_select_distinct_with_order_by,) supports_qualify = True @@ -214,7 +184,6 @@ def to_sqlglot( session_project: str | None = None, ) -> Any: """Compile an Ibis expression. - Parameters ---------- expr @@ -228,18 +197,16 @@ def to_sqlglot( Optional dataset ID to qualify memtable references. session_project Optional project ID to qualify memtable references. - Returns ------- Any The output of compilation. The type of this value depends on the backend. - """ sql = super().to_sqlglot(expr, limit=limit, params=params) table_expr = expr.as_table() - geocols = table_expr.schema().geospatial + geocols = getattr(table_expr.schema(), "geospatial", None) result = sql.transform( _qualify_memtable, @@ -280,64 +247,6 @@ def to_sqlglot( sources.append(result) return sources - def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> sge.Create: - name = type(udf_node).__name__ - type_mapper = self.udf_type_mapper - - body = PythonToJavaScriptTranslator(udf_node.__func__).compile() - config = udf_node.__config__ - libraries = config.get("libraries", []) - - signature = [ - sge.ColumnDef( - this=sg.to_identifier(name, quoted=self.quoted), - kind=type_mapper.from_ibis(param.annotation.pattern.dtype), - ) - for name, param in udf_node.__signature__.parameters.items() - ] - - lines = ['"""'] - - if config.get("strict", True): - lines.append('"use strict";') - - lines += [ - body, - "", - f"return {udf_node.__func_name__}({', '.join(udf_node.argnames)});", - '"""', - ] - - func = sge.Create( - kind="FUNCTION", - this=sge.UserDefinedFunction( - this=sg.to_identifier(name), expressions=signature, wrapped=True - ), - # not exactly what I had in mind, but it works - # - # quoting is too simplistic to handle multiline strings - expression=sge.Var(this="\n".join(lines)), - exists=False, - properties=sge.Properties( - expressions=[ - sge.TemporaryProperty(), - sge.ReturnsProperty(this=type_mapper.from_ibis(udf_node.dtype)), - sge.StabilityProperty( - this="IMMUTABLE" if config.get("determinism") else "VOLATILE" - ), - sge.LanguageProperty(this=sg.to_identifier("js")), - ] - + [ - sge.Property( - this=sg.to_identifier("library"), value=self.f.array(*libraries) - ) - ] - * bool(libraries) - ), - ) - - return func - @staticmethod def _minimize_spec(start, end, spec): if ( @@ -528,7 +437,7 @@ def visit_ArrayContains(self, op, *, arg, other): def visit_StringContains(self, op, *, haystack, needle): return self.f.strpos(haystack, needle) > 0 - def visti_StringFind(self, op, *, arg, substr, start, end): + def visit_StringFind(self, op, *, arg, substr, start, end): if start is not None: raise NotImplementedError( "`start` not implemented for BigQuery string find" @@ -573,6 +482,9 @@ def visit_NonNullLiteral(self, op, *, value, dtype): ) elif dtype.is_uuid(): return sge.convert(str(value)) + + elif dtype.is_int64(): + return sge.convert(np.int64(value)) return None def visit_IntervalFromInteger(self, op, *, arg, unit): @@ -1110,5 +1022,116 @@ def visit_ArrayAny(self, op, *, arg): def visit_ArrayAll(self, op, *, arg): return 
self._array_reduction(arg=arg, reduction="logical_and") + # Customized ops for bigframes + + def visit_InMemoryTable(self, op, *, name, schema, data): + # Avoid creating temp tables for small data, which is how memtable is + # used in BigQuery DataFrames. Inspired by: + # https://github.com/ibis-project/ibis/blob/efa6fb72bf4c790450d00a926d7bd809dade5902/ibis/backends/druid/compiler.py#L95 + tuples = data.to_frame().itertuples(index=False) + quoted = self.quoted + columns = [sg.column(col, quoted=quoted) for col in schema.names] + array_expr = sge.DataType( + this=sge.DataType.Type.STRUCT, + expressions=[ + sge.ColumnDef( + this=sge.to_identifier(field, quoted=self.quoted), + kind=bq_datatypes.BigQueryType.from_ibis(type_), + ) + for field, type_ in zip(schema.names, schema.types) + ], + nested=True, + ) + array_values = [ + sge.Tuple( + expressions=tuple( + self.visit_Literal(None, value=value, dtype=type_) + for value, type_ in zip(row, schema.types) + ) + ) + for row in tuples + ] + expr = sge.Unnest( + expressions=[ + sge.DataType( + this=sge.DataType.Type.ARRAY, + expressions=[array_expr], + nested=True, + values=array_values, + ), + ], + alias=sge.TableAlias( + this=sg.to_identifier(name, quoted=quoted), + columns=columns, + ), + ) + # return expr + return sg.select(sge.Star()).from_(expr) + + def visit_ArrayAggregate(self, op, *, arg, order_by, where): + if len(order_by) > 0: + expr = sge.Order( + this=arg, + expressions=[ + # Avoid adding NULLS FIRST / NULLS LAST in SQL, which is + # unsupported in ARRAY_AGG by reconstructing the node as + # plain SQL text. + f"({order_column.args['this'].sql(dialect='bigquery')}) {'DESC' if order_column.args.get('desc') else 'ASC'}" + for order_column in order_by + ], + ) + else: + expr = arg + return sge.IgnoreNulls(this=self.agg.array_agg(expr, where=where)) + + def visit_FirstNonNullValue(self, op, *, arg): + return sge.IgnoreNulls(this=sge.FirstValue(this=arg)) + + def visit_LastNonNullValue(self, op, *, arg): + return sge.IgnoreNulls(this=sge.LastValue(this=arg)) + + def visit_ToJsonString(self, op, *, arg): + return self.f.to_json_string(arg) + + def visit_Quantile(self, op, *, arg, quantile, where): + return sge.PercentileCont(this=arg, expression=quantile) + + def visit_WindowFunction(self, op, *, how, func, start, end, group_by, order_by): + # Patch for https://github.com/ibis-project/ibis/issues/9872 + if start is None and end is None: + spec = None + else: + if start is None: + start = {} + if end is None: + end = {} + + start_value = start.get("value", "UNBOUNDED") + start_side = start.get("side", "PRECEDING") + end_value = end.get("value", "UNBOUNDED") + end_side = end.get("side", "FOLLOWING") + + if getattr(start_value, "this", None) == "0": + start_value = "CURRENT ROW" + start_side = None + + if getattr(end_value, "this", None) == "0": + end_value = "CURRENT ROW" + end_side = None + + spec = sge.WindowSpec( + kind=how.upper(), + start=start_value, + start_side=start_side, + end=end_value, + end_side=end_side, + over="OVER", + ) + spec = self._minimize_spec(op.start, op.end, spec) + + order = sge.Order(expressions=order_by) if order_by else None + + return sge.Window(this=func, partition_by=group_by, order=order, spec=spec) + compiler = BigQueryCompiler() diff --git a/third_party/bigframes_vendored/ibis/backends/sql/rewrites.py b/third_party/bigframes_vendored/ibis/backends/sql/rewrites.py index 1f67902395..28954003f3 100644 --- a/third_party/bigframes_vendored/ibis/backends/sql/rewrites.py +++ 
b/third_party/bigframes_vendored/ibis/backends/sql/rewrites.py @@ -1,367 +1,516 @@ # Contains code from https://github.com/ibis-project/ibis/blob/main/ibis/backends/sql/rewrites.py -"""Some common rewrite functions to be shared between backends.""" +"""Lower the ibis expression graph to a SQL-like relational algebra.""" from __future__ import annotations -from collections import defaultdict +from collections.abc import Mapping +from functools import reduce +import operator +from typing import Any, TYPE_CHECKING +from ibis.common.annotations import attribute from ibis.common.collections import FrozenDict # noqa: TCH001 -from ibis.common.deferred import _, deferred, Item, var -from ibis.common.exceptions import ExpressionError, IbisInputError -from ibis.common.graph import Node as Traversable -from ibis.common.graph import traverse -from ibis.common.grounds import Concrete -from ibis.common.patterns import Check, pattern, replace +from ibis.common.deferred import var +import ibis.common.exceptions as com +from ibis.common.graph import Graph +from ibis.common.patterns import InstanceOf, Object, Pattern, replace from ibis.common.typing import VarTuple # noqa: TCH001 +import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.util import Namespace, promote_list +from ibis.expr.rewrites import d, p, replace_parameter +from ibis.expr.schema import Schema +from public import public import toolz -p = Namespace(pattern, module=ops) -d = Namespace(deferred, module=ops) - +if TYPE_CHECKING: + from collections.abc import Sequence x = var("x") y = var("y") -name = var("name") - - -class DerefMap(Concrete, Traversable): - """Trace and replace fields from earlier relations in the hierarchy. - In order to provide a nice user experience, we need to allow expressions - from earlier relations in the hierarchy. Consider the following example: - t = ibis.table([('a', 'int64'), ('b', 'string')], name='t') - t1 = t.select([t.a, t.b]) - t2 = t1.filter(t.a > 0) # note that not t1.a is referenced here - t3 = t2.select(t.a) # note that not t2.a is referenced here - However the relational operations in the IR are strictly enforcing that - the expressions are referencing the immediate parent only. So we need to - track fields upwards the hierarchy to replace `t.a` with `t1.a` and `t2.a` - in the example above. This is called dereferencing. - Whether we can treat or not a field of a relation semantically equivalent - with a field of an earlier relation in the hierarchy depends on the - `.values` mapping of the relation. Leaf relations, like `t` in the example - above, have an empty `.values` mapping, so we cannot dereference fields - from them. On the other hand a projection, like `t1` in the example above, - has a `.values` mapping like `{'a': t.a, 'b': t.b}`, so we can deduce that - `t1.a` is semantically equivalent with `t.a` and so on. - """ - """The relations we want the values to point to.""" - rels: VarTuple[ops.Relation] - - """Substitution mapping from values of earlier relations to the fields of `rels`.""" - subs: FrozenDict[ops.Value, ops.Field] - - """Ambiguous field references.""" - ambigs: FrozenDict[ops.Value, VarTuple[ops.Value]] - - @classmethod - def from_targets(cls, rels, extra=None): - """Create a dereference map from a list of target relations. - Usually a single relation is passed except for joins where multiple - relations are involved. - Parameters - ---------- - rels : list of ops.Relation - The target relations to dereference to. 
- extra : dict, optional - Extra substitutions to be added to the dereference map. - Returns - ------- - DerefMap - """ - rels = promote_list(rels) - mapping = defaultdict(dict) - for rel in rels: - for field in rel.fields.values(): - for value, distance in cls.backtrack(field): - mapping[value][field] = distance - - subs, ambigs = {}, {} - for from_, to in mapping.items(): - mindist = min(to.values()) - minkeys = [k for k, v in to.items() if v == mindist] - # if all the closest fields are from the same relation, then we - # can safely substitute them and we pick the first one arbitrarily - if all(minkeys[0].relations == k.relations for k in minkeys): - subs[from_] = minkeys[0] - else: - ambigs[from_] = minkeys - - if extra is not None: - subs.update(extra) - - return cls(rels, subs, ambigs) - - @classmethod - def backtrack(cls, value): - """Backtrack the field in the relation hierarchy. - The field is traced back until no modification is made, so only follow - ops.Field nodes not arbitrary values. - Parameters - ---------- - value : ops.Value - The value to backtrack. - Yields - ------ - tuple[ops.Field, int] - The value node and the distance from the original value. - """ - distance = 0 - # track down the field in the hierarchy until no modification - # is made so only follow ops.Field nodes not arbitrary values; - while isinstance(value, ops.Field): - yield value, distance - value = value.rel.values.get(value.name) - distance += 1 - if ( - value is not None - and value.relations - and not value.find(ops.Impure, filter=ops.Value) - ): - yield value, distance - - def dereference(self, value): - """Dereference a value to the target relations. - Also check for ambiguous field references. If a field reference is found - which is marked as ambiguous, then raise an error. - Parameters - ---------- - value : ops.Value - The value to dereference. - Returns - ------- - ops.Value - The dereferenced value. - """ - ambigs = value.find(lambda x: x in self.ambigs, filter=ops.Value) - if ambigs: - raise IbisInputError( - f"Ambiguous field reference {ambigs!r} in expression {value!r}" - ) - return value.replace(self.subs, filter=ops.Value) - - -def flatten_predicates(node): - """Yield the expressions corresponding to the `And` nodes of a predicate. 
- Examples - -------- - >>> import ibis - >>> t = ibis.table([("a", "int64"), ("b", "string")], name="t") - >>> filt = (t.a == 1) & (t.b == "foo") - >>> predicates = flatten_predicates(filt.op()) - >>> len(predicates) - 2 - >>> predicates[0].to_expr().name("left") - r0 := UnboundTable: t - a int64 - b string - left: r0.a == 1 - >>> predicates[1].to_expr().name("right") - r0 := UnboundTable: t - a int64 - b string - right: r0.b == 'foo' + +@public +class CTE(ops.Relation): + """Common table expression.""" + + parent: ops.Relation + + @attribute + def schema(self): + return self.parent.schema + + @attribute + def values(self): + return self.parent.values + + +@public +class Select(ops.Relation): + """Relation modelled after SQL's SELECT statement.""" + + parent: ops.Relation + selections: FrozenDict[str, ops.Value] = {} + predicates: VarTuple[ops.Value[dt.Boolean]] = () + qualified: VarTuple[ops.Value[dt.Boolean]] = () + sort_keys: VarTuple[ops.SortKey] = () + + def is_star_selection(self): + return tuple(self.values.items()) == tuple(self.parent.fields.items()) + + @attribute + def values(self): + return self.selections + + @attribute + def schema(self): + return Schema({k: v.dtype for k, v in self.selections.items()}) + + +@public +class FirstValue(ops.Analytic): + """Retrieve the first element.""" + + arg: ops.Column[dt.Any] + + @attribute + def dtype(self): + return self.arg.dtype + + +@public +class LastValue(ops.Analytic): + """Retrieve the last element.""" + + arg: ops.Column[dt.Any] + + @attribute + def dtype(self): + return self.arg.dtype + + +# TODO(kszucs): there is a better strategy to rewrite the relational operations +# to Select nodes by wrapping the leaf nodes in a Select node and then merging +# Project, Filter, Sort, etc. incrementally into the Select node. This way we +# can have tighter control over simplification logic. 
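# Illustrative sketch (not part of the vendored module; assumes only that
# ibis is importable): the rewrites below turn each Project/Filter/Sort node
# into a Select, and merge_select_select further down fuses adjacent Selects
# into a single SELECT ... WHERE ... ORDER BY when it is safe to do so.
import ibis

t = ibis.table({"a": "int64", "b": "string"}, name="t")
expr = t.filter(t.a > 0).select("a", "b").order_by("a")
# expr.op() is a Sort over a Project over a Filter; after lowering, each of
# those becomes a Select node, and the three Selects fuse into one because
# none of them contains window functions, subqueries, unnests, or impure
# expressions (the `blocking` set checked by merge_select_select).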
+ + +@replace(p.Project) +def project_to_select(_, **kwargs): + """Convert a Project node to a Select node.""" + return Select(_.parent, selections=_.values) + + +def partition_predicates(predicates): + qualified = [] + unqualified = [] + + for predicate in predicates: + if predicate.find(ops.WindowFunction, filter=ops.Value): + qualified.append(predicate) + else: + unqualified.append(predicate) + + return unqualified, qualified + + +@replace(p.Filter) +def filter_to_select(_, **kwargs): + """Convert a Filter node to a Select node.""" + predicates, qualified = partition_predicates(_.predicates) + return Select( + _.parent, selections=_.values, predicates=predicates, qualified=qualified + ) + + +@replace(p.Sort) +def sort_to_select(_, **kwargs): + """Convert a Sort node to a Select node.""" + return Select(_.parent, selections=_.values, sort_keys=_.keys) + + +if hasattr(p, "DropColumns"): + + @replace(p.DropColumns) + def drop_columns_to_select(_, **kwargs): + """Convert a DropColumns node to a Select node.""" + # if we're dropping fewer than 50% of the parent table's columns then the + # compiled query will likely be smaller than if we list everything *NOT* + # being dropped + if len(_.columns_to_drop) < len(_.schema) // 2: + return _ + return Select(_.parent, selections=_.values) + + +if hasattr(p, "FillNull"): + + @replace(p.FillNull) + def fill_null_to_select(_, **kwargs): + """Rewrite FillNull to a Select node.""" + if isinstance(_.replacements, Mapping): + mapping = _.replacements + else: + mapping = { + name: _.replacements + for name, type in _.parent.schema.items() + if type.nullable + } + + if not mapping: + return _.parent + + selections = {} + for name in _.parent.schema.names: + col = ops.Field(_.parent, name) + if (value := mapping.get(name)) is not None: + col = ops.Alias(ops.Coalesce((col, value)), name) + selections[name] = col + + return Select(_.parent, selections=selections) + + +if hasattr(p, "DropNull"): + + @replace(p.DropNull) + def drop_null_to_select(_, **kwargs): + """Rewrite DropNull to a Select node.""" + if _.subset is None: + columns = [ops.Field(_.parent, name) for name in _.parent.schema.names] + else: + columns = _.subset + + if columns: + preds = [ + reduce( + ops.And if _.how == "any" else ops.Or, + [ops.NotNull(c) for c in columns], + ) + ] + elif _.how == "all": + preds = [ops.Literal(False, dtype=dt.bool)] + else: + return _.parent + + return Select(_.parent, selections=_.values, predicates=tuple(preds)) + + +@replace(p.WindowFunction(p.First | p.Last)) +def first_to_firstvalue(_, **kwargs): + """Convert a First or Last node to a FirstValue or LastValue node.""" + if _.func.where is not None: + raise com.UnsupportedOperationError( + f"`{type(_.func).__name__.lower()}` with `where` is unsupported " + "in a window function" + ) + klass = FirstValue if isinstance(_.func, ops.First) else LastValue + return _.copy(func=klass(_.func.arg)) + + +def complexity(node): + """Assign a complexity score to a node. + + Subsequent projections can be merged into a single projection by replacing + the fields referenced in the outer projection with the computed expressions + from the inner projection. This inlining can result in very complex value + expressions depending on the projections. In order to prevent excessive + inlining, we assign a complexity score to each node. + + The complexity score assigns 1 to each value expression and adds up in the + tree hierarchy unless there is a Field node where we don't add up the + complexity of the referenced relation. 
This way we treat fields kind of like + reusable variables considering them less complex than they were inlined. """ - def predicate(node): - if isinstance(node, ops.And): - # proceed and don't yield the node - return True, None + def accum(node, *args): + if isinstance(node, ops.Field): + return 1 else: - # halt and yield the node - return False, node - - return list(traverse(predicate, node)) - - -@replace(p.Field(p.JoinChain)) -def peel_join_field(_): - return _.rel.values[_.name] - - -@replace(p.ScalarParameter) -def replace_parameter(_, params, **kwargs): - """Replace scalar parameters with their values.""" - return ops.Literal(value=params[_], dtype=_.dtype) - - -@replace(p.StringSlice) -def lower_stringslice(_, **kwargs): - """Rewrite StringSlice in terms of Substring.""" - if _.end is None: - return ops.Substring(_.arg, start=_.start) - if _.start is None: - return ops.Substring(_.arg, start=0, length=_.end) - if ( - isinstance(_.start, ops.Literal) - and isinstance(_.start.value, int) - and isinstance(_.end, ops.Literal) - and isinstance(_.end.value, int) - ): - # optimization for constant values - length = _.end.value - _.start.value - else: - length = ops.Subtract(_.end, _.start) - return ops.Substring(_.arg, start=_.start, length=length) + return 1 + sum(args) + return node.map_nodes(accum)[node] -@replace(p.Analytic) -def wrap_analytic(_, **__): - # Wrap analytic functions in a window function - return ops.WindowFunction(_) +@replace(Object(Select, Object(Select))) +def merge_select_select(_, **kwargs): + """Merge subsequent Select relations into one. -@replace(p.Reduction) -def project_wrap_reduction(_, rel): - # Query all the tables that the reduction depends on - if _.relations == {rel}: - # The reduction is fully originating from the `rel`, so turn - # it into a window function of `rel` - return ops.WindowFunction(_) - else: - # 1. The reduction doesn't depend on any table, constructed from - # scalar values, so turn it into a scalar subquery. - # 2. The reduction is originating from `rel` and other tables, - # so this is a correlated scalar subquery. - # 3. The reduction is originating entirely from other tables, - # so this is an uncorrelated scalar subquery. - return ops.ScalarSubquery(_.to_expr().as_table()) - - -def rewrite_project_input(value, relation): - # we need to detect reductions which are either turned into window functions - # or scalar subqueries depending on whether they are originating from the - # relation - return value.replace( - wrap_analytic | project_wrap_reduction, - filter=p.Value & ~p.WindowFunction, - context={"rel": relation}, + This rewrites eliminates `_.parent` by merging the outer and the inner + `predicates`, `sort_keys` and keeping the outer `selections`. All selections + from the inner Select are inlined into the outer Select. 
+ """ + # don't merge if either the outer or the inner select has window functions + blocking = ( + ops.WindowFunction, + ops.ExistsSubquery, + ops.InSubquery, + ops.Unnest, + ops.Impure, ) + if _.find_below(blocking, filter=ops.Value): + return _ + if _.parent.find_below(blocking, filter=ops.Value): + return _ + subs = {ops.Field(_.parent, k): v for k, v in _.parent.values.items()} + selections = {k: v.replace(subs, filter=ops.Value) for k, v in _.selections.items()} -ReductionLike = p.Reduction | p.Field(p.Aggregate(groups={})) + predicates = tuple(p.replace(subs, filter=ops.Value) for p in _.predicates) + unique_predicates = toolz.unique(_.parent.predicates + predicates) + qualified = tuple(p.replace(subs, filter=ops.Value) for p in _.qualified) + unique_qualified = toolz.unique(_.parent.qualified + qualified) -@replace(ReductionLike) -def filter_wrap_reduction(_): - # Wrap reductions or fields referencing an aggregation without a group by - - # which are scalar fields - in a scalar subquery. In the latter case we - # use the reduction value from the aggregation. - if isinstance(_, ops.Field): - value = _.rel.values[_.name] - else: - value = _ - return ops.ScalarSubquery(value.to_expr().as_table()) + sort_keys = tuple(s.replace(subs, filter=ops.Value) for s in _.sort_keys) + sort_key_exprs = {s.expr for s in sort_keys} + parent_sort_keys = tuple( + k for k in _.parent.sort_keys if k.expr not in sort_key_exprs + ) + unique_sort_keys = sort_keys + parent_sort_keys + + result = Select( + _.parent.parent, + selections=selections, + predicates=unique_predicates, + qualified=unique_qualified, + sort_keys=unique_sort_keys, + ) + return result if complexity(result) <= complexity(_) else _ -def rewrite_filter_input(value): - return value.replace( - wrap_analytic | filter_wrap_reduction, filter=p.Value & ~p.WindowFunction +def extract_ctes(node: ops.Relation) -> set[ops.Relation]: + cte_types = (Select, ops.Aggregate, ops.JoinChain, ops.Set, ops.Limit, ops.Sample) + dont_count = (ops.Field, ops.CountStar, ops.CountDistinctStar) + + g = Graph.from_bfs(node, filter=~InstanceOf(dont_count)) + result = set() + for op, dependents in g.invert().items(): + if isinstance(op, ops.View) or ( + len(dependents) > 1 and isinstance(op, cte_types) + ): + result.add(op) + + return result + + +def sqlize( + node: ops.Node, + params: Mapping[ops.ScalarParameter, Any], + rewrites: Sequence[Pattern] = (), + fuse_selects: bool = True, +) -> tuple[ops.Node, list[ops.Node]]: + """Lower the ibis expression graph to a SQL-like relational algebra. + + Parameters + ---------- + node + The root node of the expression graph. + params + A mapping of scalar parameters to their values. + rewrites + Supplementary rewrites to apply to the expression graph. + fuse_selects + Whether to merge subsequent Select nodes into one where possible. + + Returns + ------- + Tuple of the rewritten expression graph and a list of CTEs. 
+ + """ + assert isinstance(node, ops.Relation) + + # apply the backend specific rewrites + if rewrites: + node = node.replace(reduce(operator.or_, rewrites)) + + # lower the expression graph to a SQL-like relational algebra + context = {"params": params} + replacements = ( + replace_parameter | project_to_select | filter_to_select | sort_to_select ) + if hasattr(p, "FillNull"): + replacements = replacements | fill_null_to_select + + if hasattr(p, "DropNull"): + replacements = replacements | drop_null_to_select -@replace(p.Analytic | p.Reduction) -def window_wrap_reduction(_, window): - # Wrap analytic and reduction functions in a window function. Used in the - # value.over() API. - return ops.WindowFunction( - _, - how=window.how, - start=window.start, - end=window.end, - group_by=window.groupings, - order_by=window.orderings, + if hasattr(p, "DropColumns"): + replacements = replacements | drop_columns_to_select + + replacements = replacements | first_to_firstvalue + sqlized = node.replace( + replacements, + context=context, ) + # squash subsequent Select nodes into one + if fuse_selects: + simplified = sqlized.replace(merge_select_select) + else: + simplified = sqlized -@replace(p.WindowFunction) -def window_merge_frames(_, window): - # Merge window frames, used in the value.over() and groupby.select() APIs. - if _.how != window.how: - raise ExpressionError( - f"Unable to merge {_.how} window with {window.how} window" - ) - elif _.start and window.start and _.start != window.start: - raise ExpressionError( - "Unable to merge windows with conflicting `start` boundary" - ) - elif _.end and window.end and _.end != window.end: - raise ExpressionError("Unable to merge windows with conflicting `end` boundary") + # extract common table expressions while wrapping them in a CTE node + ctes = extract_ctes(simplified) + + def wrap(node, _, **kwargs): + new = node.__recreate__(kwargs) + return CTE(new) if node in ctes else new + + result = simplified.replace(wrap) + ctes = reversed([cte.parent for cte in result.find(CTE)]) + + return result, ctes + + +# supplemental rewrites selectively used on a per-backend basis + + +@replace(p.WindowFunction(func=p.NTile(y), order_by=())) +def add_order_by_to_empty_ranking_window_functions(_, **kwargs): + """Add an ORDER BY clause to rank window functions that don't have one.""" + return _.copy(order_by=(y,)) + + +"""Replace checks against an empty right side with `False`.""" +empty_in_values_right_side = p.InValues(options=()) >> d.Literal(False, dtype=dt.bool) - start = _.start or window.start - end = _.end or window.end - group_by = tuple(toolz.unique(_.group_by + window.groupings)) - order_keys = {} - for sort_key in window.orderings + _.order_by: - order_keys[sort_key.expr] = sort_key.ascending, sort_key.nulls_first +@replace( + p.WindowFunction(p.RankBase | p.NTile) + | p.StringFind + | p.FindInSet + | p.ArrayPosition +) +def one_to_zero_index(_, **kwargs): + """Subtract one from one-index functions.""" + return ops.Subtract(_, 1) - order_by = ( - ops.SortKey(expr, ascending=ascending, nulls_first=nulls_first) - for expr, (ascending, nulls_first) in order_keys.items() + +@replace(ops.NthValue) +def add_one_to_nth_value_input(_, **kwargs): + if isinstance(_.nth, ops.Literal): + nth = ops.Literal(_.nth.value + 1, dtype=_.nth.dtype) + else: + nth = ops.Add(_.nth, 1) + return _.copy(nth=nth) + + +@replace(p.WindowFunction(order_by=())) +def rewrite_empty_order_by_window(_, **kwargs): + return _.copy(order_by=(ops.NULL,)) + + 
+@replace(p.WindowFunction(p.RowNumber | p.NTile)) +def exclude_unsupported_window_frame_from_row_number(_, **kwargs): + return ops.Subtract(_.copy(start=None, end=0), 1) + + +@replace(p.WindowFunction(p.MinRank | p.DenseRank, start=None)) +def exclude_unsupported_window_frame_from_rank(_, **kwargs): + return ops.Subtract( + _.copy(start=None, end=0, order_by=_.order_by or (ops.NULL,)), 1 ) - return _.copy(start=start, end=end, group_by=group_by, order_by=order_by) -def rewrite_window_input(value, window): - context = {"window": window} - # if self is a reduction or analytic function, wrap it in a window function - node = value.replace( - window_wrap_reduction, - filter=p.Value & ~p.WindowFunction, - context=context, +@replace( + p.WindowFunction( + p.Lag | p.Lead | p.PercentRank | p.CumeDist | p.Any | p.All, start=None ) - # if self is already a window function, merge the existing window frame - # with the requested window frame - return node.replace(window_merge_frames, filter=p.Value, context=context) - - -# TODO(kszucs): schema comparison should be updated to not distinguish between -# different column order -@replace(p.Project(y @ p.Relation) & Check(_.schema == y.schema)) -def complete_reprojection(_, y): - # TODO(kszucs): this could be moved to the pattern itself but not sure how - # to express it, especially in a shorter way then the following check - for name in _.schema: - if _.values[name] != ops.Field(y, name): - return _ - return y +) +def exclude_unsupported_window_frame_from_ops(_, **kwargs): + return _.copy(start=None, end=0, order_by=_.order_by or (ops.NULL,)) + + +# Rewrite rules for lowering a high-level operation into one composed of more +# primitive operations. + + +@replace(p.Log2) +def lower_log2(_, **kwargs): + """Rewrite `log2` as `log`.""" + return ops.Log(_.arg, base=2) -@replace(p.Project(y @ p.Project)) -def subsequent_projects(_, y): - rule = p.Field(y, name) >> Item(y.values, name) - values = {k: v.replace(rule, filter=ops.Value) for k, v in _.values.items()} - return ops.Project(y.parent, values) +@replace(p.Log10) +def lower_log10(_, **kwargs): + """Rewrite `log10` as `log`.""" + return ops.Log(_.arg, base=10) -@replace(p.Filter(y @ p.Filter)) -def subsequent_filters(_, y): - rule = p.Field(y, name) >> d.Field(y.parent, name) - preds = tuple(v.replace(rule, filter=ops.Value) for v in _.predicates) - return ops.Filter(y.parent, y.predicates + preds) +@replace(p.Bucket) +def lower_bucket(_, **kwargs): + """Rewrite `Bucket` as `SearchedCase`.""" + cases = [] + results = [] + if _.closed == "left": + l_cmp = ops.LessEqual + r_cmp = ops.Less + else: + l_cmp = ops.Less + r_cmp = ops.LessEqual + + user_num_buckets = len(_.buckets) - 1 + + bucket_id = 0 + if _.include_under: + if user_num_buckets > 0: + cmp = ops.Less if _.close_extreme else r_cmp + else: + cmp = ops.LessEqual if _.closed == "right" else ops.Less + cases.append(cmp(_.arg, _.buckets[0])) + results.append(bucket_id) + bucket_id += 1 + + for j, (lower, upper) in enumerate(zip(_.buckets, _.buckets[1:])): + if _.close_extreme and ( + (_.closed == "right" and j == 0) + or (_.closed == "left" and j == (user_num_buckets - 1)) + ): + cases.append( + ops.And(ops.LessEqual(lower, _.arg), ops.LessEqual(_.arg, upper)) + ) + results.append(bucket_id) + else: + cases.append(ops.And(l_cmp(lower, _.arg), r_cmp(_.arg, upper))) + results.append(bucket_id) + bucket_id += 1 -@replace(p.Filter(y @ p.Project)) -def reorder_filter_project(_, y): - rule = p.Field(y, name) >> Item(y.values, name) - preds = 
tuple(v.replace(rule, filter=ops.Value) for v in _.predicates) + if _.include_over: + if user_num_buckets > 0: + cmp = ops.Less if _.close_extreme else l_cmp + else: + cmp = ops.Less if _.closed == "right" else ops.LessEqual + + cases.append(cmp(_.buckets[-1], _.arg)) + results.append(bucket_id) + bucket_id += 1 + + return ops.SearchedCase( + cases=tuple(cases), results=tuple(results), default=ops.NULL + ) - inner = ops.Filter(y.parent, preds) - rule = p.Field(y.parent, name) >> d.Field(inner, name) - projs = {k: v.replace(rule, filter=ops.Value) for k, v in y.values.items()} - return ops.Project(inner, projs) +@replace(p.Capitalize) +def lower_capitalize(_, **kwargs): + """Rewrite Capitalize in terms of substring, concat, upper, and lower.""" + first = ops.Uppercase(ops.Substring(_.arg, start=0, length=1)) + # use length instead of length - 1 to avoid backends complaining about + # asking for negative length + # + # there are at most length - 1 characters, so asking for length is fine + rest = ops.Lowercase(ops.Substring(_.arg, start=1, length=ops.StringLength(_.arg))) + return ops.StringConcat((first, rest)) -def simplify(node): - # TODO(kszucs): add a utility to the graph module to do rewrites in multiple - # passes after each other - node = node.replace(reorder_filter_project) - node = node.replace(reorder_filter_project) - node = node.replace(subsequent_projects | subsequent_filters) - node = node.replace(complete_reprojection) - return node +@replace(p.Sample) +def lower_sample(_, **kwargs): + """Rewrite Sample as `t.filter(random() <= fraction)`. + + Errors as unsupported if a `seed` is specified. + """ + if _.seed is not None: + raise com.UnsupportedOperationError( + "`Table.sample` with a random seed is unsupported" + ) + return ops.Filter(_.parent, (ops.LessEqual(ops.RandomScalar(), _.fraction),)) diff --git a/third_party/bigframes_vendored/ibis/expr/operations/__init__.py b/third_party/bigframes_vendored/ibis/expr/operations/__init__.py index 3ae5fc10e4..2c2efe528d 100644 --- a/third_party/bigframes_vendored/ibis/expr/operations/__init__.py +++ b/third_party/bigframes_vendored/ibis/expr/operations/__init__.py @@ -2,6 +2,5 @@ from __future__ import annotations from bigframes_vendored.ibis.expr.operations.analytic import * # noqa: F401 F403 -from bigframes_vendored.ibis.expr.operations.arrays import * # noqa: F401 F403 from bigframes_vendored.ibis.expr.operations.json import * # noqa: F401 F403 from bigframes_vendored.ibis.expr.operations.reductions import * # noqa: F401 F403 diff --git a/third_party/bigframes_vendored/ibis/expr/operations/arrays.py b/third_party/bigframes_vendored/ibis/expr/operations/arrays.py deleted file mode 100644 index a0ad915a9b..0000000000 --- a/third_party/bigframes_vendored/ibis/expr/operations/arrays.py +++ /dev/null @@ -1,18 +0,0 @@ -# Contains code from https://github.com/ibis-project/ibis/blob/master/ibis/expr/operations/arrays.py -from __future__ import annotations - -import ibis.expr.datatypes as dt -from ibis.expr.operations.core import Unary - - -class GenerateArray(Unary): - """ - Generates an array of values, similar to ibis.range(), but with simpler and - more efficient SQL generation. 
- """ - - dtype = dt.Array(dt.int64) - - -class SafeCastToDatetime(Unary): - dtype = dt.Timestamp(timezone=None) diff --git a/third_party/bigframes_vendored/ibis/expr/operations/json.py b/third_party/bigframes_vendored/ibis/expr/operations/json.py index 1eb0554137..ea1f766a71 100644 --- a/third_party/bigframes_vendored/ibis/expr/operations/json.py +++ b/third_party/bigframes_vendored/ibis/expr/operations/json.py @@ -5,5 +5,7 @@ import ibis.expr.operations.core as ibis_ops_core +# TODO(swast): Remove once supported upstream. +# See: https://github.com/ibis-project/ibis/issues/9542 class ToJsonString(ibis_ops_core.Unary): dtype = dt.string diff --git a/third_party/bigframes_vendored/ibis/expr/operations/reductions.py b/third_party/bigframes_vendored/ibis/expr/operations/reductions.py index bd971e408a..a428c73449 100644 --- a/third_party/bigframes_vendored/ibis/expr/operations/reductions.py +++ b/third_party/bigframes_vendored/ibis/expr/operations/reductions.py @@ -9,17 +9,6 @@ from ibis.expr.operations.reductions import Filterable, Reduction -class ApproximateMultiQuantile(Filterable, Reduction): - """Calculate (approximately) evenly-spaced quantiles. - - See: https://cloud.google.com/bigquery/docs/reference/standard-sql/approximate_aggregate_functions#approx_quantiles - """ - - arg: ibis_ops_core.Value - num_bins: ibis_ops_core.Value[dt.Int64] - dtype = dt.Array(dt.float64) - - class ArrayAggregate(Filterable, Reduction): """ Collects the elements of this expression into an ordered array. Similar to @@ -34,4 +23,4 @@ def dtype(self): return dt.Array(self.arg.dtype) -__all__ = ["ApproximateMultiQuantile", "ArrayAggregate"] +__all__ = ["ArrayAggregate"] diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index fe1c8a12ff..e7f555c729 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -544,7 +544,7 @@ def to_dict( >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.to_dict() - {'col1': {0: 1, 1: 2}, 'col2': {0: 3, 1: 4}} + {'col1': {np.int64(0): 1, np.int64(1): 2}, 'col2': {np.int64(0): 3, np.int64(1): 4}} You can specify the return orientation. @@ -1773,7 +1773,7 @@ def iterrows(self): ... }) >>> index, row = next(df.iterrows()) >>> index - 0 + np.int64(0) >>> row A 1 B 4 @@ -1794,7 +1794,7 @@ def itertuples(self, index: bool = True, name: str | None = "Pandas"): ... 'B': [4, 5, 6], ... }) >>> next(df.itertuples(name="Pair")) - Pair(Index=0, A=1, B=4) + Pair(Index=np.int64(0), A=np.int64(1), B=np.int64(4)) Args: index (bool, default True): @@ -6486,12 +6486,12 @@ def iat(self): Get value at specified row/column pair >>> df.iat[1, 2] - 1 + np.int64(1) Get value within a series >>> df.loc[0].iat[1] - 2 + np.int64(2) Returns: bigframes.core.indexers.IatDataFrameIndexer: Indexers object. @@ -6519,12 +6519,12 @@ def at(self): Get value at specified row/column pair >>> df.at[4, 'B'] - 2 + np.int64(2) Get value within a series >>> df.loc[5].at['B'] - 4 + np.int64(4) Returns: bigframes.core.indexers.AtDataFrameIndexer: Indexers object. 
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index a30ed9cd92..a6363e3285 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -205,7 +205,7 @@ def hasnans(self) -> bool: 3 dtype: Float64 >>> s.hasnans - True + np.True_ Returns: bool @@ -592,7 +592,7 @@ def agg(self, func): dtype: Int64 >>> s.agg('min') - 1 + np.int64(1) >>> s.agg(['min', 'max']) min 1 @@ -626,7 +626,7 @@ def count(self): 2 dtype: Float64 >>> s.count() - 2 + np.int64(2) Returns: int or Series (if level specified): Number of non-null values in the @@ -834,12 +834,12 @@ def corr(self, other, method="pearson", min_periods=None) -> float: >>> s1 = bpd.Series([.2, .0, .6, .2]) >>> s2 = bpd.Series([.3, .6, .0, .1]) >>> s1.corr(s2) - -0.8510644963469901 + np.float64(-0.8510644963469901) >>> s1 = bpd.Series([1, 2, 3], index=[0, 1, 2]) >>> s2 = bpd.Series([1, 2, 3], index=[2, 1, 0]) >>> s1.corr(s2) - -1.0 + np.float64(-1.0) Args: other (Series): @@ -870,15 +870,15 @@ def autocorr(self, lag: int = 1) -> float: >>> s = bpd.Series([0.25, 0.5, 0.2, -0.05]) >>> s.autocorr() # doctest: +ELLIPSIS - 0.10355... + np.float64(0.10355263309024067) >>> s.autocorr(lag=2) - -1.0 + np.float64(-1.0) If the Pearson correlation is not well defined, then 'NaN' is returned. >>> s = bpd.Series([1, 0, 0, 0]) >>> s.autocorr() - nan + np.float64(nan) Args: lag (int, default 1): @@ -951,12 +951,12 @@ def dot(self, other) -> Series | np.ndarray: >>> s = bpd.Series([0, 1, 2, 3]) >>> other = bpd.Series([-1, 2, -3, 4]) >>> s.dot(other) - 8 + np.int64(8) You can also use the operator ``@`` for the dot product: >>> s @ other - 8 + np.int64(8) Args: other (Series): @@ -3080,7 +3080,7 @@ def max( 1 3 dtype: Int64 >>> s.max() - 3 + np.int64(3) Calculating the max of a Series containing ``NA`` values: @@ -3091,7 +3091,7 @@ def max( 2 dtype: Int64 >>> s.max() - 3 + np.int64(3) Returns: scalar: Scalar. @@ -3120,7 +3120,7 @@ def min( 1 3 dtype: Int64 >>> s.min() - 1 + np.int64(1) Calculating the min of a Series containing ``NA`` values: @@ -3131,7 +3131,7 @@ def min( 2 dtype: Int64 >>> s.min() - 1 + np.int64(1) Returns: scalar: Scalar. @@ -3207,7 +3207,7 @@ def sum(self): 1 3 dtype: Int64 >>> s.sum() - 4 + np.int64(4) Calculating the sum of a Series containing ``NA`` values: @@ -3218,7 +3218,7 @@ def sum(self): 2 dtype: Int64 >>> s.sum() - 4 + np.int64(4) Returns: scalar: Scalar. @@ -3241,7 +3241,7 @@ def mean(self): 1 3 dtype: Int64 >>> s.mean() - 2.0 + np.float64(2.0) Calculating the mean of a Series containing ``NA`` values: @@ -3252,7 +3252,7 @@ def mean(self): 2 dtype: Int64 >>> s.mean() - 2.0 + np.float64(2.0) Returns: scalar: Scalar. @@ -3285,7 +3285,7 @@ def quantile( >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3, 4]) >>> s.quantile(.5) - 2.5 + np.float64(2.5) >>> s.quantile([.25, .5, .75]) 0.25 1.75 0.5 2.5 @@ -3576,10 +3576,10 @@ def argmax(self): dtype: Float64 >>> s.argmax() - 2 + np.int64(2) >>> s.argmin() - 0 + np.int64(0) The maximum cereal calories is the third element and the minimum cereal calories is the first element, since series is zero-indexed. @@ -3612,10 +3612,10 @@ def argmin(self): dtype: Float64 >>> s.argmax() - 2 + np.int64(2) >>> s.argmin() - 0 + np.int64(0) The maximum cereal calories is the third element and the minimum cereal calories is the first element, since series is zero-indexed. 
@@ -3887,11 +3887,11 @@ def is_monotonic_increasing(self) -> bool: >>> s = bpd.Series([1, 2, 2]) >>> s.is_monotonic_increasing - True + np.True_ >>> s = bpd.Series([3, 2, 1]) >>> s.is_monotonic_increasing - False + np.False_ Returns: bool: Boolean. @@ -3910,11 +3910,11 @@ def is_monotonic_decreasing(self) -> bool: >>> s = bpd.Series([3, 2, 2, 1]) >>> s.is_monotonic_decreasing - True + np.True_ >>> s = bpd.Series([1, 2, 3]) >>> s.is_monotonic_decreasing - False + np.False_ Returns: bool: Boolean. @@ -4041,7 +4041,7 @@ def iat(self): Get value at specified row number >>> s.iat[1] - 2 + np.int64(2) Returns: bigframes.core.indexers.IatSeriesIndexer: Indexers object. @@ -4066,7 +4066,7 @@ def at(self): Get value at specified row label >>> s.at['B'] - 2 + np.int64(2) Returns: @@ -4314,7 +4314,7 @@ def __getitem__(self, indexer): >>> s = bpd.Series([15, 30, 45]) >>> s[1] - 30 + np.int64(30) >>> s[0:2] 0 15 1 30 diff --git a/third_party/bigframes_vendored/sklearn/metrics/_classification.py b/third_party/bigframes_vendored/sklearn/metrics/_classification.py index 8e8b2c1952..c1a909e849 100644 --- a/third_party/bigframes_vendored/sklearn/metrics/_classification.py +++ b/third_party/bigframes_vendored/sklearn/metrics/_classification.py @@ -36,13 +36,13 @@ def accuracy_score(y_true, y_pred, normalize=True) -> float: >>> y_pred = bpd.DataFrame([0, 1, 2, 3]) >>> accuracy_score = bigframes.ml.metrics.accuracy_score(y_true, y_pred) >>> accuracy_score - 0.5 + np.float64(0.5) If False, return the number of correctly classified samples: >>> accuracy_score = bigframes.ml.metrics.accuracy_score(y_true, y_pred, normalize=False) >>> accuracy_score - 2 + np.int64(2) Args: y_true (Series or DataFrame of shape (n_samples,)): diff --git a/third_party/bigframes_vendored/sklearn/metrics/_ranking.py b/third_party/bigframes_vendored/sklearn/metrics/_ranking.py index dee8b350c0..7b97526de2 100644 --- a/third_party/bigframes_vendored/sklearn/metrics/_ranking.py +++ b/third_party/bigframes_vendored/sklearn/metrics/_ranking.py @@ -37,7 +37,7 @@ def auc(x, y) -> float: >>> y = bpd.DataFrame([2, 3, 4, 5]) >>> auc = bigframes.ml.metrics.auc(x, y) >>> auc - 3.5 + np.float64(3.5) The input can be Series: @@ -47,7 +47,7 @@ def auc(x, y) -> float: ... ) >>> auc = bigframes.ml.metrics.auc(df["x"], df["y"]) >>> auc - 3.5 + np.float64(3.5) Args: @@ -77,7 +77,7 @@ def roc_auc_score(y_true, y_score) -> float: >>> y_score = bpd.DataFrame([0.1, 0.4, 0.35, 0.8, 0.65, 0.9, 0.5, 0.3, 0.6, 0.45]) >>> roc_auc_score = bigframes.ml.metrics.roc_auc_score(y_true, y_score) >>> roc_auc_score - 0.625 + np.float64(0.625) The input can be Series: @@ -87,7 +87,7 @@ def roc_auc_score(y_true, y_score) -> float: ... 
) >>> roc_auc_score = bigframes.ml.metrics.roc_auc_score(df["y_true"], df["y_score"]) >>> roc_auc_score - 0.625 + np.float64(0.625) Args: y_true (Series or DataFrame of shape (n_samples,)): diff --git a/third_party/bigframes_vendored/sklearn/metrics/_regression.py b/third_party/bigframes_vendored/sklearn/metrics/_regression.py index c3e579bd29..56f78c6d0b 100644 --- a/third_party/bigframes_vendored/sklearn/metrics/_regression.py +++ b/third_party/bigframes_vendored/sklearn/metrics/_regression.py @@ -52,7 +52,7 @@ def r2_score(y_true, y_pred, force_finite=True) -> float: >>> y_pred = bpd.DataFrame([2.5, 0.0, 2, 8]) >>> r2_score = bigframes.ml.metrics.r2_score(y_true, y_pred) >>> r2_score - 0.9486081370449679 + np.float64(0.9486081370449679) Args: y_true (Series or DataFrame of shape (n_samples,)): @@ -79,7 +79,7 @@ def mean_squared_error(y_true, y_pred) -> float: >>> y_pred = bpd.DataFrame([2.5, 0.0, 2, 8]) >>> mse = bigframes.ml.metrics.mean_squared_error(y_true, y_pred) >>> mse - 0.375 + np.float64(0.375) Args: y_true (Series or DataFrame of shape (n_samples,)): From 596b03bb3ea27cead9b90200b9ef3cdcd99ca184 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 13 Sep 2024 15:54:22 -0700 Subject: [PATCH 09/18] fix: Fix a bug that raises exception when re-indexing columns with their original order (#988) --- bigframes/dataframe.py | 5 +++++ tests/system/small/test_dataframe.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index d7738a569d..29f22c28b9 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -1913,6 +1913,11 @@ def _reindex_rows( def _reindex_columns(self, columns): block = self._block new_column_index, indexer = self.columns.reindex(columns) + + if indexer is None: + # The new index is the same as the old one. Do nothing. + return self + result_cols = [] for label, index in zip(columns, indexer): if index >= 0: diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 9e046dc62e..b9291085fa 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3664,6 +3664,21 @@ def test_df_reindex_columns(scalars_df_index, scalars_pandas_df_index): ) +def test_df_reindex_columns_with_same_order(scalars_df_index, scalars_pandas_df_index): + # First, make sure the two dataframes have the same columns in order. 
+ columns = ["int64_col", "int64_too"] + bf = scalars_df_index[columns] + pd_df = scalars_pandas_df_index[columns] + + bf_result = bf.reindex(columns=columns).to_pandas() + pd_result = pd_df.reindex(columns=columns) + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + def test_df_equals_identical(scalars_df_index, scalars_pandas_df_index): unsupported = [ "geography_col", From deac6d2d6e459b26c05f6e5ff328ea03a3cff45f Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Mon, 16 Sep 2024 11:09:11 -0700 Subject: [PATCH 10/18] feat: add "include" param to describe for string types (#973) --- bigframes/dataframe.py | 88 ++++++++++++++++----- bigframes/operations/aggregations.py | 11 +++ tests/system/small/test_dataframe.py | 81 +++++++++++++++++++ tests/unit/operations/__init__.py | 13 +++ tests/unit/operations/test_aggregations.py | 92 ++++++++++++++++++++++ 5 files changed, 267 insertions(+), 18 deletions(-) create mode 100644 tests/unit/operations/__init__.py create mode 100644 tests/unit/operations/test_aggregations.py diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 29f22c28b9..c4597ab843 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -18,6 +18,7 @@ import datetime import inspect +import itertools import re import sys import textwrap @@ -70,6 +71,7 @@ import bigframes.exceptions import bigframes.formatting_helpers as formatter import bigframes.operations as ops +import bigframes.operations.aggregations import bigframes.operations.aggregations as agg_ops import bigframes.operations.plotting as plotting import bigframes.operations.structs @@ -2207,14 +2209,17 @@ def agg( self, func: str | typing.Sequence[str] ) -> DataFrame | bigframes.series.Series: if utils.is_list_like(func): - if any( - dtype not in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE - for dtype in self.dtypes - ): - raise NotImplementedError( - f"Multiple aggregations only supported on numeric columns. {constants.FEEDBACK_LINK}" - ) aggregations = [agg_ops.lookup_agg_func(f) for f in func] + + for dtype, agg in itertools.product(self.dtypes, aggregations): + if not bigframes.operations.aggregations.is_agg_op_supported( + dtype, agg + ): + raise NotImplementedError( + f"Type {dtype} does not support aggregation {agg}. " + f"Share your usecase with the BigQuery DataFrames team at the {constants.FEEDBACK_LINK}" + ) + return DataFrame( self._block.summarize( self._block.value_columns, @@ -2280,16 +2285,55 @@ def melt( self._block.melt(id_col_ids, val_col_ids, var_name, value_name) ) - def describe(self) -> DataFrame: - df_numeric = self._drop_non_numeric(permissive=False) - if len(df_numeric.columns) == 0: - raise NotImplementedError( - f"df.describe() currently only supports numeric values. 
{constants.FEEDBACK_LINK}" + _NUMERICAL_DISCRIBE_AGGS = ( + "count", + "mean", + "std", + "min", + "25%", + "50%", + "75%", + "max", + ) + _NON_NUMERICAL_DESCRIBE_AGGS = ("count", "nunique") + + def describe(self, include: None | Literal["all"] = None) -> DataFrame: + if include is None: + numeric_df = self._drop_non_numeric(permissive=False) + if len(numeric_df.columns) == 0: + # Describe eligible non-numerical columns + result = self._drop_non_string().agg(self._NON_NUMERICAL_DESCRIBE_AGGS) + else: + # Otherwise, only describe numerical columns + result = numeric_df.agg(self._NUMERICAL_DISCRIBE_AGGS) + return typing.cast(DataFrame, result) + + elif include == "all": + numeric_result = typing.cast( + DataFrame, + self._drop_non_numeric(permissive=False).agg( + self._NUMERICAL_DISCRIBE_AGGS + ), + ) + string_result = typing.cast( + DataFrame, + self._drop_non_string().agg(self._NON_NUMERICAL_DESCRIBE_AGGS), ) - result = df_numeric.agg( - ["count", "mean", "std", "min", "25%", "50%", "75%", "max"] - ) - return typing.cast(DataFrame, result) + + if len(numeric_result.columns) == 0: + return string_result + elif len(string_result.columns) == 0: + return numeric_result + else: + import bigframes.core.reshape as rs + + # Use reindex after join to preserve the original column order. + return rs.concat( + [numeric_result, string_result], axis=1 + )._reindex_columns(self.columns) + + else: + raise ValueError(f"Unsupported include type: {include}") def skew(self, *, numeric_only: bool = False): if not numeric_only: @@ -2487,7 +2531,7 @@ def unstack(self, level: LevelsType = -1): return DataFrame(pivot_block) def _drop_non_numeric(self, permissive=True) -> DataFrame: - types_to_keep = ( + numerical_types = ( set(bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE) if permissive else set(bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_RESTRICTIVE) @@ -2495,10 +2539,18 @@ def _drop_non_numeric(self, permissive=True) -> DataFrame: non_numeric_cols = [ col_id for col_id, dtype in zip(self._block.value_columns, self._block.dtypes) - if dtype not in types_to_keep + if dtype not in numerical_types ] return DataFrame(self._block.drop_columns(non_numeric_cols)) + def _drop_non_string(self) -> DataFrame: + string_cols = [ + col_id + for col_id, dtype in zip(self._block.value_columns, self._block.dtypes) + if dtype == bigframes.dtypes.STRING_DTYPE + ] + return DataFrame(self._block.select_columns(string_cols)) + def _drop_non_bool(self) -> DataFrame: non_bool_cols = [ col_id diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index 675ead1188..f20429e449 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -562,3 +562,14 @@ def lookup_agg_func(key: str) -> typing.Union[UnaryAggregateOp, NullaryAggregate return _AGGREGATIONS_LOOKUP[key] else: raise ValueError(f"Unrecognize aggregate function: {key}") + + +def is_agg_op_supported(dtype: dtypes.Dtype, op: AggregateOp) -> bool: + if dtype in dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE: + return True + + if dtype == dtypes.STRING_DTYPE: + return isinstance(op, (CountOp, NuniqueOp)) + + # For all other types, support no aggregation + return False diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index b9291085fa..fe63a1ed28 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -2612,6 +2612,87 @@ def test_df_describe(scalars_dfs): ).all() +@skip_legacy_pandas +@pytest.mark.parametrize("include", [None, "all"]) +def 
test_df_describe_non_numerical(scalars_dfs, include): + scalars_df, scalars_pandas_df = scalars_dfs + + non_numerical_columns = ["string_col"] + + modified_bf = scalars_df[non_numerical_columns] + bf_result = modified_bf.describe(include=include).to_pandas() + + modified_pd_df = scalars_pandas_df[non_numerical_columns] + pd_result = modified_pd_df.describe(include=include) + + # Reindex results with the specified keys and their order, because + # the relative order is not important. + bf_result = bf_result.reindex(["count", "nunique"]) + pd_result = pd_result.reindex( + ["count", "unique"] + # BF counter part of "unique" is called "nunique" + ).rename(index={"unique": "nunique"}) + + pd.testing.assert_frame_equal( + pd_result[non_numerical_columns].astype("Int64"), + bf_result[non_numerical_columns], + check_index_type=False, + ) + + +@skip_legacy_pandas +def test_df_describe_mixed_types_include_all(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + numerical_columns = [ + "int64_col", + "float64_col", + ] + non_numerical_columns = ["string_col"] + supported_columns = numerical_columns + non_numerical_columns + + modified_bf = scalars_df[supported_columns] + bf_result = modified_bf.describe(include="all").to_pandas() + + modified_pd_df = scalars_pandas_df[supported_columns] + pd_result = modified_pd_df.describe(include="all") + + # Drop quartiles, as they are approximate + bf_min = bf_result.loc["min", :] + bf_p25 = bf_result.loc["25%", :] + bf_p50 = bf_result.loc["50%", :] + bf_p75 = bf_result.loc["75%", :] + bf_max = bf_result.loc["max", :] + + # Reindex results with the specified keys and their order, because + # the relative order is not important. + bf_result = bf_result.reindex(["count", "nunique", "mean", "std", "min", "max"]) + pd_result = pd_result.reindex( + ["count", "unique", "mean", "std", "min", "max"] + # BF counter part of "unique" is called "nunique" + ).rename(index={"unique": "nunique"}) + + pd.testing.assert_frame_equal( + pd_result[numerical_columns].astype("Float64"), + bf_result[numerical_columns], + check_index_type=False, + ) + + pd.testing.assert_frame_equal( + pd_result[non_numerical_columns].astype("Int64"), + bf_result[non_numerical_columns], + check_index_type=False, + ) + + # Double-check that quantiles are at least plausible. + assert ( + (bf_min <= bf_p25) + & (bf_p25 <= bf_p50) + & (bf_p50 <= bf_p50) + & (bf_p75 <= bf_max) + ).all() + + def test_df_transpose(): # Include some floats to ensure type coercion values = [[0, 3.5, True], [1, 4.5, False], [2, 6.5, None]] diff --git a/tests/unit/operations/__init__.py b/tests/unit/operations/__init__.py new file mode 100644 index 0000000000..6d5e14bcf4 --- /dev/null +++ b/tests/unit/operations/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
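For reference, a hedged usage sketch of the `include` parameter added to `DataFrame.describe` in this change (toy column names and data; assumes an active BigQuery session, and is not part of the patch itself):

import bigframes.pandas as bpd

df = bpd.DataFrame({"num": [1, 2, 3], "txt": ["a", "b", "b"]})

df.describe()               # numeric columns only: count, mean, std, min, 25%, 50%, 75%, max
df[["txt"]].describe()      # no numeric columns, so falls back to count and nunique
df.describe(include="all")  # numeric stats plus count/nunique for the string column
df.describe(include="foo")  # raises ValueError (only None and "all" are accepted)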
diff --git a/tests/unit/operations/test_aggregations.py b/tests/unit/operations/test_aggregations.py new file mode 100644 index 0000000000..4cb6934c9d --- /dev/null +++ b/tests/unit/operations/test_aggregations.py @@ -0,0 +1,92 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import bigframes.dtypes as dtypes +from bigframes.operations.aggregations import ( + all_op, + any_op, + count_op, + dense_rank_op, + first_op, + is_agg_op_supported, + max_op, + mean_op, + median_op, + min_op, + nunique_op, + product_op, + rank_op, + size_op, + std_op, + sum_op, + var_op, +) + +_ALL_OPS = set( + [ + size_op, + sum_op, + mean_op, + median_op, + product_op, + max_op, + min_op, + std_op, + var_op, + count_op, + nunique_op, + rank_op, + dense_rank_op, + all_op, + any_op, + first_op, + ] +) +_STRING_SUPPORTED_OPS = set([count_op, nunique_op]) + + +@pytest.mark.parametrize("dtype", dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE) +@pytest.mark.parametrize("op", _ALL_OPS) +def test_is_agg_op_supported_numerical_support_all(dtype, op): + assert is_agg_op_supported(dtype, op) is True + + +@pytest.mark.parametrize("dtype", [dtypes.STRING_DTYPE]) +@pytest.mark.parametrize("op", _STRING_SUPPORTED_OPS) +def test_is_agg_op_supported_string_support_ops(dtype, op): + assert is_agg_op_supported(dtype, op) is True + + +@pytest.mark.parametrize("dtype", [dtypes.STRING_DTYPE]) +@pytest.mark.parametrize("op", _ALL_OPS - _STRING_SUPPORTED_OPS) +def test_is_agg_op_supported_string_not_support_ops(dtype, op): + assert is_agg_op_supported(dtype, op) is False + + +@pytest.mark.parametrize( + "dtype", + [ + dtypes.BYTES_DTYPE, + dtypes.DATE_DTYPE, + dtypes.TIME_DTYPE, + dtypes.DATETIME_DTYPE, + dtypes.TIMESTAMP_DTYPE, + dtypes.GEO_DTYPE, + ], +) +@pytest.mark.parametrize("op", _ALL_OPS) +def test_is_agg_op_supported_non_numerical_no_support(dtype, op): + assert is_agg_op_supported(dtype, op) is False From f7c03dcaf7ee4d62497f6653851e390795fc60a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 16 Sep 2024 14:22:26 -0500 Subject: [PATCH 11/18] feat: add `subset` parameter to `DataFrame.dropna` to select which columns to consider (#981) * feat: add `subset` parameter to `DataFrame.dropna` to select which columns to consider * fix dropna with subset=None * refactor: remove circular dependencies preventing local doctest runs With this change I can once again run ``` pytest --doctest-modules third_party/bigframes_vendored/pandas/core/frame.py ``` Note: having multiple `version.py` files should be fine. release-please will update all such files it finds. * fix doctest * Revert "Merge branch 'tswast-circular-import' into b366248570-dropna-subset" This reverts commit 57e8335af83558323d4df213854efc1edcdb35f0, reversing changes made to 197074abc53c5a85c66ec1940e54ea3f2c9c3677. * Reapply "Merge branch 'tswast-circular-import' into b366248570-dropna-subset" This reverts commit 0f18294513a2a6a806f5fff4ff38646d4029a884. 
* loop over tuple result --------- Co-authored-by: Huan Chen <142538604+Genesis929@users.noreply.github.com> --- bigframes/core/block_transforms.py | 12 ++++++++-- bigframes/dataframe.py | 22 +++++++++++++++++-- tests/system/small/test_dataframe.py | 21 +++++++++++------- tests/unit/test_dataframe.py | 9 ++++++++ .../bigframes_vendored/pandas/core/frame.py | 17 ++++++++++++++ 5 files changed, 69 insertions(+), 12 deletions(-) diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index 1990647e0a..398c6ab26a 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -15,7 +15,7 @@ import functools import typing -from typing import Sequence +from typing import Optional, Sequence import bigframes_vendored.constants as constants import pandas as pd @@ -488,11 +488,19 @@ def dropna( block: blocks.Block, column_ids: typing.Sequence[str], how: typing.Literal["all", "any"] = "any", + subset: Optional[typing.Sequence[str]] = None, ): """ Drop na entries from block """ - predicates = [ops.notnull_op.as_expr(column_id) for column_id in column_ids] + if subset is None: + subset = column_ids + + predicates = [ + ops.notnull_op.as_expr(column_id) + for column_id in column_ids + if column_id in subset + ] if len(predicates) == 0: return block if how == "any": diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index c4597ab843..c2fb9336f3 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2027,8 +2027,9 @@ def dropna( self, *, axis: int | str = 0, - inplace: bool = False, how: str = "any", + subset: typing.Union[None, blocks.Label, Sequence[blocks.Label]] = None, + inplace: bool = False, ignore_index=False, ) -> DataFrame: if inplace: @@ -2040,8 +2041,25 @@ def dropna( axis_n = utils.get_axis_number(axis) + if subset is not None and axis_n != 0: + raise NotImplementedError( + f"subset only supported when axis=0. {constants.FEEDBACK_LINK}" + ) + if axis_n == 0: - result = block_ops.dropna(self._block, self._block.value_columns, how=how) # type: ignore + # subset needs to be converted into column IDs, not column labels. 
+ if subset is None: + subset_ids = None + elif not utils.is_list_like(subset): + subset_ids = [id_ for id_ in self._block.label_to_col_id[subset]] + else: + subset_ids = [ + id_ + for label in subset + for id_ in self._block.label_to_col_id[label] + ] + + result = block_ops.dropna(self._block, self._block.value_columns, how=how, subset=subset_ids) # type: ignore if ignore_index: result = result.reset_index() return DataFrame(result) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index fe63a1ed28..b4c81bfbef 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -936,19 +936,24 @@ def test_assign_callable_lambda(scalars_dfs): @skip_legacy_pandas @pytest.mark.parametrize( - ("axis", "how", "ignore_index"), + ("axis", "how", "ignore_index", "subset"), [ - (0, "any", False), - (0, "any", True), - (1, "any", False), - (1, "all", False), + (0, "any", False, None), + (0, "any", True, None), + (0, "all", False, ["bool_col", "time_col"]), + (0, "any", False, ["bool_col", "time_col"]), + (0, "all", False, "time_col"), + (1, "any", False, None), + (1, "all", False, None), ], ) -def test_df_dropna(scalars_dfs, axis, how, ignore_index): +def test_df_dropna(scalars_dfs, axis, how, ignore_index, subset): scalars_df, scalars_pandas_df = scalars_dfs - df = scalars_df.dropna(axis=axis, how=how, ignore_index=ignore_index) + df = scalars_df.dropna(axis=axis, how=how, ignore_index=ignore_index, subset=subset) bf_result = df.to_pandas() - pd_result = scalars_pandas_df.dropna(axis=axis, how=how, ignore_index=ignore_index) + pd_result = scalars_pandas_df.dropna( + axis=axis, how=how, ignore_index=ignore_index, subset=subset + ) # Pandas uses int64 instead of Int64 (nullable) dtype. pd_result.index = pd_result.index.astype(pd.Int64Dtype()) diff --git a/tests/unit/test_dataframe.py b/tests/unit/test_dataframe.py index 6370d1b987..560c0cf0f4 100644 --- a/tests/unit/test_dataframe.py +++ b/tests/unit/test_dataframe.py @@ -20,6 +20,15 @@ from . import resources +def test_dataframe_dropna_axis_1_subset_not_implememented( + monkeypatch: pytest.MonkeyPatch, +): + dataframe = resources.create_dataframe(monkeypatch) + + with pytest.raises(NotImplementedError, match="subset"): + dataframe.dropna(axis=1, subset=["col1", "col2"]) + + def test_dataframe_repr_with_uninitialized_object(): """Ensures DataFrame.__init__ can be paused in a visual debugger without crashing. diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index e7f555c729..970883257c 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1614,6 +1614,8 @@ def dropna( *, axis: int | str = 0, how: str = "any", + subset=None, + inplace: bool = False, ignore_index=False, ) -> DataFrame: """Remove missing values. @@ -1662,6 +1664,15 @@ def dropna( [3 rows x 3 columns] + Define in which columns to look for missing values. + + >>> df.dropna(subset=['name', 'toy']) + name toy born + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip + + [2 rows x 3 columns] + Args: axis ({0 or 'index', 1 or 'columns'}, default 'columns'): Determine if rows or columns which contain missing values are @@ -1675,6 +1686,12 @@ def dropna( * 'any' : If any NA values are present, drop that row or column. * 'all' : If all values are NA, drop that row or column. + subset (column label or sequence of labels, optional): + Labels along other axis to consider, e.g. 
if you are dropping + rows these would be a list of columns to include. + Only supports axis=0. + inplace (bool, default ``False``): + Not supported. ignore_index (bool, default ``False``): If ``True``, the resulting axis will be labeled 0, 1, …, n - 1. From b00db7eb61752f4f93b0244c8eb72af7d24c6524 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Mon, 16 Sep 2024 12:52:59 -0700 Subject: [PATCH 12/18] refactor: Make column id namespaces explicit (#982) --- bigframes/core/__init__.py | 89 +++++++++++++++++++++--------- bigframes/core/blocks.py | 72 +++++++++--------------- bigframes/core/compile/compiled.py | 4 +- bigframes/core/compile/compiler.py | 12 ++-- bigframes/core/nodes.py | 54 +++++++++++++++--- bigframes/dataframe.py | 3 +- bigframes/session/executor.py | 10 ++-- bigframes/session/loader.py | 3 +- tests/unit/test_planner.py | 27 +++++---- 9 files changed, 165 insertions(+), 109 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index aa3aa63cc2..8ad00a46c6 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -18,7 +18,7 @@ import functools import io import typing -from typing import Iterable, Optional, Sequence, Tuple +from typing import Iterable, List, Optional, Sequence, Tuple import warnings import google.cloud.bigquery @@ -106,6 +106,7 @@ def from_table( @property def column_ids(self) -> typing.Sequence[str]: + """Returns column ids as strings.""" return self.schema.names @property @@ -182,10 +183,11 @@ def order_by(self, by: Sequence[OrderingExpression]) -> ArrayValue: def reversed(self) -> ArrayValue: return ArrayValue(nodes.ReversedNode(child=self.node)) - def promote_offsets(self, col_id: str) -> ArrayValue: + def promote_offsets(self) -> Tuple[ArrayValue, str]: """ Convenience function to promote copy of column offsets to a value column. Can be used to reset index. 
""" + col_id = self._gen_namespaced_uid() if self.node.order_ambiguous and not (self.session._strictly_ordered): if not self.session._allows_ambiguity: raise ValueError( @@ -197,7 +199,10 @@ def promote_offsets(self, col_id: str) -> ArrayValue: bigframes.exceptions.AmbiguousWindowWarning, ) - return ArrayValue(nodes.PromoteOffsetsNode(child=self.node, col_id=col_id)) + return ( + ArrayValue(nodes.PromoteOffsetsNode(child=self.node, col_id=col_id)), + col_id, + ) def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue: """Append together multiple ArrayValue objects.""" @@ -205,13 +210,19 @@ def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue: nodes.ConcatNode(children=tuple([self.node, *[val.node for val in other]])) ) - def compute_values(self, assignments: Sequence[Tuple[ex.Expression, str]]): - return ArrayValue( - nodes.ProjectionNode(child=self.node, assignments=tuple(assignments)) + def compute_values(self, assignments: Sequence[ex.Expression]): + col_ids = self._gen_namespaced_uids(len(assignments)) + ex_id_pairs = tuple((ex, id) for ex, id in zip(assignments, col_ids)) + return ( + ArrayValue(nodes.ProjectionNode(child=self.node, assignments=ex_id_pairs)), + col_ids, ) - def project_to_id(self, expression: ex.Expression, output_id: str): - return self.compute_values(((expression, output_id),)) + def project_to_id(self, expression: ex.Expression): + array_val, ids = self.compute_values( + [expression], + ) + return array_val, ids[0] def assign(self, source_id: str, destination_id: str) -> ArrayValue: if destination_id in self.column_ids: # Mutate case @@ -234,19 +245,22 @@ def assign(self, source_id: str, destination_id: str) -> ArrayValue: def create_constant( self, - destination_id: str, value: typing.Any, dtype: typing.Optional[bigframes.dtypes.Dtype], - ) -> ArrayValue: + ) -> Tuple[ArrayValue, str]: + destination_id = self._gen_namespaced_uid() if pandas.isna(value): # Need to assign a data type when value is NaN. dtype = dtype or bigframes.dtypes.DEFAULT_DTYPE - return ArrayValue( - nodes.ProjectionNode( - child=self.node, - assignments=((ex.const(value, dtype), destination_id),), - ) + return ( + ArrayValue( + nodes.ProjectionNode( + child=self.node, + assignments=((ex.const(value, dtype), destination_id),), + ) + ), + destination_id, ) def select_columns(self, column_ids: typing.Sequence[str]) -> ArrayValue: @@ -297,11 +311,10 @@ def project_window_op( column_name: str, op: agg_ops.UnaryWindowOp, window_spec: WindowSpec, - output_name=None, *, never_skip_nulls=False, skip_reproject_unsafe: bool = False, - ) -> ArrayValue: + ) -> Tuple[ArrayValue, str]: """ Creates a new expression based on this expression with unary operation applied to one column. 
column_name: the id of the input column present in the expression @@ -324,16 +337,20 @@ def project_window_op( bigframes.exceptions.AmbiguousWindowWarning, ) - return ArrayValue( - nodes.WindowOpNode( - child=self.node, - column_name=column_name, - op=op, - window_spec=window_spec, - output_name=output_name, - never_skip_nulls=never_skip_nulls, - skip_reproject_unsafe=skip_reproject_unsafe, - ) + output_name = self._gen_namespaced_uid() + return ( + ArrayValue( + nodes.WindowOpNode( + child=self.node, + column_name=column_name, + op=op, + window_spec=window_spec, + output_name=output_name, + never_skip_nulls=never_skip_nulls, + skip_reproject_unsafe=skip_reproject_unsafe, + ) + ), + output_name, ) def _reproject_to_table(self) -> ArrayValue: @@ -410,3 +427,21 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue: def get_offset_for_name(self, name: str): return self.schema.names.index(name) + + # Deterministically generate namespaced ids for new variables + # These new ids are only unique within the current namespace. + # Many operations, such as joins, create new namespaces. See: BigFrameNode.defines_namespace + # When migrating to integer ids, these will generate the next available integer, in order to densely pack ids + # this will help represent variables sets as compact bitsets + def _gen_namespaced_uid(self) -> str: + return self._gen_namespaced_uids(1)[0] + + def _gen_namespaced_uids(self, n: int) -> List[str]: + i = len(self.node.defined_variables) + genned_ids: List[str] = [] + while len(genned_ids) < n: + attempted_id = f"col_{i}" + if attempted_id not in self.node.defined_variables: + genned_ids.append(attempted_id) + i = i + 1 + return genned_ids diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 39cfed588f..7f6f5f1cc9 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -336,8 +336,7 @@ def reset_index(self, drop: bool = True) -> Block: self.session._default_index_type == bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64 ): - new_index_col_id = guid.generate_guid() - expr = expr.promote_offsets(new_index_col_id) + expr, new_index_col_id = expr.promote_offsets() new_index_cols = [new_index_col_id] elif self.session._default_index_type == bigframes.enums.DefaultIndexKind.NULL: new_index_cols = [] @@ -846,9 +845,7 @@ def project_expr( """ Apply a scalar expression to the block. Creates a new column to store the result. 
""" - # TODO(tbergeron): handle labels safely so callers don't need to - result_id = guid.generate_guid() - array_val = self._expr.project_to_id(expr, result_id) + array_val, result_id = self._expr.project_to_id(expr) block = Block( array_val, index_columns=self.index_columns, @@ -971,12 +968,10 @@ def apply_window_op( for key in window_spec.grouping_keys: block, not_null_id = block.apply_unary_op(key, ops.notnull_op) block = block.filter_by_id(not_null_id).drop_columns([not_null_id]) - result_id = guid.generate_guid() - expr = block._expr.project_window_op( + expr, result_id = block._expr.project_window_op( column, op, window_spec, - result_id, skip_reproject_unsafe=skip_reproject_unsafe, never_skip_nulls=never_skip_nulls, ) @@ -1005,8 +1000,7 @@ def create_constant( label: Label = None, dtype: typing.Optional[bigframes.dtypes.Dtype] = None, ) -> typing.Tuple[Block, str]: - result_id = guid.generate_guid() - expr = self.expr.create_constant(result_id, scalar_constant, dtype=dtype) + expr, result_id = self.expr.create_constant(scalar_constant, dtype=dtype) # Create index copy with label inserted # See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html labels = self.column_labels.insert(len(self.column_labels), label) @@ -1063,10 +1057,9 @@ def aggregate_all_and_stack( ) for col_id in self.value_columns ] - index_id = guid.generate_guid() - result_expr = self.expr.aggregate( + result_expr, index_id = self.expr.aggregate( aggregations, dropna=dropna - ).create_constant(index_id, None, None) + ).create_constant(None, None) # Transpose as last operation so that final block has valid transpose cache return Block( result_expr, @@ -1077,8 +1070,7 @@ def aggregate_all_and_stack( else: # axis_n == 1 # using offsets as identity to group on. # TODO: Allow to promote identity/total_order columns instead for better perf - offset_col = guid.generate_guid() - expr_with_offsets = self.expr.promote_offsets(offset_col) + expr_with_offsets, offset_col = self.expr.promote_offsets() stacked_expr, (_, value_col_ids, passthrough_cols,) = unpivot( expr_with_offsets, row_labels=self.column_labels, @@ -1224,8 +1216,7 @@ def aggregate( names: typing.List[Label] = [] if len(by_column_ids) == 0: - label_id = guid.generate_guid() - result_expr = result_expr.create_constant(label_id, 0, pd.Int64Dtype()) + result_expr, label_id = result_expr.create_constant(0, pd.Int64Dtype()) index_columns = (label_id,) names = [None] else: @@ -1275,8 +1266,7 @@ def get_stat( for stat in stats_to_fetch ] expr = self.expr.aggregate(aggregations) - offset_index_id = guid.generate_guid() - expr = expr.promote_offsets(offset_index_id) + expr, offset_index_id = expr.promote_offsets() block = Block( expr, index_columns=[offset_index_id], @@ -1303,8 +1293,7 @@ def get_binary_stat( ) ] expr = self.expr.aggregate(aggregations) - offset_index_id = guid.generate_guid() - expr = expr.promote_offsets(offset_index_id) + expr, offset_index_id = expr.promote_offsets() block = Block( expr, index_columns=[offset_index_id], @@ -1406,9 +1395,10 @@ def explode( expr = self.expr.explode(column_ids) if ignore_index: - new_index_ids = guid.generate_guid() + expr = expr.drop_columns(self.index_columns) + expr, new_index_ids = expr.promote_offsets() return Block( - expr.drop_columns(self.index_columns).promote_offsets(new_index_ids), + expr, column_labels=self.column_labels, # Initiates default index creation using the block constructor. 
index_columns=[new_index_ids], @@ -1593,8 +1583,7 @@ def retrieve_repr_request_results( return computed_df, count, query_job def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]: - result_id = guid.generate_guid() - expr = self._expr.promote_offsets(result_id) + expr, result_id = self._expr.promote_offsets() return ( Block( expr, @@ -1611,13 +1600,11 @@ def add_prefix(self, prefix: str, axis: str | int | None = None) -> Block: expr = self._expr new_index_cols = [] for index_col in self._index_columns: - new_col = guid.generate_guid() - expr = expr.project_to_id( + expr, new_col = expr.project_to_id( expression=ops.add_op.as_expr( ex.const(prefix), ops.AsTypeOp(to_type="string").as_expr(index_col), ), - output_id=new_col, ) new_index_cols.append(new_col) expr = expr.select_columns((*new_index_cols, *self.value_columns)) @@ -1637,13 +1624,11 @@ def add_suffix(self, suffix: str, axis: str | int | None = None) -> Block: expr = self._expr new_index_cols = [] for index_col in self._index_columns: - new_col = guid.generate_guid() - expr = expr.project_to_id( + expr, new_col = expr.project_to_id( expression=ops.add_op.as_expr( ops.AsTypeOp(to_type="string").as_expr(index_col), ex.const(suffix), ), - output_id=new_col, ) new_index_cols.append(new_col) expr = expr.select_columns((*new_index_cols, *self.value_columns)) @@ -1785,8 +1770,7 @@ def melt( ) if create_offsets_index: - index_id = guid.generate_guid() - unpivot_expr = unpivot_expr.promote_offsets(index_id) + unpivot_expr, index_id = unpivot_expr.promote_offsets() index_cols = [index_id] else: index_cols = [] @@ -2012,12 +1996,10 @@ def merge( coalesced_ids = [] for left_id, right_id in zip(left_join_ids, right_join_ids): - coalesced_id = guid.generate_guid() - joined_expr = joined_expr.project_to_id( + joined_expr, coalesced_id = joined_expr.project_to_id( ops.coalesce_op.as_expr( get_column_left[left_id], get_column_right[right_id] ), - coalesced_id, ) coalesced_ids.append(coalesced_id) @@ -2076,8 +2058,7 @@ def merge( expr = joined_expr index_columns = [] else: - offset_index_id = guid.generate_guid() - expr = joined_expr.promote_offsets(offset_index_id) + expr, offset_index_id = joined_expr.promote_offsets() index_columns = [offset_index_id] return Block(expr, index_columns=index_columns, column_labels=labels) @@ -2442,8 +2423,7 @@ def _get_rows_as_json_values(self) -> Block: # expression. # TODO(shobs): Replace direct SQL manipulation by structured expression # manipulation - ordering_column_name = guid.generate_guid() - expr = self.expr.promote_offsets(ordering_column_name) + expr, ordering_column_name = self.expr.promote_offsets() expr_sql = self.session._to_sql(expr) # Names of the columns to serialize for the row. 
@@ -2869,8 +2849,8 @@ def coalesce_columns( expr = expr.drop_columns([left_id]) elif how == "outer": coalesced_id = guid.generate_guid() - expr = expr.project_to_id( - ops.coalesce_op.as_expr(left_id, right_id), coalesced_id + expr, coalesced_id = expr.project_to_id( + ops.coalesce_op.as_expr(left_id, right_id) ) expr = expr.drop_columns([left_id, right_id]) result_ids.append(coalesced_id) @@ -3047,7 +3027,7 @@ def unpivot( explode_offsets_id = labels_mapping[labels_array.column_ids[-1]] # Build the output rows as a case statment that selects between the N input columns - unpivot_exprs: List[Tuple[ex.Expression, str]] = [] + unpivot_exprs: List[ex.Expression] = [] # Supports producing multiple stacked ouput columns for stacking only part of hierarchical index for input_ids in unpivot_columns: # row explode offset used to choose the input column @@ -3064,11 +3044,11 @@ def unpivot( ) ) col_expr = ops.case_when_op.as_expr(*cases) - unpivot_exprs.append((col_expr, guid.generate_guid())) + unpivot_exprs.append(col_expr) - unpivot_col_ids = [id for _, id in unpivot_exprs] + joined_array, unpivot_col_ids = joined_array.compute_values(unpivot_exprs) - return joined_array.compute_values(unpivot_exprs).select_columns( + return joined_array.select_columns( [*index_col_ids, *unpivot_col_ids, *new_passthrough_cols] ), (tuple(index_col_ids), tuple(unpivot_col_ids), tuple(new_passthrough_cols)) diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index cd00c98381..693d93de8c 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -814,7 +814,7 @@ def project_window_op( column_name: str, op: agg_ops.UnaryWindowOp, window_spec: WindowSpec, - output_name=None, + output_name: str, *, never_skip_nulls=False, ) -> OrderedIR: @@ -823,7 +823,7 @@ def project_window_op( column_name: the id of the input column present in the expression op: the windowable operator to apply to the input column window_spec: a specification of the window over which to apply the operator - output_name: the id to assign to the output of the operator, by default will replace input col if distinct output id not provided + output_name: the id to assign to the output of the operator never_skip_nulls: will disable null skipping for operators that would otherwise do so """ # Cannot nest analytic expressions, so reproject to cte first if needed. diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index 950c2c2cc7..23501f93c8 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -110,7 +110,7 @@ def compile_cached_table(self, node: nodes.CachedTableNode, ordered: bool = True ) used_columns = ( *node.schema.names, - *node.hidden_columns, + *node._hidden_columns, ) # Physical schema might include unused columns, unsupported datatypes like JSON physical_schema = ibis.backends.bigquery.BigQuerySchema.to_ibis( @@ -127,18 +127,20 @@ def compile_cached_table(self, node: nodes.CachedTableNode, ordered: bool = True raise ValueError( "Cannot use partially ordered cached value. Result requires total ordering information." 
) - return compiled.OrderedIR( + ir = compiled.OrderedIR( ibis_table, columns=tuple( bigframes.core.compile.ibis_types.ibis_value_to_canonical_type( ibis_table[col] ) - for col in node.schema.names + for col in [*node.schema.names, *node._hidden_columns] ), ordering=node.ordering, - hidden_ordering_columns=[ibis_table[c] for c in node.hidden_columns], ) - + ir = ir._select( + tuple(ir._get_ibis_column(name) for name in node.schema.names) + ) + return ir else: return compiled.UnorderedIR( ibis_table, diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index e11fcafff7..2dc9623d89 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -178,6 +178,25 @@ def transform_children( """Apply a function to each child node.""" ... + @property + def defines_namespace(self) -> bool: + """ + If true, this node establishes a new column id namespace. + + If false, this node consumes and produces ids in the namespace + """ + return False + + @functools.cached_property + def defined_variables(self) -> set[str]: + """Full set of variables defined in the namespace, even if not selected.""" + self_defined_variables = set(self.schema.names) + if self.defines_namespace: + return self_defined_variables + return self_defined_variables.union( + *(child.defined_variables for child in self.child_nodes) + ) + @dataclass(frozen=True) class UnaryNode(BigFrameNode): @@ -262,6 +281,10 @@ def transform_children( self, left_child=t(self.left_child), right_child=t(self.right_child) ) + @property + def defines_namespace(self) -> bool: + return True + @dataclass(frozen=True) class ConcatNode(BigFrameNode): @@ -477,9 +500,9 @@ def __post_init__(self): raise ValueError( f"Requested schema {logical_names} cannot be derived from table schema {self.table.physical_schema}" ) - if not set(self.hidden_columns).issubset(physical_names): + if not set(self._hidden_columns).issubset(physical_names): raise ValueError( - f"Requested hidden columns {self.hidden_columns} cannot be derived from table schema {self.table.physical_schema}" + f"Requested hidden columns {self._hidden_columns} cannot be derived from table schema {self.table.physical_schema}" ) @property @@ -498,7 +521,7 @@ def variables_introduced(self) -> int: return len(self.schema.items) + OVERHEAD_VARIABLES @property - def hidden_columns(self) -> typing.Tuple[str, ...]: + def _hidden_columns(self) -> typing.Tuple[str, ...]: """Physical columns used to define ordering but not directly exposed as value columns.""" if self.ordering is None: return () @@ -646,6 +669,13 @@ def variables_introduced(self) -> int: # This operation only renames variables, doesn't actually create new ones return 0 + # TODO: Reuse parent namespace + # Currently, Selection node allows renaming an reusing existing names, so it must establish a + # new namespace. 
+ @property + def defines_namespace(self) -> bool: + return True + @dataclass(frozen=True) class ProjectionNode(UnaryNode): @@ -707,6 +737,10 @@ def schema(self) -> schemata.ArraySchema: def variables_introduced(self) -> int: return 1 + @property + def defines_namespace(self) -> bool: + return True + @dataclass(frozen=True) class AggregateNode(UnaryNode): @@ -752,13 +786,17 @@ def order_ambiguous(self) -> bool: def explicitly_ordered(self) -> bool: return True + @property + def defines_namespace(self) -> bool: + return True + @dataclass(frozen=True) class WindowOpNode(UnaryNode): column_name: str op: agg_ops.UnaryWindowOp window_spec: window.WindowSpec - output_name: typing.Optional[str] = None + output_name: str never_skip_nulls: bool = False skip_reproject_unsafe: bool = False @@ -773,10 +811,6 @@ def non_local(self) -> bool: def schema(self) -> schemata.ArraySchema: input_type = self.child.schema.get_type(self.column_name) new_item_dtype = self.op.output_type(input_type) - if self.output_name is None: - return self.child.schema.update_dtype(self.column_name, new_item_dtype) - if self.output_name in self.child.schema.names: - return self.child.schema.update_dtype(self.output_name, new_item_dtype) return self.child.schema.append( schemata.SchemaItem(self.output_name, new_item_dtype) ) @@ -860,3 +894,7 @@ def relation_ops_created(self) -> int: @functools.cached_property def variables_introduced(self) -> int: return len(self.column_ids) + 1 + + @property + def defines_namespace(self) -> bool: + return True diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index c2fb9336f3..862c8dc2c8 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -3457,7 +3457,8 @@ def _prepare_export( } if ordering_id is not None: - array_value = array_value.promote_offsets(ordering_id) + array_value, internal_ordering_id = array_value.promote_offsets() + id_overrides[internal_ordering_id] = ordering_id return array_value, id_overrides def map(self, func, na_action: Optional[str] = None) -> DataFrame: diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index 0512fa78e3..d2a2e0f1b2 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -84,7 +84,9 @@ def to_sql( Convert an ArrayValue to a sql query that will yield its value. """ if offset_column: - array_value = array_value.promote_offsets(offset_column) + array_value, internal_offset_col = array_value.promote_offsets() + col_id_overrides = dict(col_id_overrides) + col_id_overrides[internal_offset_col] = offset_column node = ( self._get_optimized_plan(array_value.node) if enable_cache @@ -364,12 +366,12 @@ def _cache_with_offsets(self, array_value: bigframes.core.ArrayValue): "Caching with offsets only supported in strictly ordered mode." 
) offset_column = bigframes.core.guid.generate_guid("bigframes_offsets") - node_w_offsets = array_value.promote_offsets(offset_column).node - sql = self.compiler.compile_unordered(self._get_optimized_plan(node_w_offsets)) + w_offsets, offset_column = array_value.promote_offsets() + sql = self.compiler.compile_unordered(self._get_optimized_plan(w_offsets.node)) tmp_table = self._sql_as_cached_temp_table( sql, - node_w_offsets.schema.to_bigquery(), + w_offsets.schema.to_bigquery(), cluster_cols=[offset_column], ) cached_replacement = array_value.as_cached( diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index ce9874e35f..22de367804 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -383,8 +383,7 @@ def read_gbq_table( index_names: Sequence[Hashable] = index_cols if index_col == bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64: - sequential_index_col = bigframes.core.guid.generate_guid("index_") - array_value = array_value.promote_offsets(sequential_index_col) + array_value, sequential_index_col = array_value.promote_offsets() index_cols = [sequential_index_col] index_names = [None] diff --git a/tests/unit/test_planner.py b/tests/unit/test_planner.py index 84dd05ddaa..27ec7d5921 100644 --- a/tests/unit/test_planner.py +++ b/tests/unit/test_planner.py @@ -46,10 +46,9 @@ def test_session_aware_caching_project_filter(): """ Test that if a node is filtered by a column, the node is cached pre-filter and clustered by the filter column. """ - session_objects = [LEAF, LEAF.create_constant("col_c", 4, pd.Int64Dtype())] - target = LEAF.create_constant("col_c", 4, pd.Int64Dtype()).filter( - ops.gt_op.as_expr("col_a", ex.const(3)) - ) + session_objects = [LEAF, LEAF.create_constant(4, pd.Int64Dtype())[0]] + target, _ = LEAF.create_constant(4, pd.Int64Dtype()) + target = target.filter(ops.gt_op.as_expr("col_a", ex.const(3))) result, cluster_cols = planner.session_aware_cache_plan( target.node, [obj.node for obj in session_objects] ) @@ -61,14 +60,16 @@ def test_session_aware_caching_project_multi_filter(): """ Test that if a node is filtered by multiple columns, all of them are in the cluster cols """ - session_objects = [LEAF, LEAF.create_constant("col_c", 4, pd.Int64Dtype())] + obj1 = LEAF + obj2, _ = LEAF.create_constant(4, pd.Int64Dtype()) + session_objects = [obj1, obj2] predicate_1a = ops.gt_op.as_expr("col_a", ex.const(3)) predicate_1b = ops.lt_op.as_expr("col_a", ex.const(55)) predicate_1 = ops.and_op.as_expr(predicate_1a, predicate_1b) predicate_3 = ops.eq_op.as_expr("col_b", ex.const(1)) target = ( LEAF.filter(predicate_1) - .create_constant("col_c", 4, pd.Int64Dtype()) + .create_constant(4, pd.Int64Dtype())[0] .filter(predicate_3) ) result, cluster_cols = planner.session_aware_cache_plan( @@ -84,8 +85,8 @@ def test_session_aware_caching_unusable_filter(): Most filters with multiple column references cannot be used for scan pruning, as they cannot be converted to fixed value ranges. 
""" - session_objects = [LEAF, LEAF.create_constant("col_c", 4, pd.Int64Dtype())] - target = LEAF.create_constant("col_c", 4, pd.Int64Dtype()).filter( + session_objects = [LEAF, LEAF.create_constant(4, pd.Int64Dtype())[0]] + target = LEAF.create_constant(4, pd.Int64Dtype())[0].filter( ops.gt_op.as_expr("col_a", "col_b") ) result, cluster_cols = planner.session_aware_cache_plan( @@ -101,12 +102,10 @@ def test_session_aware_caching_fork_after_window_op(): Windowing is expensive, so caching should always compute the window function, in order to avoid later recomputation. """ - other = LEAF.promote_offsets("offsets_col").create_constant( - "col_d", 5, pd.Int64Dtype() - ) + other = LEAF.promote_offsets()[0].create_constant(5, pd.Int64Dtype())[0] target = ( - LEAF.promote_offsets("offsets_col") - .create_constant("col_c", 4, pd.Int64Dtype()) + LEAF.promote_offsets()[0] + .create_constant(4, pd.Int64Dtype())[0] .filter( ops.eq_op.as_expr("col_a", ops.add_op.as_expr(ex.const(4), ex.const(3))) ) @@ -117,5 +116,5 @@ def test_session_aware_caching_fork_after_window_op(): other.node, ], ) - assert result == LEAF.promote_offsets("offsets_col").node + assert result == LEAF.promote_offsets()[0].node assert cluster_cols == ["col_a"] From c164e85cd3a526908592eb681b43ec04bd4fb47b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 17 Sep 2024 14:58:55 +0000 Subject: [PATCH 13/18] build(python): release script update (#993) Source-Link: https://github.com/googleapis/synthtool/commit/71a72973dddbc66ea64073b53eda49f0d22e0942 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/release.sh | 2 +- .kokoro/release/common.cfg | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 6d064ddb9b..597e0c3261 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:94bb690db96e6242b2567a4860a94d48fa48696d092e51b0884a1a2c0a79a407 -# created: 2024-07-31T14:52:44.926548819Z + digest: sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 +# created: 2024-09-16T21:04:09.091105552Z diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 21a9b558c5..b1dd5f09ec 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -23,7 +23,7 @@ python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source / export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. 
-TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-1") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-2") cd github/python-bigquery-dataframes python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index a0c39946cf..824d62f257 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -28,7 +28,7 @@ before_action { fetch_keystore { keystore_resource { keystore_config_id: 73713 - keyname: "google-cloud-pypi-token-keystore-1" + keyname: "google-cloud-pypi-token-keystore-2" } } } From 06c1b3396d77d1de4f927328bae70cd7b3eb0b0b Mon Sep 17 00:00:00 2001 From: Chelsea Lin <124939984+chelsea-lin@users.noreply.github.com> Date: Tue, 17 Sep 2024 08:39:28 -0700 Subject: [PATCH 14/18] deps: limit ibis-framework version to 9.2.0 (#989) * chore: limit ibis version to 9.2.0 * remove ibis constrains on 3.12 tests --- setup.py | 2 +- testing/constraints-3.12.txt | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 908a3c244f..833d4fe565 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "google-cloud-iam >=2.12.1", "google-cloud-resource-manager >=1.10.3", "google-cloud-storage >=2.0.0", - "ibis-framework[bigquery] >=9.0.0,<=9.3.0", + "ibis-framework[bigquery] >=9.0.0,<=9.2.0", "jellyfish >=0.8.9", "numpy >=1.24.0", "pandas >=1.5.3", diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt index dbbb5a2d88..e69de29bb2 100644 --- a/testing/constraints-3.12.txt +++ b/testing/constraints-3.12.txt @@ -1,3 +0,0 @@ -# Some internal modules have moved, -# so make sure we test on all ibis-framework 9.x versions. 
-ibis-framework==9.2.0 From 22b483a9d9bc50b594ef270a691e2ea3a31aa295 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Tue, 17 Sep 2024 12:29:17 -0700 Subject: [PATCH 15/18] chore: Use fixture for list/str accessor tests (#992) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: Use fixture for list/str accessor tests * fix format * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * add more type coverage in tests * fix format * remove unnecessary dep * remove import --------- Co-authored-by: Owl Bot --- tests/data/repeated.jsonl | 3 + tests/data/repeated_schema.json | 42 ++++++++++++ tests/system/conftest.py | 27 ++++++++ tests/system/small/operations/test_lists.py | 67 +++++++++++++------ tests/system/small/operations/test_strings.py | 25 ++++--- 5 files changed, 134 insertions(+), 30 deletions(-) create mode 100644 tests/data/repeated.jsonl create mode 100644 tests/data/repeated_schema.json diff --git a/tests/data/repeated.jsonl b/tests/data/repeated.jsonl new file mode 100644 index 0000000000..b3c47772f6 --- /dev/null +++ b/tests/data/repeated.jsonl @@ -0,0 +1,3 @@ +{"rowindex": 0, "int_list_col": [1], "bool_list_col": [true], "float_list_col": [1.2, 2.3], "date_list_col": ["2021-07-21"], "date_time_list_col": ["2021-07-21 11:39:45"], "numeric_list_col": [1.2, 2.3, 3.4], "string_list_col": ["abc", "de", "f"]} +{"rowindex": 1, "int_list_col": [1,2], "bool_list_col": [true, false], "float_list_col": [1.1], "date_list_col": ["2021-07-21", "1987-03-28"], "date_time_list_col": ["1999-03-14 17:22:00"], "numeric_list_col": [5.5, 2.3], "string_list_col": ["a", "bc", "de"]} +{"rowindex": 2, "int_list_col": [1,2,3], "bool_list_col": [true], "float_list_col": [0.5, -1.9, 2.3], "date_list_col": ["2017-08-01", "2004-11-22"], "date_time_list_col": ["1979-06-03 03:20:45"], "numeric_list_col": [1.7], "string_list_col": ["", "a"]} diff --git a/tests/data/repeated_schema.json b/tests/data/repeated_schema.json new file mode 100644 index 0000000000..300f32c994 --- /dev/null +++ b/tests/data/repeated_schema.json @@ -0,0 +1,42 @@ +[ + { + "name": "rowindex", + "type": "INTEGER", + "mode": "REQUIRED" + }, + { + "name": "int_list_col", + "type": "INTEGER", + "mode": "REPEATED" + }, + { + "name": "bool_list_col", + "type": "BOOLEAN", + "mode": "REPEATED" + }, + { + "name": "float_list_col", + "type": "FLOAT", + "mode": "REPEATED" + }, + { + "name": "date_list_col", + "type": "DATE", + "mode": "REPEATED" + }, + { + "name": "date_time_list_col", + "type": "DATETIME", + "mode": "REPEATED" + }, + { + "name": "numeric_list_col", + "type": "NUMERIC", + "mode": "REPEATED" + }, + { + "name": "string_list_col", + "type": "STRING", + "mode": "REPEATED" + } +] diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 49cd887cfd..9cfb9082af 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -39,6 +39,7 @@ import bigframes import bigframes.dataframe import bigframes.pandas as bpd +import bigframes.series import tests.system.utils # Use this to control the number of cloud functions being deleted in a single @@ -294,6 +295,7 @@ def load_test_data_tables( ("scalars", "scalars_schema.json", "scalars.jsonl"), ("scalars_too", "scalars_schema.json", "scalars.jsonl"), ("nested", "nested_schema.json", "nested.jsonl"), + ("repeated", "repeated_schema.json", "repeated.jsonl"), ("penguins", "penguins_schema.json", "penguins.jsonl"), ("time_series", "time_series_schema.json", 
"time_series.jsonl"), ("hockey_players", "hockey_players.json", "hockey_players.jsonl"), @@ -370,6 +372,11 @@ def nested_table_id(test_data_tables) -> str: return test_data_tables["nested"] +@pytest.fixture(scope="session") +def repeated_table_id(test_data_tables) -> str: + return test_data_tables["repeated"] + + @pytest.fixture(scope="session") def penguins_table_id(test_data_tables) -> str: return test_data_tables["penguins"] @@ -410,6 +417,26 @@ def nested_pandas_df() -> pd.DataFrame: return df +@pytest.fixture(scope="session") +def repeated_df( + repeated_table_id: str, session: bigframes.Session +) -> bigframes.dataframe.DataFrame: + """Returns a DataFrame containing columns of list type.""" + return session.read_gbq(repeated_table_id, index_col="rowindex") + + +@pytest.fixture(scope="session") +def repeated_pandas_df() -> pd.DataFrame: + """Returns a DataFrame containing columns of list type.""" + + df = pd.read_json( + DATA_DIR / "repeated.jsonl", + lines=True, + ) + df = df.set_index("rowindex") + return df + + @pytest.fixture(scope="session") def scalars_df_default_index( scalars_df_index: bigframes.dataframe.DataFrame, diff --git a/tests/system/small/operations/test_lists.py b/tests/system/small/operations/test_lists.py index 7ecf79dc6a..7b39bdebd5 100644 --- a/tests/system/small/operations/test_lists.py +++ b/tests/system/small/operations/test_lists.py @@ -18,8 +18,6 @@ import pyarrow as pa import pytest -import bigframes.pandas as bpd - from ...utils import assert_series_equal @@ -32,19 +30,34 @@ pytest.param(slice(0, 2, None), id="default_step_slice"), ], ) -def test_getitem(key): +@pytest.mark.parametrize( + ("column_name", "dtype"), + [ + pytest.param("int_list_col", pd.ArrowDtype(pa.list_(pa.int64()))), + pytest.param("bool_list_col", pd.ArrowDtype(pa.list_(pa.bool_()))), + pytest.param("float_list_col", pd.ArrowDtype(pa.list_(pa.float64()))), + pytest.param("date_list_col", pd.ArrowDtype(pa.list_(pa.date32()))), + pytest.param("date_time_list_col", pd.ArrowDtype(pa.list_(pa.timestamp("us")))), + pytest.param("numeric_list_col", pd.ArrowDtype(pa.list_(pa.decimal128(38, 9)))), + pytest.param("string_list_col", pd.ArrowDtype(pa.list_(pa.string()))), + ], +) +def test_getitem(key, column_name, dtype, repeated_df, repeated_pandas_df): if packaging.version.Version(pd.__version__) < packaging.version.Version("2.2.0"): pytest.skip( "https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#series-list-accessor-for-pyarrow-list-data" ) - data = [[1], [2, 3], [4, 5, 6]] - s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) - pd_s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) - bf_result = s.list[key].to_pandas() - pd_result = pd_s.list[key] + bf_result = repeated_df[column_name].list[key].to_pandas() + pd_result = repeated_pandas_df[column_name].astype(dtype).list[key] - assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) + assert_series_equal( + pd_result, + bf_result, + check_dtype=False, + check_index_type=False, + check_names=False, + ) @pytest.mark.parametrize( @@ -60,24 +73,36 @@ def test_getitem(key): (slice(0, 2, 2), pytest.raises(NotImplementedError)), ], ) -def test_getitem_notsupported(key, expectation): - data = [[1], [2, 3], [4, 5, 6]] - s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) - +def test_getitem_notsupported(key, expectation, repeated_df): with expectation as e: - assert s.list[key] == e + assert repeated_df["int_list_col"].list[key] == e -def test_len(): +@pytest.mark.parametrize( + 
("column_name", "dtype"), + [ + pytest.param("int_list_col", pd.ArrowDtype(pa.list_(pa.int64()))), + pytest.param("bool_list_col", pd.ArrowDtype(pa.list_(pa.bool_()))), + pytest.param("float_list_col", pd.ArrowDtype(pa.list_(pa.float64()))), + pytest.param("date_list_col", pd.ArrowDtype(pa.list_(pa.date32()))), + pytest.param("date_time_list_col", pd.ArrowDtype(pa.list_(pa.timestamp("us")))), + pytest.param("numeric_list_col", pd.ArrowDtype(pa.list_(pa.decimal128(38, 9)))), + pytest.param("string_list_col", pd.ArrowDtype(pa.list_(pa.string()))), + ], +) +def test_len(column_name, dtype, repeated_df, repeated_pandas_df): if packaging.version.Version(pd.__version__) < packaging.version.Version("2.2.0"): pytest.skip( "https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#series-list-accessor-for-pyarrow-list-data" ) - data = [[], [1], [1, 2], [1, 2, 3]] - s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) - pd_s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) - bf_result = s.list.len().to_pandas() - pd_result = pd_s.list.len() + bf_result = repeated_df[column_name].list.len().to_pandas() + pd_result = repeated_pandas_df[column_name].astype(dtype).list.len() - assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) + assert_series_equal( + pd_result, + bf_result, + check_dtype=False, + check_index_type=False, + check_names=False, + ) diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index 15e8512317..98fecaa93b 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -615,21 +615,28 @@ def test_getitem_w_string(scalars_dfs, index): @pytest.mark.parametrize( ("index"), [ - pytest.param(2, id="int"), + pytest.param(0, id="int"), pytest.param(slice(None, None, None), id="default_start_slice"), pytest.param(slice(0, None, 1), id="default_stop_slice"), pytest.param(slice(0, 2, None), id="default_step_slice"), pytest.param(slice(0, 0, None), id="single_one_slice"), ], ) -def test_getitem_w_array(index): - data = [[1], [2, 3], [], [4, 5, 6]] - s = bpd.Series(data) - pd_s = pd.Series(data) - - bf_result = s.str[index].to_pandas() - pd_result = pd_s.str[index] - # Skip dtype checks here because pandas returns `int64` while BF returns `Int64`. 
+@pytest.mark.parametrize( + "column_name", + [ + pytest.param("int_list_col"), + pytest.param("bool_list_col"), + pytest.param("float_list_col"), + pytest.param("string_list_col"), + # date, date_time and numeric are excluded because their default types are different + # in Pandas and BigFrames + ], +) +def test_getitem_w_array(index, column_name, repeated_df, repeated_pandas_df): + bf_result = repeated_df[column_name].str[index].to_pandas() + pd_result = repeated_pandas_df[column_name].str[index] + assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) From ad8263dd0772d87106b401703cd32897ab01f187 Mon Sep 17 00:00:00 2001 From: Luis Velasco <57447519+velascoluis@users.noreply.github.com> Date: Tue, 17 Sep 2024 22:15:34 +0200 Subject: [PATCH 16/18] chore: fix tpch q5 (#984) --- tests/benchmark/tpch/q5.py | 2 +- third_party/bigframes_vendored/tpch/queries/q5.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/benchmark/tpch/q5.py b/tests/benchmark/tpch/q5.py index e4b3cb0f51..bc39b76e82 100644 --- a/tests/benchmark/tpch/q5.py +++ b/tests/benchmark/tpch/q5.py @@ -14,7 +14,7 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q1 as vendored_tpch_q5 +import bigframes_vendored.tpch.queries.q5 as vendored_tpch_q5 if __name__ == "__main__": dataset_id, session, suffix = utils.get_tpch_configuration() diff --git a/third_party/bigframes_vendored/tpch/queries/q5.py b/third_party/bigframes_vendored/tpch/queries/q5.py index 20bd8d9c44..b3a9b5e614 100644 --- a/third_party/bigframes_vendored/tpch/queries/q5.py +++ b/third_party/bigframes_vendored/tpch/queries/q5.py @@ -23,7 +23,7 @@ def q(dataset_id: str, session: bigframes.Session): index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERES", + f"bigframes-dev-perf.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( From 7dbda8461da84bdfa2c15049235b7f925129e5e6 Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Wed, 18 Sep 2024 11:08:04 -0700 Subject: [PATCH 17/18] chore: update local benchmark, 10t config and kokoro project. (#995) * chore: update local benchmark, 10t config and kokoro project. * update details * fixes * update config * update config reading * update config * remove notebook for now. 
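For reference, a minimal usage sketch of the options this change introduces (flags as defined by the argparse setup added to noxfile.py and scripts/run_and_publish_benchmark.py below; the project name is a placeholder, not a real project):

    # Run selected TPC-H benchmarks, average each over 3 iterations, and write the
    # usage report to an auto-generated temporary CSV file.
    nox -s benchmark -- --iterations 3 --benchmark-filter tpch/q1.py tpch/q5.py --output-csv

    # Publish the averaged metrics to a custom BigQuery project instead of the
    # default bigframes-metrics (intended for local testing).
    GCLOUD_BENCH_PUBLISH_PROJECT=my-test-project nox -s benchmark -- --iterations 2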
--- .kokoro/load/benchmark.cfg | 2 +- noxfile.py | 56 ++++++- scripts/run_and_publish_benchmark.py | 138 ++++++++++++------ tests/benchmark/tpch/config.jsonl | 18 ++- tests/benchmark/tpch/q1.py | 4 +- tests/benchmark/tpch/q10.py | 4 +- tests/benchmark/tpch/q11.py | 4 +- tests/benchmark/tpch/q12.py | 4 +- tests/benchmark/tpch/q13.py | 4 +- tests/benchmark/tpch/q14.py | 4 +- tests/benchmark/tpch/q15.py | 4 +- tests/benchmark/tpch/q16.py | 4 +- tests/benchmark/tpch/q17.py | 4 +- tests/benchmark/tpch/q18.py | 4 +- tests/benchmark/tpch/q19.py | 4 +- tests/benchmark/tpch/q2.py | 4 +- tests/benchmark/tpch/q20.py | 4 +- tests/benchmark/tpch/q21.py | 4 +- tests/benchmark/tpch/q22.py | 4 +- tests/benchmark/tpch/q3.py | 4 +- tests/benchmark/tpch/q4.py | 4 +- tests/benchmark/tpch/q5.py | 4 +- tests/benchmark/tpch/q6.py | 4 +- tests/benchmark/tpch/q7.py | 4 +- tests/benchmark/tpch/q8.py | 4 +- tests/benchmark/tpch/q9.py | 4 +- tests/benchmark/utils.py | 10 +- .../bigframes_vendored/tpch/queries/q1.py | 4 +- .../bigframes_vendored/tpch/queries/q10.py | 10 +- .../bigframes_vendored/tpch/queries/q11.py | 8 +- .../bigframes_vendored/tpch/queries/q12.py | 6 +- .../bigframes_vendored/tpch/queries/q13.py | 6 +- .../bigframes_vendored/tpch/queries/q14.py | 6 +- .../bigframes_vendored/tpch/queries/q15.py | 6 +- .../bigframes_vendored/tpch/queries/q16.py | 8 +- .../bigframes_vendored/tpch/queries/q17.py | 6 +- .../bigframes_vendored/tpch/queries/q18.py | 8 +- .../bigframes_vendored/tpch/queries/q19.py | 6 +- .../bigframes_vendored/tpch/queries/q2.py | 12 +- .../bigframes_vendored/tpch/queries/q20.py | 12 +- .../bigframes_vendored/tpch/queries/q21.py | 10 +- .../bigframes_vendored/tpch/queries/q22.py | 6 +- .../bigframes_vendored/tpch/queries/q3.py | 8 +- .../bigframes_vendored/tpch/queries/q4.py | 6 +- .../bigframes_vendored/tpch/queries/q5.py | 14 +- .../bigframes_vendored/tpch/queries/q6.py | 4 +- .../bigframes_vendored/tpch/queries/q7.py | 12 +- .../bigframes_vendored/tpch/queries/q8.py | 16 +- .../bigframes_vendored/tpch/queries/q9.py | 14 +- 49 files changed, 301 insertions(+), 199 deletions(-) diff --git a/.kokoro/load/benchmark.cfg b/.kokoro/load/benchmark.cfg index bc2d7a2655..45e2527a0c 100644 --- a/.kokoro/load/benchmark.cfg +++ b/.kokoro/load/benchmark.cfg @@ -13,7 +13,7 @@ env_vars: { env_vars: { key: "GOOGLE_CLOUD_PROJECT" - value: "bigframes-load-testing" + value: "bigframes-benchmarking" } env_vars: { diff --git a/noxfile.py b/noxfile.py index f4142af21b..bf4c6f9641 100644 --- a/noxfile.py +++ b/noxfile.py @@ -16,6 +16,7 @@ from __future__ import absolute_import +import argparse import multiprocessing import os import pathlib @@ -804,7 +805,7 @@ def notebook(session: nox.Session): processes = [] for notebook, regions in notebooks_reg.items(): for region in regions: - args = ( + region_args = ( "python", "scripts/run_and_publish_benchmark.py", "--notebook", @@ -814,7 +815,7 @@ def notebook(session: nox.Session): if multi_process_mode: process = multiprocessing.Process( target=_run_process, - args=(session, args, error_flag), + args=(session, region_args, error_flag), ) process.start() processes.append(process) @@ -822,7 +823,7 @@ def notebook(session: nox.Session): # process to avoid potential race conditions。 time.sleep(1) else: - session.run(*args) + session.run(*region_args) for process in processes: process.join() @@ -861,7 +862,51 @@ def benchmark(session: nox.Session): session.install("-e", ".[all]") base_path = os.path.join("tests", "benchmark") - benchmark_script_list = 
list(pathlib.Path(base_path).rglob("*.py")) + parser = argparse.ArgumentParser() + parser.add_argument( + "-i", + "--iterations", + type=int, + default=1, + help="Number of iterations to run each benchmark.", + ) + parser.add_argument( + "-o", + "--output-csv", + nargs="?", + const=True, + default=False, + help=( + "Determines whether to output results to a CSV file. If no location is provided, " + "a temporary location is automatically generated." + ), + ) + parser.add_argument( + "-b", + "--benchmark-filter", + nargs="+", + help=( + "List of file or directory names to include in the benchmarks. If not provided, " + "all benchmarks are run." + ), + ) + + args = parser.parse_args(session.posargs) + + benchmark_script_list: List[pathlib.Path] = [] + if args.benchmark_filter: + for filter_item in args.benchmark_filter: + full_path = os.path.join(base_path, filter_item) + if os.path.isdir(full_path): + benchmark_script_list.extend(pathlib.Path(full_path).rglob("*.py")) + elif os.path.isfile(full_path) and full_path.endswith(".py"): + benchmark_script_list.append(pathlib.Path(full_path)) + else: + raise ValueError( + f"Item {filter_item} does not match any valid file or directory" + ) + else: + benchmark_script_list = list(pathlib.Path(base_path).rglob("*.py")) try: for benchmark in benchmark_script_list: @@ -871,12 +916,15 @@ def benchmark(session: nox.Session): "python", "scripts/run_and_publish_benchmark.py", f"--benchmark-path={benchmark}", + f"--iterations={args.iterations}", ) finally: session.run( "python", "scripts/run_and_publish_benchmark.py", f"--publish-benchmarks={base_path}", + f"--iterations={args.iterations}", + f"--output-csv={args.output_csv}", ) diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py index 4b3d4f4d96..a42301cb13 100644 --- a/scripts/run_and_publish_benchmark.py +++ b/scripts/run_and_publish_benchmark.py @@ -19,6 +19,7 @@ import pathlib import subprocess import sys +import tempfile from typing import Dict, List, Union import numpy as np @@ -50,7 +51,7 @@ def run_benchmark_subprocess(args, log_env_name_var, filename=None, region=None) subprocess.run(args, env=env, check=True) -def collect_benchmark_result(benchmark_path: str) -> pd.DataFrame: +def collect_benchmark_result(benchmark_path: str, iterations: int) -> pd.DataFrame: """Generate a DataFrame report on HTTP queries, bytes processed, slot time and execution time from log files.""" path = pathlib.Path(benchmark_path) try: @@ -100,28 +101,23 @@ def collect_benchmark_result(benchmark_path: str) -> pd.DataFrame: with open(bytes_file, "r") as file: lines = file.read().splitlines() - query_count = len(lines) - total_bytes = sum(int(line) for line in lines) + query_count = len(lines) / iterations + total_bytes = sum(int(line) for line in lines) / iterations with open(millis_file, "r") as file: lines = file.read().splitlines() - total_slot_millis = sum(int(line) for line in lines) + total_slot_millis = sum(int(line) for line in lines) / iterations if has_local_seconds: - # 'local_seconds' captures the total execution time for a benchmark as it - # starts timing immediately before the benchmark code begins and stops - # immediately after it ends. Unlike other metrics that might accumulate - # values proportional to the number of queries executed, 'local_seconds' is - # a singular measure of the time taken for the complete execution of the - # benchmark, from start to finish. 
with open(local_seconds_file, "r") as file: - local_seconds = float(file.readline().strip()) + lines = file.read().splitlines() + local_seconds = sum(float(line) for line in lines) / iterations else: local_seconds = None with open(bq_seconds_file, "r") as file: lines = file.read().splitlines() - bq_seconds = sum(float(line) for line in lines) + bq_seconds = sum(float(line) for line in lines) / iterations results_dict[str(filename)] = [ query_count, @@ -154,7 +150,12 @@ def collect_benchmark_result(benchmark_path: str) -> pd.DataFrame: columns=columns, ) - print("---BIGQUERY USAGE REPORT---") + report_title = ( + "---BIGQUERY USAGE REPORT---" + if iterations == 1 + else f"---BIGQUERY USAGE REPORT (Averages over {iterations} Iterations)---" + ) + print(report_title) for index, row in benchmark_metrics.iterrows(): formatted_local_exec_time = ( f"{round(row['Local_Execution_Time_Sec'], 1)} seconds" @@ -259,7 +260,26 @@ def find_config(start_path): return None -def run_benchmark_from_config(benchmark: str): +def publish_to_bigquery(dataframe, notebook, project_name="bigframes-metrics"): + bigquery_table = ( + f"{project_name}.benchmark_report.notebook_benchmark" + if notebook + else f"{project_name}.benchmark_report.benchmark" + ) + + repo_status = get_repository_status() + for idx, col in enumerate(repo_status.keys()): + dataframe.insert(idx, col, repo_status[col]) + + pandas_gbq.to_gbq( + dataframe=dataframe, + destination_table=bigquery_table, + if_exists="append", + ) + print(f"Results have been successfully uploaded to {bigquery_table}.") + + +def run_benchmark_from_config(benchmark: str, iterations: int): print(benchmark) config_path = find_config(benchmark) @@ -267,24 +287,26 @@ def run_benchmark_from_config(benchmark: str): benchmark_configs = [] with open(config_path, "r") as f: for line in f: - config = json.loads(line) - python_args = [f"--{key}={value}" for key, value in config.items()] - suffix = ( - config["benchmark_suffix"] - if "benchmark_suffix" in config - else "_".join(f"{key}_{value}" for key, value in config.items()) - ) - benchmark_configs.append((suffix, python_args)) + if line.strip(): + config = json.loads(line) + python_args = [f"--{key}={value}" for key, value in config.items()] + suffix = ( + config["benchmark_suffix"] + if "benchmark_suffix" in config + else "_".join(f"{key}_{value}" for key, value in config.items()) + ) + benchmark_configs.append((suffix, python_args)) else: benchmark_configs = [(None, [])] - for benchmark_config in benchmark_configs: - args = ["python", str(benchmark)] - args.extend(benchmark_config[1]) - log_env_name_var = str(benchmark) - if benchmark_config[0] is not None: - log_env_name_var += f"_{benchmark_config[0]}" - run_benchmark_subprocess(args=args, log_env_name_var=log_env_name_var) + for _ in range(iterations): + for benchmark_config in benchmark_configs: + args = ["python", str(benchmark)] + args.extend(benchmark_config[1]) + log_env_name_var = str(benchmark) + if benchmark_config[0] is not None: + log_env_name_var += f"_{benchmark_config[0]}" + run_benchmark_subprocess(args=args, log_env_name_var=log_env_name_var) def run_notebook_benchmark(benchmark_file: str, region: str): @@ -341,6 +363,19 @@ def parse_arguments(): help="Set the benchmarks to be published to BigQuery.", ) + parser.add_argument( + "--iterations", + type=int, + default=1, + help="Number of iterations to run each benchmark.", + ) + parser.add_argument( + "--output-csv", + type=str, + default=None, + help="Determines whether to output results to a CSV file. 
If no location is provided, a temporary location is automatically generated.", + ) + return parser.parse_args() @@ -348,28 +383,39 @@ def main(): args = parse_arguments() if args.publish_benchmarks: - bigquery_table = ( - "bigframes-metrics.benchmark_report.notebook_benchmark" - if args.notebook - else "bigframes-metrics.benchmark_report.benchmark" + benchmark_metrics = collect_benchmark_result( + args.publish_benchmarks, args.iterations ) - benchmark_metrics = collect_benchmark_result(args.publish_benchmarks) - - if os.getenv("BENCHMARK_AND_PUBLISH", "false") == "true": - repo_status = get_repository_status() - for idx, col in enumerate(repo_status.keys()): - benchmark_metrics.insert(idx, col, repo_status[col]) - - pandas_gbq.to_gbq( - dataframe=benchmark_metrics, - destination_table=bigquery_table, - if_exists="append", + # Output results to CSV without specifying a location + if args.output_csv == "True": + current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + temp_file = tempfile.NamedTemporaryFile( + prefix=f"benchmark_{current_time}_", delete=False, suffix=".csv" ) - print("Results have been successfully uploaded to BigQuery.") + benchmark_metrics.to_csv(temp_file.name, index=False) + print( + f"Benchmark result is saved to a temporary location: {temp_file.name}" + ) + temp_file.close() + # Output results to CSV with specified a custom location + elif args.output_csv != "False": + benchmark_metrics.to_csv(args.output_csv, index=False) + print(f"Benchmark result is saved to: {args.output_csv}") + + # Publish the benchmark metrics to BigQuery under the 'bigframes-metrics' project. + # The 'BENCHMARK_AND_PUBLISH' environment variable should be set to 'true' only + # in specific Kokoro sessions. + if os.getenv("BENCHMARK_AND_PUBLISH", "false") == "true": + publish_to_bigquery(benchmark_metrics, args.notebook) + # If the 'GCLOUD_BENCH_PUBLISH_PROJECT' environment variable is set, publish the + # benchmark metrics to a specified BigQuery table in the provided project. This is + # intended for local testing where the default behavior is not to publish results. 
+ elif project := os.getenv("GCLOUD_BENCH_PUBLISH_PROJECT", ""): + publish_to_bigquery(benchmark_metrics, args.notebook, project) elif args.notebook: run_notebook_benchmark(args.benchmark_path, args.region) else: - run_benchmark_from_config(args.benchmark_path) + run_benchmark_from_config(args.benchmark_path, args.iterations) if __name__ == "__main__": diff --git a/tests/benchmark/tpch/config.jsonl b/tests/benchmark/tpch/config.jsonl index 1c8b4dd19a..e6f7a444f6 100644 --- a/tests/benchmark/tpch/config.jsonl +++ b/tests/benchmark/tpch/config.jsonl @@ -1,8 +1,10 @@ -{"benchmark_suffix": "1g_ordered", "dataset_id": "tpch_0001g", "ordered": true} -{"benchmark_suffix": "1g_unordered", "dataset_id": "tpch_0001g", "ordered": false} -{"benchmark_suffix": "10g_ordered", "dataset_id": "tpch_0010g", "ordered": true} -{"benchmark_suffix": "10g_unordered", "dataset_id": "tpch_0010g", "ordered": false} -{"benchmark_suffix": "100g_ordered", "dataset_id": "tpch_0100g", "ordered": true} -{"benchmark_suffix": "100g_unordered", "dataset_id": "tpch_0100g", "ordered": false} -{"benchmark_suffix": "1t_ordered", "dataset_id": "tpch_0001t", "ordered": true} -{"benchmark_suffix": "1t_unordered", "dataset_id": "tpch_0001t", "ordered": false} +{"benchmark_suffix": "1g_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0001g", "ordered": true} +{"benchmark_suffix": "1g_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0001g", "ordered": false} +{"benchmark_suffix": "10g_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0010g", "ordered": true} +{"benchmark_suffix": "10g_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0010g", "ordered": false} +{"benchmark_suffix": "100g_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0100g", "ordered": true} +{"benchmark_suffix": "100g_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0100g", "ordered": false} +{"benchmark_suffix": "1t_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0001t", "ordered": true} +{"benchmark_suffix": "1t_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0001t", "ordered": false} +{"benchmark_suffix": "10t_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0010t", "ordered": true} +{"benchmark_suffix": "10t_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0010t", "ordered": false} diff --git a/tests/benchmark/tpch/q1.py b/tests/benchmark/tpch/q1.py index 398e9bf9b2..3f1c63967e 100644 --- a/tests/benchmark/tpch/q1.py +++ b/tests/benchmark/tpch/q1.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q1 as vendored_tpch_q1 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q1.q, current_path, suffix, dataset_id, session + vendored_tpch_q1.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q10.py b/tests/benchmark/tpch/q10.py index e500a657b6..bea18975ca 100644 --- a/tests/benchmark/tpch/q10.py +++ b/tests/benchmark/tpch/q10.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q10 as vendored_tpch_q10 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() 
utils.get_execution_time( - vendored_tpch_q10.q, current_path, suffix, dataset_id, session + vendored_tpch_q10.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q11.py b/tests/benchmark/tpch/q11.py index 46bd2f022c..538d8d3e5f 100644 --- a/tests/benchmark/tpch/q11.py +++ b/tests/benchmark/tpch/q11.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q11 as vendored_tpch_q11 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q11.q, current_path, suffix, dataset_id, session + vendored_tpch_q11.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q12.py b/tests/benchmark/tpch/q12.py index d2fab97699..6503b543f4 100644 --- a/tests/benchmark/tpch/q12.py +++ b/tests/benchmark/tpch/q12.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q12 as vendored_tpch_q12 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q12.q, current_path, suffix, dataset_id, session + vendored_tpch_q12.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q13.py b/tests/benchmark/tpch/q13.py index 24eec7090c..60c2101f6f 100644 --- a/tests/benchmark/tpch/q13.py +++ b/tests/benchmark/tpch/q13.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q13 as vendored_tpch_q13 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q13.q, current_path, suffix, dataset_id, session + vendored_tpch_q13.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q14.py b/tests/benchmark/tpch/q14.py index 8aa7ed4d2e..1698a01628 100644 --- a/tests/benchmark/tpch/q14.py +++ b/tests/benchmark/tpch/q14.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q14 as vendored_tpch_q14 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q14.q, current_path, suffix, dataset_id, session + vendored_tpch_q14.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q15.py b/tests/benchmark/tpch/q15.py index 511cfbc683..49e2ce4e92 100644 --- a/tests/benchmark/tpch/q15.py +++ b/tests/benchmark/tpch/q15.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q15 as vendored_tpch_q15 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q15.q, current_path, suffix, dataset_id, session + vendored_tpch_q15.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q16.py b/tests/benchmark/tpch/q16.py index 1d1f4b5f30..ef6edf6b12 100644 --- a/tests/benchmark/tpch/q16.py +++ b/tests/benchmark/tpch/q16.py @@ 
-17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q16 as vendored_tpch_q16 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q16.q, current_path, suffix, dataset_id, session + vendored_tpch_q16.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q17.py b/tests/benchmark/tpch/q17.py index e285cc9fca..2f680d206e 100644 --- a/tests/benchmark/tpch/q17.py +++ b/tests/benchmark/tpch/q17.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q17 as vendored_tpch_q17 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q17.q, current_path, suffix, dataset_id, session + vendored_tpch_q17.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q18.py b/tests/benchmark/tpch/q18.py index de9e4f2f94..7336246f1b 100644 --- a/tests/benchmark/tpch/q18.py +++ b/tests/benchmark/tpch/q18.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q18 as vendored_tpch_q18 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q18.q, current_path, suffix, dataset_id, session + vendored_tpch_q18.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q19.py b/tests/benchmark/tpch/q19.py index 7b13b0d250..3bf34794bf 100644 --- a/tests/benchmark/tpch/q19.py +++ b/tests/benchmark/tpch/q19.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q19 as vendored_tpch_q19 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q19.q, current_path, suffix, dataset_id, session + vendored_tpch_q19.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q2.py b/tests/benchmark/tpch/q2.py index 57151e36bd..c738aae124 100644 --- a/tests/benchmark/tpch/q2.py +++ b/tests/benchmark/tpch/q2.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q2 as vendored_tpch_q2 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q2.q, current_path, suffix, dataset_id, session + vendored_tpch_q2.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q20.py b/tests/benchmark/tpch/q20.py index e02e9306f8..1b254c6a78 100644 --- a/tests/benchmark/tpch/q20.py +++ b/tests/benchmark/tpch/q20.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q20 as vendored_tpch_q20 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q20.q, current_path, 
suffix, dataset_id, session + vendored_tpch_q20.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q21.py b/tests/benchmark/tpch/q21.py index d123286c3e..18e8df87fe 100644 --- a/tests/benchmark/tpch/q21.py +++ b/tests/benchmark/tpch/q21.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q21 as vendored_tpch_q21 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q21.q, current_path, suffix, dataset_id, session + vendored_tpch_q21.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q22.py b/tests/benchmark/tpch/q22.py index 841740da0e..6c10021c2b 100644 --- a/tests/benchmark/tpch/q22.py +++ b/tests/benchmark/tpch/q22.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q22 as vendored_tpch_q22 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q22.q, current_path, suffix, dataset_id, session + vendored_tpch_q22.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q3.py b/tests/benchmark/tpch/q3.py index 28cdfed49c..5bcaaa0d5d 100644 --- a/tests/benchmark/tpch/q3.py +++ b/tests/benchmark/tpch/q3.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q3 as vendored_tpch_q3 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q3.q, current_path, suffix, dataset_id, session + vendored_tpch_q3.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q4.py b/tests/benchmark/tpch/q4.py index aa67cc77a0..462c6336d1 100644 --- a/tests/benchmark/tpch/q4.py +++ b/tests/benchmark/tpch/q4.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q4 as vendored_tpch_q4 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q4.q, current_path, suffix, dataset_id, session + vendored_tpch_q4.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q5.py b/tests/benchmark/tpch/q5.py index bc39b76e82..108cde58cc 100644 --- a/tests/benchmark/tpch/q5.py +++ b/tests/benchmark/tpch/q5.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q5 as vendored_tpch_q5 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q5.q, current_path, suffix, dataset_id, session + vendored_tpch_q5.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q6.py b/tests/benchmark/tpch/q6.py index a193333045..ccefc1b0bf 100644 --- a/tests/benchmark/tpch/q6.py +++ b/tests/benchmark/tpch/q6.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q6 as vendored_tpch_q6 if __name__ == 
"__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q6.q, current_path, suffix, dataset_id, session + vendored_tpch_q6.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q7.py b/tests/benchmark/tpch/q7.py index 8a17eb91ea..0cad599a60 100644 --- a/tests/benchmark/tpch/q7.py +++ b/tests/benchmark/tpch/q7.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q7 as vendored_tpch_q7 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q7.q, current_path, suffix, dataset_id, session + vendored_tpch_q7.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q8.py b/tests/benchmark/tpch/q8.py index b5e7d7aa37..6c6ac23b9b 100644 --- a/tests/benchmark/tpch/q8.py +++ b/tests/benchmark/tpch/q8.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q8 as vendored_tpch_q8 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q8.q, current_path, suffix, dataset_id, session + vendored_tpch_q8.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q9.py b/tests/benchmark/tpch/q9.py index c098e9a8da..05c82fb66e 100644 --- a/tests/benchmark/tpch/q9.py +++ b/tests/benchmark/tpch/q9.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q9 as vendored_tpch_q9 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q9.q, current_path, suffix, dataset_id, session + vendored_tpch_q9.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/utils.py b/tests/benchmark/utils.py index c286d4e229..32be33fc74 100644 --- a/tests/benchmark/utils.py +++ b/tests/benchmark/utils.py @@ -43,6 +43,12 @@ def get_dbbenchmark_configuration(): def get_tpch_configuration(): parser = argparse.ArgumentParser(description="Process TPC-H Query using BigFrames.") + parser.add_argument( + "--project_id", + type=str, + required=True, + help="The BigQuery dataset ID to query.", + ) parser.add_argument( "--dataset_id", type=str, @@ -62,7 +68,7 @@ def get_tpch_configuration(): args = parser.parse_args() session = _initialize_session(_str_to_bool(args.ordered)) - return args.dataset_id, session, args.benchmark_suffix + return args.project_id, args.dataset_id, session, args.benchmark_suffix def get_execution_time(func, current_path, suffix, *args, **kwargs): @@ -73,7 +79,7 @@ def get_execution_time(func, current_path, suffix, *args, **kwargs): clock_time_file_path = f"{current_path}_{suffix}.local_exec_time_seconds" - with open(clock_time_file_path, "w") as log_file: + with open(clock_time_file_path, "a") as log_file: log_file.write(f"{runtime}\n") diff --git a/third_party/bigframes_vendored/tpch/queries/q1.py b/third_party/bigframes_vendored/tpch/queries/q1.py index 3f155ec1c9..e1fdf85f58 100644 --- 
a/third_party/bigframes_vendored/tpch/queries/q1.py +++ b/third_party/bigframes_vendored/tpch/queries/q1.py @@ -7,9 +7,9 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q10.py b/third_party/bigframes_vendored/tpch/queries/q10.py index 95b7b87b0e..75a8f2de7f 100644 --- a/third_party/bigframes_vendored/tpch/queries/q10.py +++ b/third_party/bigframes_vendored/tpch/queries/q10.py @@ -7,21 +7,21 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q11.py b/third_party/bigframes_vendored/tpch/queries/q11.py index 269728f6f1..484a7c0001 100644 --- a/third_party/bigframes_vendored/tpch/queries/q11.py +++ b/third_party/bigframes_vendored/tpch/queries/q11.py @@ -4,17 +4,17 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) partsupp = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PARTSUPP", + f"{project_id}.{dataset_id}.PARTSUPP", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q12.py b/third_party/bigframes_vendored/tpch/queries/q12.py index 1c39275932..e2b7aaf9f2 100644 --- a/third_party/bigframes_vendored/tpch/queries/q12.py +++ b/third_party/bigframes_vendored/tpch/queries/q12.py @@ -7,13 +7,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q13.py b/third_party/bigframes_vendored/tpch/queries/q13.py index dd69cf509f..ea2f0da284 100644 --- a/third_party/bigframes_vendored/tpch/queries/q13.py +++ b/third_party/bigframes_vendored/tpch/queries/q13.py @@ -6,13 +6,13 @@ import 
bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q14.py b/third_party/bigframes_vendored/tpch/queries/q14.py index 8c25a5897d..27f3d9e224 100644 --- a/third_party/bigframes_vendored/tpch/queries/q14.py +++ b/third_party/bigframes_vendored/tpch/queries/q14.py @@ -5,13 +5,13 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q15.py b/third_party/bigframes_vendored/tpch/queries/q15.py index fae3010e36..042adbda8b 100644 --- a/third_party/bigframes_vendored/tpch/queries/q15.py +++ b/third_party/bigframes_vendored/tpch/queries/q15.py @@ -6,13 +6,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q16.py b/third_party/bigframes_vendored/tpch/queries/q16.py index b5eb62547f..1bd2795c42 100644 --- a/third_party/bigframes_vendored/tpch/queries/q16.py +++ b/third_party/bigframes_vendored/tpch/queries/q16.py @@ -4,17 +4,17 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) partsupp = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PARTSUPP", + f"{project_id}.{dataset_id}.PARTSUPP", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q17.py b/third_party/bigframes_vendored/tpch/queries/q17.py index a95d128b5c..0bd1c44315 100644 --- a/third_party/bigframes_vendored/tpch/queries/q17.py +++ b/third_party/bigframes_vendored/tpch/queries/q17.py @@ -4,13 +4,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = 
session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q18.py b/third_party/bigframes_vendored/tpch/queries/q18.py index dac9f5c438..f645a08681 100644 --- a/third_party/bigframes_vendored/tpch/queries/q18.py +++ b/third_party/bigframes_vendored/tpch/queries/q18.py @@ -6,17 +6,17 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q19.py b/third_party/bigframes_vendored/tpch/queries/q19.py index 526d0aa1a6..1371af53fc 100644 --- a/third_party/bigframes_vendored/tpch/queries/q19.py +++ b/third_party/bigframes_vendored/tpch/queries/q19.py @@ -4,13 +4,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q2.py b/third_party/bigframes_vendored/tpch/queries/q2.py index 0586a5d1a5..f388252993 100644 --- a/third_party/bigframes_vendored/tpch/queries/q2.py +++ b/third_party/bigframes_vendored/tpch/queries/q2.py @@ -3,25 +3,25 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): region = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.REGION", + f"{project_id}.{dataset_id}.REGION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) partsupp = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PARTSUPP", + f"{project_id}.{dataset_id}.PARTSUPP", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q20.py b/third_party/bigframes_vendored/tpch/queries/q20.py index 671d7e06fb..26651a31c4 100644 --- a/third_party/bigframes_vendored/tpch/queries/q20.py +++ b/third_party/bigframes_vendored/tpch/queries/q20.py @@ -6,25 +6,25 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( 
- f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) partsupp = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PARTSUPP", + f"{project_id}.{dataset_id}.PARTSUPP", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q21.py b/third_party/bigframes_vendored/tpch/queries/q21.py index 3a4ea495c9..b844cd6f6b 100644 --- a/third_party/bigframes_vendored/tpch/queries/q21.py +++ b/third_party/bigframes_vendored/tpch/queries/q21.py @@ -6,21 +6,21 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q22.py b/third_party/bigframes_vendored/tpch/queries/q22.py index 97180cd11a..137a7d5c36 100644 --- a/third_party/bigframes_vendored/tpch/queries/q22.py +++ b/third_party/bigframes_vendored/tpch/queries/q22.py @@ -4,13 +4,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q3.py b/third_party/bigframes_vendored/tpch/queries/q3.py index 94b85f2054..fb09abe159 100644 --- a/third_party/bigframes_vendored/tpch/queries/q3.py +++ b/third_party/bigframes_vendored/tpch/queries/q3.py @@ -5,17 +5,17 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", 
index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q4.py b/third_party/bigframes_vendored/tpch/queries/q4.py index 9cc6f73c98..b89f70845f 100644 --- a/third_party/bigframes_vendored/tpch/queries/q4.py +++ b/third_party/bigframes_vendored/tpch/queries/q4.py @@ -8,13 +8,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q5.py b/third_party/bigframes_vendored/tpch/queries/q5.py index b3a9b5e614..9839c025a5 100644 --- a/third_party/bigframes_vendored/tpch/queries/q5.py +++ b/third_party/bigframes_vendored/tpch/queries/q5.py @@ -5,29 +5,29 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): region = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.REGION", + f"{project_id}.{dataset_id}.REGION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q6.py b/third_party/bigframes_vendored/tpch/queries/q6.py index 13341c4f4e..b883837fe2 100644 --- a/third_party/bigframes_vendored/tpch/queries/q6.py +++ b/third_party/bigframes_vendored/tpch/queries/q6.py @@ -5,9 +5,9 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q7.py b/third_party/bigframes_vendored/tpch/queries/q7.py index d922efd1e2..a4dfe3f12e 100644 --- a/third_party/bigframes_vendored/tpch/queries/q7.py +++ b/third_party/bigframes_vendored/tpch/queries/q7.py @@ -8,25 +8,25 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", 
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     lineitem = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.LINEITEM",
+        f"{project_id}.{dataset_id}.LINEITEM",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     orders = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.ORDERS",
+        f"{project_id}.{dataset_id}.ORDERS",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     supplier = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.SUPPLIER",
+        f"{project_id}.{dataset_id}.SUPPLIER",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
 
diff --git a/third_party/bigframes_vendored/tpch/queries/q8.py b/third_party/bigframes_vendored/tpch/queries/q8.py
index 4520fce14a..1676ec6349 100644
--- a/third_party/bigframes_vendored/tpch/queries/q8.py
+++ b/third_party/bigframes_vendored/tpch/queries/q8.py
@@ -5,33 +5,33 @@
 import bigframes
 
 
-def q(dataset_id: str, session: bigframes.Session):
+def q(project_id: str, dataset_id: str, session: bigframes.Session):
     customer = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.CUSTOMER",
+        f"{project_id}.{dataset_id}.CUSTOMER",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     lineitem = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.LINEITEM",
+        f"{project_id}.{dataset_id}.LINEITEM",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     nation = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.NATION",
+        f"{project_id}.{dataset_id}.NATION",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     orders = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.ORDERS",
+        f"{project_id}.{dataset_id}.ORDERS",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     part = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.PART",
+        f"{project_id}.{dataset_id}.PART",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     region = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.REGION",
+        f"{project_id}.{dataset_id}.REGION",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     supplier = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.SUPPLIER",
+        f"{project_id}.{dataset_id}.SUPPLIER",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
 
diff --git a/third_party/bigframes_vendored/tpch/queries/q9.py b/third_party/bigframes_vendored/tpch/queries/q9.py
index d04117f0f1..c2b52789bd 100644
--- a/third_party/bigframes_vendored/tpch/queries/q9.py
+++ b/third_party/bigframes_vendored/tpch/queries/q9.py
@@ -6,29 +6,29 @@
 import bigframes.pandas as bpd
 
 
-def q(dataset_id: str, session: bigframes.Session):
+def q(project_id: str, dataset_id: str, session: bigframes.Session):
     lineitem = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.LINEITEM",
+        f"{project_id}.{dataset_id}.LINEITEM",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     nation = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.NATION",
+        f"{project_id}.{dataset_id}.NATION",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     orders = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.ORDERS",
+        f"{project_id}.{dataset_id}.ORDERS",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     part = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.PART",
+        f"{project_id}.{dataset_id}.PART",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     partsupp = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.PARTSUPP",
+        f"{project_id}.{dataset_id}.PARTSUPP",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
     supplier = session.read_gbq(
-        f"bigframes-dev-perf.{dataset_id}.SUPPLIER",
+        f"{project_id}.{dataset_id}.SUPPLIER",
         index_col=bigframes.enums.DefaultIndexKind.NULL,
     )
 
From d2fb49f84d3e27d79d3248227d057fa1f57bb897 Mon Sep 17 00:00:00 2001
From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com>
Date: Wed, 18 Sep 2024 13:21:14 -0700
Subject: [PATCH 18/18] chore(main): release 1.18.0 (#986)

Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
---
 CHANGELOG.md                              | 21 +++++++++++++++++++++
 bigframes/version.py                      |  2 +-
 third_party/bigframes_vendored/version.py |  2 +-
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a989d8af66..c210f5c991 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,27 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [1.18.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.17.0...v1.18.0) (2024-09-18)
+
+
+### Features
+
+* Add "include" param to describe for string types ([#973](https://github.com/googleapis/python-bigquery-dataframes/issues/973)) ([deac6d2](https://github.com/googleapis/python-bigquery-dataframes/commit/deac6d2d6e459b26c05f6e5ff328ea03a3cff45f))
+* Add `subset` parameter to `DataFrame.dropna` to select which columns to consider ([#981](https://github.com/googleapis/python-bigquery-dataframes/issues/981)) ([f7c03dc](https://github.com/googleapis/python-bigquery-dataframes/commit/f7c03dcaf7ee4d62497f6653851e390795fc60a2))
+
+
+### Bug Fixes
+
+* DataFrameGroupby.agg now works with unnamed tuples ([#985](https://github.com/googleapis/python-bigquery-dataframes/issues/985)) ([0f047b4](https://github.com/googleapis/python-bigquery-dataframes/commit/0f047b4fae2a10b2a465c506bea561f8bb8d4262))
+* Fix a bug that raises exception when re-indexing columns with their original order ([#988](https://github.com/googleapis/python-bigquery-dataframes/issues/988)) ([596b03b](https://github.com/googleapis/python-bigquery-dataframes/commit/596b03bb3ea27cead9b90200b9ef3cdcd99ca184))
+* Make the `Series.apply` outcome `assign`able to the original dataframe in partial ordering mode ([#874](https://github.com/googleapis/python-bigquery-dataframes/issues/874)) ([c94ead9](https://github.com/googleapis/python-bigquery-dataframes/commit/c94ead996e3bfa98edd51ff678a3d43a10ee980f))
+
+
+### Dependencies
+
+* Limit ibis-framework version to 9.2.0 ([#989](https://github.com/googleapis/python-bigquery-dataframes/issues/989)) ([06c1b33](https://github.com/googleapis/python-bigquery-dataframes/commit/06c1b3396d77d1de4f927328bae70cd7b3eb0b0b))
+* Update to ibis-framework 9.x and newer sqlglot ([#827](https://github.com/googleapis/python-bigquery-dataframes/issues/827)) ([89ea44f](https://github.com/googleapis/python-bigquery-dataframes/commit/89ea44fb66314b134fc0a10d816c1659978d4182))
+
 ## [1.17.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.16.0...v1.17.0) (2024-09-11)
 
 
diff --git a/bigframes/version.py b/bigframes/version.py
index 2c0c6e4d3a..745632616c 100644
--- a/bigframes/version.py
+++ b/bigframes/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "1.17.0"
+__version__ = "1.18.0"
diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py
index 2c0c6e4d3a..745632616c 100644
--- a/third_party/bigframes_vendored/version.py
+++ b/third_party/bigframes_vendored/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "1.17.0"
+__version__ = "1.18.0"