diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 323ef8f07a..f839c3c0a4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -46,4 +46,4 @@ repos:
     rev: v2.0.2
     hooks:
       - id: biome-check
-        files: '\.js$'
+        files: '\.(js|css)$'
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 142edaa9d5..89c1b1af08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,26 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [2.13.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.12.0...v2.13.0) (2025-07-25)
+
+
+### Features
+
+* _read_gbq_colab creates hybrid session ([#1901](https://github.com/googleapis/python-bigquery-dataframes/issues/1901)) ([31b17b0](https://github.com/googleapis/python-bigquery-dataframes/commit/31b17b01706ccfcee9a2d838c43a9609ec4dc218))
+* Add CSS styling for TableWidget pagination interface ([#1934](https://github.com/googleapis/python-bigquery-dataframes/issues/1934)) ([5b232d7](https://github.com/googleapis/python-bigquery-dataframes/commit/5b232d7e33563196316f5dbb50b28c6be388d440))
+* Add row numbering local pushdown in hybrid execution ([#1932](https://github.com/googleapis/python-bigquery-dataframes/issues/1932)) ([92a2377](https://github.com/googleapis/python-bigquery-dataframes/commit/92a237712aa4ce516b1a44748127b34d7780fff6))
+* Implement Index.get_loc ([#1921](https://github.com/googleapis/python-bigquery-dataframes/issues/1921)) ([bbbcaf3](https://github.com/googleapis/python-bigquery-dataframes/commit/bbbcaf35df113617fd6bb8ae36468cf3f7ab493b))
+
+
+### Bug Fixes
+
+* Add license header and correct issues in dbt sample ([#1931](https://github.com/googleapis/python-bigquery-dataframes/issues/1931)) ([ab01b0a](https://github.com/googleapis/python-bigquery-dataframes/commit/ab01b0a236ffc7b667f258e0497105ea5c3d3aab))
+
+
+### Dependencies
+
+* Replace `google-cloud-iam` with `grpc-google-iam-v1` ([#1864](https://github.com/googleapis/python-bigquery-dataframes/issues/1864)) ([e5ff8f7](https://github.com/googleapis/python-bigquery-dataframes/commit/e5ff8f7d9fdac3ea47dabcc80a2598d601f39e64))
+
 ## [2.12.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.11.0...v2.12.0) (2025-07-23)
diff --git a/MANIFEST.in b/MANIFEST.in
index e0deb6deb2..c8555a39bf 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -17,7 +17,7 @@
 # Generated by synthtool. DO NOT EDIT!
 include README.rst LICENSE
 recursive-include third_party/bigframes_vendored *
-recursive-include bigframes *.json *.proto *.js py.typed
+recursive-include bigframes *.json *.proto *.js *.css py.typed
 recursive-include tests *
 global-exclude *.py[co]
 global-exclude __pycache__
diff --git a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
index 22079a9a6d..609ac374b6 100644
--- a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
+++ b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
@@ -175,6 +175,16 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     )
 
 
+@UNARY_OP_REGISTRATION.register(ops.StrContainsRegexOp)
+def _(op: ops.StrContainsRegexOp, expr: TypedExpr) -> sge.Expression:
+    return sge.RegexpLike(this=expr.expr, expression=sge.convert(op.pat))
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrContainsOp)
+def _(op: ops.StrContainsOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Like(this=expr.expr, expression=sge.convert(f"%{op.pat}%"))
+
+
 @UNARY_OP_REGISTRATION.register(ops.date_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Date(this=expr.expr)
@@ -229,6 +239,26 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Floor(this=expr.expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.geo_area_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("ST_AREA", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.geo_st_astext_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("ST_ASTEXT", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.geo_x_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("SAFE.ST_X", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.geo_y_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("SAFE.ST_Y", expr.expr)
+
+
 @UNARY_OP_REGISTRATION.register(ops.hash_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.func("FARM_FINGERPRINT", expr.expr)
@@ -302,6 +332,123 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     )
 
 
+@UNARY_OP_REGISTRATION.register(ops.len_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Length(this=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.ln_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=expr.expr < sge.convert(0),
+                true=_NAN,
+            )
+        ],
+        default=sge.Ln(this=expr.expr),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.log10_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=expr.expr < sge.convert(0),
+                true=_NAN,
+            )
+        ],
+        default=sge.Log(this=expr.expr, expression=sge.convert(10)),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.log1p_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=expr.expr < sge.convert(-1),
+                true=_NAN,
+            )
+        ],
+        default=sge.Ln(this=sge.convert(1) + expr.expr),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.lower_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Lower(this=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.minute_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Extract(this=sge.Identifier(this="MINUTE"), expression=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.month_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Extract(this=sge.Identifier(this="MONTH"), expression=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrLstripOp)
+def _(op: ops.StrLstripOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="LEFT")
+
+
+@UNARY_OP_REGISTRATION.register(ops.neg_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Neg(this=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.normalize_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this="DAY"))
+
+
+@UNARY_OP_REGISTRATION.register(ops.pos_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return expr.expr
+
+
+@UNARY_OP_REGISTRATION.register(ops.quarter_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Extract(this=sge.Identifier(this="QUARTER"), expression=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.reverse_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("REVERSE", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.second_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Extract(this=sge.Identifier(this="SECOND"), expression=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrRstripOp)
+def _(op: ops.StrRstripOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="RIGHT")
+
+
+@UNARY_OP_REGISTRATION.register(ops.sqrt_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=expr.expr < sge.convert(0),
+                true=_NAN,
+            )
+        ],
+        default=sge.Sqrt(this=expr.expr),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrStripOp)
+def _(op: ops.StrStripOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Trim(this=sge.convert(op.to_strip), expression=expr.expr)
+
+
 @UNARY_OP_REGISTRATION.register(ops.iso_day_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr)
@@ -312,6 +459,11 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.iso_year_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Extract(this=sge.Identifier(this="ISOYEAR"), expression=expr.expr)
+
+
 @UNARY_OP_REGISTRATION.register(ops.isnull_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Is(this=expr.expr, expression=sge.Null())
@@ -340,6 +492,31 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     )
 
 
+@UNARY_OP_REGISTRATION.register(ops.StrGetOp)
+def _(op: ops.StrGetOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Substring(
+        this=expr.expr,
+        start=sge.convert(op.i + 1),
+        length=sge.convert(1),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrSliceOp)
+def _(op: ops.StrSliceOp, expr: TypedExpr) -> sge.Expression:
+    start = op.start + 1 if op.start is not None else None
+    if op.end is None:
+        length = None
+    elif op.start is None:
+        length = op.end
+    else:
+        length = op.end - op.start
+    return sge.Substring(
+        this=expr.expr,
+        start=sge.convert(start) if start is not None else None,
+        length=sge.convert(length) if length is not None else None,
+    )
+
+
 @UNARY_OP_REGISTRATION.register(ops.tan_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.func("TAN", expr.expr)
@@ -350,6 +527,16 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.func("TANH", expr.expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.time_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("TIME", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.timedelta_floor_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Floor(this=expr.expr)
+
+
 # JSON Ops
 @UNARY_OP_REGISTRATION.register(ops.JSONExtract)
 def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression:
@@ -394,3 +581,13 @@ def _(op: ops.ParseJSON, expr: TypedExpr) -> sge.Expression:
 @UNARY_OP_REGISTRATION.register(ops.ToJSONString)
 def _(op: ops.ToJSONString, expr: TypedExpr) -> sge.Expression:
     return sge.func("TO_JSON_STRING", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.upper_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Upper(this=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.year_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Extract(this=sge.Identifier(this="YEAR"), expression=expr.expr)
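The StrSliceOp registration above maps Python's zero-based, end-exclusive slices onto SQL's one-based SUBSTRING(value, start, length). A minimal standalone sketch of the same start/length arithmetic using plain sqlglot — the helper function and the column name `name` are illustrative, not part of the change:

```python
import sqlglot.expressions as sge


def str_slice_sql(col_name: str, start, end) -> str:
    # Python slices are zero-based and end-exclusive; SUBSTRING is one-based
    # and takes a length, so shift the start by one and derive the length.
    sql_start = start + 1 if start is not None else None
    if end is None:
        length = None
    elif start is None:
        length = end
    else:
        length = end - start
    return sge.Substring(
        this=sge.column(col_name),
        start=sge.convert(sql_start) if sql_start is not None else None,
        length=sge.convert(length) if length is not None else None,
    ).sql(dialect="bigquery")


# s.str.slice(2, 5) on column "name" compiles to SUBSTRING(name, 3, 3).
print(str_slice_sql("name", 2, 5))
```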
+ """ + if self.nlevels != 1: + raise NotImplementedError("get_loc only supports single-level indexes") + + # Get the index column from the block + index_column = self._block.index_columns[0] + + # Apply row numbering to the original data + row_number_column_id = ids.ColumnId.unique() + window_node = nodes.WindowOpNode( + child=self._block._expr.node, + expression=ex.NullaryAggregation(agg_ops.RowNumberOp()), + window_spec=window_spec.unbound(), + output_name=row_number_column_id, + never_skip_nulls=True, + ) + + windowed_array = ArrayValue(window_node) + windowed_block = blocks.Block( + windowed_array, + index_columns=self._block.index_columns, + column_labels=self._block.column_labels.insert( + len(self._block.column_labels), None + ), + index_labels=self._block._index_labels, + ) + + # Create expression to find matching positions + match_expr = ops.eq_op.as_expr(ex.deref(index_column), ex.const(key)) + windowed_block, match_col_id = windowed_block.project_expr(match_expr) + + # Filter to only rows where the key matches + filtered_block = windowed_block.filter_by_id(match_col_id) + + # Check if key exists at all by counting on the filtered block + count_agg = ex.UnaryAggregation( + agg_ops.count_op, ex.deref(row_number_column_id.name) + ) + count_result = filtered_block._expr.aggregate([(count_agg, "count")]) + count_scalar = self._block.session._executor.execute( + count_result + ).to_py_scalar() + + if count_scalar == 0: + raise KeyError(f"'{key}' is not in index") + + # If only one match, return integer position + if count_scalar == 1: + min_agg = ex.UnaryAggregation( + agg_ops.min_op, ex.deref(row_number_column_id.name) + ) + position_result = filtered_block._expr.aggregate([(min_agg, "position")]) + position_scalar = self._block.session._executor.execute( + position_result + ).to_py_scalar() + return int(position_scalar) + + # Handle multiple matches based on index monotonicity + is_monotonic = self.is_monotonic_increasing or self.is_monotonic_decreasing + if is_monotonic: + return self._get_monotonic_slice(filtered_block, row_number_column_id) + else: + # Return boolean mask for non-monotonic duplicates + mask_block = windowed_block.select_columns([match_col_id]) + # Reset the index to use positional integers instead of original index values + mask_block = mask_block.reset_index(drop=True) + # Ensure correct dtype and name to match pandas behavior + result_series = bigframes.series.Series(mask_block) + return result_series.astype("boolean") + + def _get_monotonic_slice( + self, filtered_block, row_number_column_id: "ids.ColumnId" + ) -> slice: + """Helper method to get a slice for monotonic duplicates with an optimized query.""" + # Combine min and max aggregations into a single query for efficiency + min_max_aggs = [ + ( + ex.UnaryAggregation( + agg_ops.min_op, ex.deref(row_number_column_id.name) + ), + "min_pos", + ), + ( + ex.UnaryAggregation( + agg_ops.max_op, ex.deref(row_number_column_id.name) + ), + "max_pos", + ), + ] + combined_result = filtered_block._expr.aggregate(min_max_aggs) + + # Execute query and extract positions + result_df = self._block.session._executor.execute(combined_result).to_pandas() + min_pos = int(result_df["min_pos"].iloc[0]) + max_pos = int(result_df["max_pos"].iloc[0]) + + # Create slice (stop is exclusive) + return slice(min_pos, max_pos + 1) + def __repr__(self) -> str: # Protect against errors with uninitialized Series. 
See: # https://github.com/googleapis/python-bigquery-dataframes/issues/728 diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 04d82c97fe..8bbb72c11a 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -62,20 +62,20 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): super().__init__() self._dataframe = dataframe - # respect display options - self.page_size = bigframes.options.display.max_rows - - # Initialize data fetching attributes. - self._batches = dataframe.to_pandas_batches(page_size=self.page_size) - - # Use list of DataFrames to avoid memory copies from concatenation - self._cached_batches: List[pd.DataFrame] = [] - - # Unique identifier for HTML table element + # Initialize attributes that might be needed by observers FIRST self._table_id = str(uuid.uuid4()) self._all_data_loaded = False - # Renamed from _batch_iterator to _batch_iter to avoid naming conflict self._batch_iter: Optional[Iterator[pd.DataFrame]] = None + self._cached_batches: List[pd.DataFrame] = [] + + # respect display options for initial page size + initial_page_size = bigframes.options.display.max_rows + + # Initialize data fetching attributes. + self._batches = dataframe.to_pandas_batches(page_size=initial_page_size) + + # set traitlets properties that trigger observers + self.page_size = initial_page_size # len(dataframe) is expensive, since it will trigger a # SELECT COUNT(*) query. It is a must have however. @@ -91,18 +91,26 @@ def _esm(self): """Load JavaScript code from external file.""" return resources.read_text(bigframes.display, "table_widget.js") + @functools.cached_property + def _css(self): + """Load CSS code from external file.""" + return resources.read_text(bigframes.display, "table_widget.css") + page = traitlets.Int(0).tag(sync=True) page_size = traitlets.Int(25).tag(sync=True) row_count = traitlets.Int(0).tag(sync=True) table_html = traitlets.Unicode().tag(sync=True) @traitlets.validate("page") - def _validate_page(self, proposal: Dict[str, Any]): + def _validate_page(self, proposal: Dict[str, Any]) -> int: """Validate and clamp the page number to a valid range. Args: proposal: A dictionary from the traitlets library containing the proposed change. The new value is in proposal["value"]. + + Returns: + The validated and clamped page number as an integer. """ value = proposal["value"] @@ -115,11 +123,32 @@ def _validate_page(self, proposal: Dict[str, Any]): # Clamp the proposed value to the valid range [0, max_page]. return max(0, min(value, max_page)) + @traitlets.validate("page_size") + def _validate_page_size(self, proposal: Dict[str, Any]) -> int: + """Validate page size to ensure it's positive and reasonable. + + Args: + proposal: A dictionary from the traitlets library containing the + proposed change. The new value is in proposal["value"]. + + Returns: + The validated page size as an integer. + """ + value = proposal["value"] + + # Ensure page size is positive and within reasonable bounds + if value <= 0: + return self.page_size # Keep current value + + # Cap at reasonable maximum to prevent performance issues + max_page_size = 1000 + return min(value, max_page_size) + def _get_next_batch(self) -> bool: """ Gets the next batch of data from the generator and appends to cache. - Return: + Returns: True if a batch was successfully loaded, False otherwise. 
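Per the docstring above, get_loc mirrors pandas semantics: a unique label yields an integer, duplicates on a monotonic index yield a slice, and non-monotonic duplicates yield a boolean mask. A usage sketch, assuming a configured BigQuery session (the output comments reflect the intended behavior):

```python
import bigframes.pandas as bpd

s = bpd.Series([10, 20, 30], index=["a", "b", "c"])
print(s.index.get_loc("b"))  # 1 -- unique label -> integer position

s2 = bpd.Series(range(4), index=["a", "b", "b", "c"])
print(s2.index.get_loc("b"))  # slice(1, 3) -- monotonic duplicates, stop exclusive

s3 = bpd.Series(range(4), index=["b", "a", "b", "c"])
print(s3.index.get_loc("b"))  # boolean Series mask -- non-monotonic duplicates
```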
""" if self._all_data_loaded: @@ -148,6 +177,13 @@ def _cached_data(self) -> pd.DataFrame: return pd.DataFrame(columns=self._dataframe.columns) return pd.concat(self._cached_batches, ignore_index=True) + def _reset_batches_for_new_page_size(self): + """Reset the batch iterator when page size changes.""" + self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size) + self._cached_batches = [] + self._batch_iter = None + self._all_data_loaded = False + def _set_table_html(self): """Sets the current html data based on the current page and page size.""" start = self.page * self.page_size @@ -174,6 +210,18 @@ def _set_table_html(self): ) @traitlets.observe("page") - def _page_changed(self, change): + def _page_changed(self, _change: Dict[str, Any]): """Handler for when the page number is changed from the frontend.""" self._set_table_html() + + @traitlets.observe("page_size") + def _page_size_changed(self, _change: Dict[str, Any]): + """Handler for when the page size is changed from the frontend.""" + # Reset the page to 0 when page size changes to avoid invalid page states + self.page = 0 + + # Reset batches to use new page size for future data fetching + self._reset_batches_for_new_page_size() + + # Update the table display + self._set_table_html() diff --git a/bigframes/display/table_widget.css b/bigframes/display/table_widget.css new file mode 100644 index 0000000000..790b6ae1bc --- /dev/null +++ b/bigframes/display/table_widget.css @@ -0,0 +1,76 @@ +/** + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +.bigframes-widget .table-container { + max-height: 620px; + overflow: auto; +} + +.bigframes-widget .footer { + align-items: center; + display: flex; + font-size: 0.8rem; + padding-top: 8px; +} + +.bigframes-widget .footer > * { + flex: 1; +} + +.bigframes-widget .pagination { + align-items: center; + display: flex; + flex-direction: row; + gap: 4px; + justify-content: center; + padding: 4px; +} + +.bigframes-widget .page-size { + align-items: center; + display: flex; + flex-direction: row; + gap: 4px; + justify-content: end; +} + +.bigframes-widget table { + border-collapse: collapse; + text-align: left; + width: 100%; +} + +.bigframes-widget th { + background-color: var(--colab-primary-surface-color, var(--jp-layout-color0)); + /* Uncomment once we support sorting: cursor: pointer; */ + position: sticky; + top: 0; + z-index: 1; +} + +.bigframes-widget button { + cursor: pointer; + display: inline-block; + text-align: center; + text-decoration: none; + user-select: none; + vertical-align: middle; +} + +.bigframes-widget button:disabled { + opacity: 0.65; + pointer-events: none; +} diff --git a/bigframes/display/table_widget.js b/bigframes/display/table_widget.js index 71484af4d5..7fff72d1ba 100644 --- a/bigframes/display/table_widget.js +++ b/bigframes/display/table_widget.js @@ -15,81 +15,147 @@ */ const ModelProperty = { - TABLE_HTML: "table_html", - ROW_COUNT: "row_count", - PAGE_SIZE: "page_size", PAGE: "page", + PAGE_SIZE: "page_size", + ROW_COUNT: "row_count", + TABLE_HTML: "table_html", }; const Event = { + CHANGE: "change", CHANGE_TABLE_HTML: `change:${ModelProperty.TABLE_HTML}`, CLICK: "click", }; /** - * Renders a paginated table and its controls into a given element. + * Renders the interactive table widget. * @param {{ - * model: !Backbone.Model, - * el: !HTMLElement + * model: any, + * el: HTMLElement * }} options */ function render({ model, el }) { + // Main container with a unique class for CSS scoping const container = document.createElement("div"); - container.innerHTML = model.get(ModelProperty.TABLE_HTML); + container.classList.add("bigframes-widget"); + + // Structure + const tableContainer = document.createElement("div"); + const footer = document.createElement("div"); - const buttonContainer = document.createElement("div"); + // Footer: Total rows label + const rowCountLabel = document.createElement("div"); + + // Footer: Pagination controls + const paginationContainer = document.createElement("div"); const prevPage = document.createElement("button"); - const label = document.createElement("span"); + const paginationLabel = document.createElement("span"); const nextPage = document.createElement("button"); + // Footer: Page size controls + const pageSizeContainer = document.createElement("div"); + const pageSizeLabel = document.createElement("label"); + const pageSizeSelect = document.createElement("select"); + + // Add CSS classes + tableContainer.classList.add("table-container"); + footer.classList.add("footer"); + paginationContainer.classList.add("pagination"); + pageSizeContainer.classList.add("page-size"); + + // Configure pagination buttons prevPage.type = "button"; nextPage.type = "button"; prevPage.textContent = "Prev"; nextPage.textContent = "Next"; - /** Updates the button states and page label based on the model. 
*/ + // Configure page size selector + pageSizeLabel.textContent = "Page Size"; + for (const size of [10, 25, 50, 100]) { + const option = document.createElement("option"); + option.value = size; + option.textContent = size; + if (size === model.get(ModelProperty.PAGE_SIZE)) { + option.selected = true; + } + pageSizeSelect.appendChild(option); + } + + /** Updates the footer states and page label based on the model. */ function updateButtonStates() { - const totalPages = Math.ceil( - model.get(ModelProperty.ROW_COUNT) / model.get(ModelProperty.PAGE_SIZE), - ); + const rowCount = model.get(ModelProperty.ROW_COUNT); + const pageSize = model.get(ModelProperty.PAGE_SIZE); const currentPage = model.get(ModelProperty.PAGE); + const totalPages = Math.ceil(rowCount / pageSize); - label.textContent = `Page ${currentPage + 1} of ${totalPages}`; + rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`; + paginationLabel.textContent = `Page ${currentPage + 1} of ${totalPages || 1}`; prevPage.disabled = currentPage === 0; nextPage.disabled = currentPage >= totalPages - 1; + pageSizeSelect.value = pageSize; } /** - * Updates the page in the model. - * @param {number} direction -1 for previous, 1 for next. + * Increments or decrements the page in the model. + * @param {number} direction - `1` for next, `-1` for previous. */ function handlePageChange(direction) { - const currentPage = model.get(ModelProperty.PAGE); - const newPage = Math.max(0, currentPage + direction); - if (newPage !== currentPage) { - model.set(ModelProperty.PAGE, newPage); + const current = model.get(ModelProperty.PAGE); + const next = current + direction; + model.set(ModelProperty.PAGE, next); + model.save_changes(); + } + + /** + * Handles changes to the page size from the dropdown. + * @param {number} size - The new page size. + */ + function handlePageSizeChange(size) { + const currentSize = model.get(ModelProperty.PAGE_SIZE); + if (size !== currentSize) { + model.set(ModelProperty.PAGE_SIZE, size); model.save_changes(); } } + /** Updates the HTML in the table container and refreshes button states. */ + function handleTableHTMLChange() { + // Note: Using innerHTML is safe here because the content is generated + // by a trusted backend (DataFrame.to_html). + tableContainer.innerHTML = model.get(ModelProperty.TABLE_HTML); + updateButtonStates(); + } + + // Add event listeners prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1)); nextPage.addEventListener(Event.CLICK, () => handlePageChange(1)); - - model.on(Event.CHANGE_TABLE_HTML, () => { - // Note: Using innerHTML can be a security risk if the content is - // user-generated. Ensure 'table_html' is properly sanitized. 
- container.innerHTML = model.get(ModelProperty.TABLE_HTML); - updateButtonStates(); + pageSizeSelect.addEventListener(Event.CHANGE, (e) => { + const newSize = Number(e.target.value); + if (newSize) { + handlePageSizeChange(newSize); + } }); + model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange); + + // Assemble the DOM + paginationContainer.appendChild(prevPage); + paginationContainer.appendChild(paginationLabel); + paginationContainer.appendChild(nextPage); - // Initial setup - updateButtonStates(); + pageSizeContainer.appendChild(pageSizeLabel); + pageSizeContainer.appendChild(pageSizeSelect); + + footer.appendChild(rowCountLabel); + footer.appendChild(paginationContainer); + footer.appendChild(pageSizeContainer); + + container.appendChild(tableContainer); + container.appendChild(footer); - buttonContainer.appendChild(prevPage); - buttonContainer.appendChild(label); - buttonContainer.appendChild(nextPage); el.appendChild(container); - el.appendChild(buttonContainer); + + // Initial render + handleTableHTMLChange(); } export default { render }; diff --git a/bigframes/pandas/io/api.py b/bigframes/pandas/io/api.py index 65435bd902..5ec3626c7a 100644 --- a/bigframes/pandas/io/api.py +++ b/bigframes/pandas/io/api.py @@ -345,6 +345,8 @@ def _read_gbq_colab( dry_run=True, ) _set_default_session_location_if_possible_deferred_query(create_query) + if not config.options.bigquery._session_started: + config.options.bigquery.enable_polars_execution = True return global_session.with_default_session( bigframes.session.Session._read_gbq_colab, diff --git a/bigframes/session/polars_executor.py b/bigframes/session/polars_executor.py index 9b2346a7ed..2c04a0016b 100644 --- a/bigframes/session/polars_executor.py +++ b/bigframes/session/polars_executor.py @@ -40,6 +40,7 @@ nodes.ConcatNode, nodes.JoinNode, nodes.InNode, + nodes.PromoteOffsetsNode, ) _COMPATIBLE_SCALAR_OPS = ( diff --git a/bigframes/version.py b/bigframes/version.py index 4eec2e8af7..71fc4e35e0 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.12.0" +__version__ = "2.13.0" # {x-release-please-start-date} -__release_date__ = "2025-07-23" +__release_date__ = "2025-07-25" # {x-release-please-end} diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 072e5c6504..f6380a9fd4 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -75,7 +75,7 @@ { "data": { "text/html": [ - "Query job 0b22b0f5-b952-4546-a969-41a89e343e9b is DONE. 0 Bytes processed. Open Job" + "Query job c5fcfd5e-1617-49c8-afa3-86ca21019de4 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -141,7 +141,7 @@ { "data": { "text/html": [ - "Query job 8e57da45-b6a7-44fb-8c4f-4b87058d94cb is DONE. 171.4 MB processed. Open Job" + "Query job ab900a53-5011-4e80-85d5-0ef2958598db is DONE. 171.4 MB processed. 
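The _read_gbq_colab change above opts into the local polars engine only while no session has started, since the option cannot take effect afterward. The equivalent user-level toggle, sketched with the option name taken from the diff itself:

```python
import bigframes.pandas as bpd

# Must run before the first query creates the session.
bpd.options.bigquery.enable_polars_execution = True

df = bpd.read_gbq("SELECT 1 AS x")  # eligible plans may now execute locally via polars
```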
diff --git a/bigframes/session/polars_executor.py b/bigframes/session/polars_executor.py
index 9b2346a7ed..2c04a0016b 100644
--- a/bigframes/session/polars_executor.py
+++ b/bigframes/session/polars_executor.py
@@ -40,6 +40,7 @@
     nodes.ConcatNode,
     nodes.JoinNode,
     nodes.InNode,
+    nodes.PromoteOffsetsNode,
 )
 
 _COMPATIBLE_SCALAR_OPS = (
diff --git a/bigframes/version.py b/bigframes/version.py
index 4eec2e8af7..71fc4e35e0 100644
--- a/bigframes/version.py
+++ b/bigframes/version.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2.12.0"
+__version__ = "2.13.0"
 
 # {x-release-please-start-date}
-__release_date__ = "2025-07-23"
+__release_date__ = "2025-07-25"
 # {x-release-please-end}
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
index 072e5c6504..f6380a9fd4 100644
--- a/notebooks/dataframes/anywidget_mode.ipynb
+++ b/notebooks/dataframes/anywidget_mode.ipynb
@@ -75,7 +75,7 @@
    {
     "data": {
      "text/html": [
-       "Query job 0b22b0f5-b952-4546-a969-41a89e343e9b is DONE. 0 Bytes processed. Open Job"
+       "Query job c5fcfd5e-1617-49c8-afa3-86ca21019de4 is DONE. 0 Bytes processed. Open Job"
      ],
      "text/plain": [
       ""
@@ -141,7 +141,7 @@
    {
     "data": {
      "text/html": [
-       "Query job 8e57da45-b6a7-44fb-8c4f-4b87058d94cb is DONE. 171.4 MB processed. Open Job"
+       "Query job ab900a53-5011-4e80-85d5-0ef2958598db is DONE. 171.4 MB processed. Open Job"
      ],
      "text/plain": [
       ""
@@ -153,7 +153,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "4d00aaf284984cbc97483c651b9c5110",
+       "model_id": "bda63ba739dc4d5f83a5e18eb27b2686",
        "version_major": 2,
        "version_minor": 1
       },
@@ -204,7 +204,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "d4af4cf7d24d4f1c8e9c9b5f237df32b",
+       "model_id": "9bffeb73549e48419c3dd895edfe30e8",
        "version_major": 2,
        "version_minor": 1
       },
@@ -290,7 +290,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "0f04ad3c464145ee9735eba09f5107a9",
+       "model_id": "dfd4fa3a1c6f4b3eb1877cb0e9ba7e94",
        "version_major": 2,
        "version_minor": 1
       },
diff --git a/owlbot.py b/owlbot.py
index 5dc57a35b8..b9145d4367 100644
--- a/owlbot.py
+++ b/owlbot.py
@@ -107,6 +107,13 @@
     "recursive-include bigframes *.json *.proto *.js py.typed",
 )
 
+# Include JavaScript and CSS files for display widgets
+assert 1 == s.replace(  # MANIFEST.in
+    ["MANIFEST.in"],
+    re.escape("recursive-include bigframes *.json *.proto *.js py.typed"),
+    "recursive-include bigframes *.json *.proto *.js *.css py.typed",
+)
+
 # Fixup the documentation.
 assert 1 == s.replace(  # docs/conf.py
     ["docs/conf.py"],
diff --git a/samples/dbt/.dbt.yml b/samples/dbt/.dbt.yml
index 98053bfc37..a2fd2ffd4c 100644
--- a/samples/dbt/.dbt.yml
+++ b/samples/dbt/.dbt.yml
@@ -1,3 +1,17 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 dbt_sample_project:
   outputs:
     dev:  # The target environment name (e.g., dev, prod)
diff --git a/samples/dbt/dbt_sample_project/dbt_project.yml b/samples/dbt/dbt_sample_project/dbt_project.yml
index d12098a18a..aef376e1fc 100644
--- a/samples/dbt/dbt_sample_project/dbt_project.yml
+++ b/samples/dbt/dbt_sample_project/dbt_project.yml
@@ -1,3 +1,16 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 # Name your project! Project names should contain only lowercase characters
 # and underscores. A good package name should reflect your organization's
diff --git a/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py b/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py
index 4c8ddf8f6c..e397549afe 100644
--- a/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py
+++ b/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py
@@ -1,3 +1,17 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # This example demonstrates one of the most general usages of transforming raw
 # BigQuery data into a processed table using a dbt Python model with BigFrames.
 # See more from: https://cloud.google.com/bigquery/docs/dataframes-dbt.
@@ -32,7 +46,13 @@ def model(dbt, session):
     table = "bigquery-public-data.epa_historical_air_quality.temperature_hourly_summary"
 
     # Define the specific columns to select from the BigQuery table.
-    columns = ["state_name", "county_name", "date_local", "time_local", "sample_measurement"]
+    columns = [
+        "state_name",
+        "county_name",
+        "date_local",
+        "time_local",
+        "sample_measurement",
+    ]
 
     # Read data from the specified BigQuery table into a BigFrames DataFrame.
     df = session.read_gbq(table, columns=columns)
@@ -44,14 +64,16 @@
     # Group the DataFrame by 'state_name', 'county_name', and 'date_local'. For
     # each group, calculate the minimum and maximum of the 'sample_measurement'
     # column. The result will be a BigFrames DataFrame with a MultiIndex.
-    result = df.groupby(["state_name", "county_name", "date_local"])["sample_measurement"]\
-        .agg(["min", "max"])
+    result = df.groupby(["state_name", "county_name", "date_local"])[
+        "sample_measurement"
+    ].agg(["min", "max"])
 
     # Rename some columns and convert the MultiIndex of the 'result' DataFrame
     # into regular columns. This flattens the DataFrame so 'state_name',
     # 'county_name', and 'date_local' become regular columns again.
-    result = result.rename(columns={'min': 'min_temperature', 'max': 'max_temperature'})\
-        .reset_index()
+    result = result.rename(
+        columns={"min": "min_temperature", "max": "max_temperature"}
+    ).reset_index()
 
     # Return the processed BigFrames DataFrame.
     # In a dbt Python model, this DataFrame will be materialized as a table
diff --git a/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py b/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py
index 019e503393..3795d0eee9 100644
--- a/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py
+++ b/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py
@@ -1,6 +1,20 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # This example demonstrates how to build an **incremental dbt Python model**
 # using BigFrames.
-# 
+#
 # Incremental models are essential for efficiently processing large datasets by
 # only transforming new or changed data, rather than reprocessing the entire
 # dataset every time. If the target table already exists, dbt will perform a
@@ -13,8 +27,6 @@
 # directly within BigQuery, leveraging BigQuery's scalability.
 
 
-import bigframes.pandas as bpd
-
 def model(dbt, session):
     # Optional: override settings from dbt_project.yml.
     # When both are set, dbt.config takes precedence over dbt_project.yml.
@@ -24,9 +36,9 @@
         submission_method="bigframes",
         # Materialize this model as an 'incremental' table. This tells dbt to
         # only process new or updated data on subsequent runs.
-        materialized='incremental',
+        materialized="incremental",
         # Use MERGE strategy to update rows during incremental runs.
-        incremental_strategy='merge',
+        incremental_strategy="merge",
         # Define the composite key that uniquely identifies a row in the
         # target table. This key is used by the 'merge' strategy to match
         # existing rows for updates during incremental runs.
@@ -41,7 +53,7 @@
     # Define a BigFrames UDF to generate a temperature description.
     # BigFrames UDFs allow you to define custom Python logic that executes
     # directly within BigQuery. This is powerful for complex transformations.
-    @bpd.udf(dataset='dbt_sample_dataset', name='describe_udf')
+    @session.udf(dataset="dbt_sample_dataset", name="describe_udf")
     def describe(
         max_temperature: float,
         min_temperature: float,
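The sample now binds the UDF to the session that dbt supplies rather than the global @bpd.udf decorator, so the function is created through the same session as the model's data. A hedged standalone sketch of the same pattern outside dbt — the dataset and name are placeholders:

```python
import bigframes.pandas as bpd

session = bpd.get_global_session()


@session.udf(dataset="my_dataset", name="describe_udf")
def describe(max_temperature: float, min_temperature: float) -> str:
    # Executes inside BigQuery as a managed Python UDF.
    return f"Temperatures spanned {max_temperature - min_temperature:.1f} degrees."
```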
diff --git a/setup.py b/setup.py
index 63d019caa0..bc42cc4281 100644
--- a/setup.py
+++ b/setup.py
@@ -44,9 +44,9 @@
     "google-cloud-bigquery-storage >= 2.30.0, < 3.0.0",
     "google-cloud-functions >=1.12.0",
     "google-cloud-bigquery-connection >=1.12.0",
-    "google-cloud-iam >=2.12.1",
     "google-cloud-resource-manager >=1.10.3",
     "google-cloud-storage >=2.0.0",
+    "grpc-google-iam-v1 >= 0.14.2",
     "numpy >=1.24.0",
     "pandas >=1.5.3",
     "pandas-gbq >=0.26.1",
diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt
index 155d4388a4..8df3a3a2c3 100644
--- a/testing/constraints-3.9.txt
+++ b/testing/constraints-3.9.txt
@@ -12,6 +12,7 @@ google-cloud-bigquery-connection==1.12.0
 google-cloud-iam==2.12.1
 google-cloud-resource-manager==1.10.3
 google-cloud-storage==2.0.0
+grpc-google-iam-v1==0.14.2
 numpy==1.24.0
 pandas==1.5.3
 pandas-gbq==0.26.1
diff --git a/tests/benchmark/db_benchmark/groupby/q1.py b/tests/benchmark/db_benchmark/groupby/q1.py
index dc86817908..0051ed5b59 100644
--- a/tests/benchmark/db_benchmark/groupby/q1.py
+++ b/tests/benchmark/db_benchmark/groupby/q1.py
@@ -18,21 +18,15 @@
 import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_groupby_queries.q1,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
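These benchmark scripts now read a single configuration object instead of unpacking a tuple, which keeps call sites stable as fields are added. The real definition lives in tests/benchmark/utils.py; inferred from the attributes used in the diff, it is shaped roughly like this (a sketch, not the actual code):

```python
import dataclasses
from typing import Optional

import bigframes.session


@dataclasses.dataclass(frozen=True)
class BenchmarkConfig:
    project_id: str
    dataset_id: str
    session: bigframes.session.Session
    benchmark_suffix: Optional[str] = None
    table_id: Optional[str] = None  # populated when include_table_id=True
```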
diff --git a/tests/benchmark/db_benchmark/groupby/q10.py b/tests/benchmark/db_benchmark/groupby/q10.py
index 99d28e2f9a..08ca9a7fe4 100644
--- a/tests/benchmark/db_benchmark/groupby/q10.py
+++ b/tests/benchmark/db_benchmark/groupby/q10.py
@@ -18,21 +18,15 @@
 import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_groupby_queries.q10,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/groupby/q2.py b/tests/benchmark/db_benchmark/groupby/q2.py
index b06a4189fe..5b3b683931 100644
--- a/tests/benchmark/db_benchmark/groupby/q2.py
+++ b/tests/benchmark/db_benchmark/groupby/q2.py
@@ -18,21 +18,15 @@
 import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_groupby_queries.q2,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
    )
diff --git a/tests/benchmark/db_benchmark/groupby/q3.py b/tests/benchmark/db_benchmark/groupby/q3.py
index d66dd7b39d..97d005fbf4 100644
--- a/tests/benchmark/db_benchmark/groupby/q3.py
+++ b/tests/benchmark/db_benchmark/groupby/q3.py
@@ -18,21 +18,15 @@
 import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_groupby_queries.q3,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/groupby/q4.py b/tests/benchmark/db_benchmark/groupby/q4.py
index 6c72069a53..709b2107d2 100644
--- a/tests/benchmark/db_benchmark/groupby/q4.py
+++ b/tests/benchmark/db_benchmark/groupby/q4.py
@@ -18,21 +18,15 @@
 import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_groupby_queries.q4,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/groupby/q5.py b/tests/benchmark/db_benchmark/groupby/q5.py
index 3e6db9783e..3d870b0598 100644
--- a/tests/benchmark/db_benchmark/groupby/q5.py
+++ b/tests/benchmark/db_benchmark/groupby/q5.py
@@ -18,21 +18,15 @@
 import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_groupby_queries.q5,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/groupby/q6.py b/tests/benchmark/db_benchmark/groupby/q6.py
index f763280b5b..bceb5599b2 100644
--- a/tests/benchmark/db_benchmark/groupby/q6.py
+++ b/tests/benchmark/db_benchmark/groupby/q6.py
@@ -18,21 +18,15 @@
 import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_groupby_queries.q6,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/groupby/q7.py b/tests/benchmark/db_benchmark/groupby/q7.py
index 4e7f2d58b6..600e26bf16 100644
--- a/tests/benchmark/db_benchmark/groupby/q7.py
+++ b/tests/benchmark/db_benchmark/groupby/q7.py
@@ -18,21 +18,15 @@
 import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_groupby_queries.q7,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/groupby/q8.py b/tests/benchmark/db_benchmark/groupby/q8.py
index 75d5dcaa0c..82082bc7e5 100644
--- a/tests/benchmark/db_benchmark/groupby/q8.py
+++ b/tests/benchmark/db_benchmark/groupby/q8.py
@@ -18,21 +18,15 @@
 import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_groupby_queries.q8,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/join/q1.py b/tests/benchmark/db_benchmark/join/q1.py
index 4ca0ee3389..e9e3c2fad0 100644
--- a/tests/benchmark/db_benchmark/join/q1.py
+++ b/tests/benchmark/db_benchmark/join/q1.py
@@ -18,22 +18,16 @@
 import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_join_queries.q1,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/join/q2.py b/tests/benchmark/db_benchmark/join/q2.py
index 19efd6fbf2..f4b9f67def 100644
--- a/tests/benchmark/db_benchmark/join/q2.py
+++ b/tests/benchmark/db_benchmark/join/q2.py
@@ -18,22 +18,16 @@
 import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_join_queries.q2,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/join/q3.py b/tests/benchmark/db_benchmark/join/q3.py
index d0a931bfb2..83be831a46 100644
--- a/tests/benchmark/db_benchmark/join/q3.py
+++ b/tests/benchmark/db_benchmark/join/q3.py
@@ -18,22 +18,16 @@
 import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_join_queries.q3,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/join/q4.py b/tests/benchmark/db_benchmark/join/q4.py
index ebd7c461d0..6399683472 100644
--- a/tests/benchmark/db_benchmark/join/q4.py
+++ b/tests/benchmark/db_benchmark/join/q4.py
@@ -18,22 +18,16 @@
 import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_join_queries.q4,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/join/q5.py b/tests/benchmark/db_benchmark/join/q5.py
index 7114acd408..b0b26f9365 100644
--- a/tests/benchmark/db_benchmark/join/q5.py
+++ b/tests/benchmark/db_benchmark/join/q5.py
@@ -18,22 +18,16 @@
 import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_join_queries.q5,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/db_benchmark/sort/q1.py b/tests/benchmark/db_benchmark/sort/q1.py
index 5f6c404443..d73fe28e30 100644
--- a/tests/benchmark/db_benchmark/sort/q1.py
+++ b/tests/benchmark/db_benchmark/sort/q1.py
@@ -18,21 +18,15 @@
 import bigframes_vendored.db_benchmark.sort_queries as vendored_dbbenchmark_sort_queries
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         vendored_dbbenchmark_sort_queries.q1,
         current_path,
-        suffix,
-        project_id,
-        dataset_id,
-        table_id,
-        session,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.table_id,
+        config.session,
     )
diff --git a/tests/benchmark/read_gbq_colab/aggregate_output.py b/tests/benchmark/read_gbq_colab/aggregate_output.py
index dda4bf95a4..6acf84d5bc 100644
--- a/tests/benchmark/read_gbq_colab/aggregate_output.py
+++ b/tests/benchmark/read_gbq_colab/aggregate_output.py
@@ -52,21 +52,15 @@ def aggregate_output(
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         aggregate_output,
         current_path,
-        suffix,
-        project_id=project_id,
-        dataset_id=dataset_id,
-        table_id=table_id,
-        session=session,
+        config.benchmark_suffix,
+        project_id=config.project_id,
+        dataset_id=config.dataset_id,
+        table_id=config.table_id,
+        session=config.session,
     )
diff --git a/tests/benchmark/read_gbq_colab/dry_run.py b/tests/benchmark/read_gbq_colab/dry_run.py
index c2de1b7cc4..0f05a2c0b4 100644
--- a/tests/benchmark/read_gbq_colab/dry_run.py
+++ b/tests/benchmark/read_gbq_colab/dry_run.py
@@ -28,21 +28,15 @@ def dry_run(*, project_id, dataset_id, table_id, session: bigframes.session.Sess
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         dry_run,
         current_path,
-        suffix,
-        project_id=project_id,
-        dataset_id=dataset_id,
-        table_id=table_id,
-        session=session,
+        config.benchmark_suffix,
+        project_id=config.project_id,
+        dataset_id=config.dataset_id,
+        table_id=config.table_id,
+        session=config.session,
     )
diff --git a/tests/benchmark/read_gbq_colab/filter_output.py b/tests/benchmark/read_gbq_colab/filter_output.py
index 0db7ac5fd6..d35cc6d5f7 100644
--- a/tests/benchmark/read_gbq_colab/filter_output.py
+++ b/tests/benchmark/read_gbq_colab/filter_output.py
@@ -44,21 +44,15 @@ def filter_output(
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         filter_output,
         current_path,
-        suffix,
-        project_id=project_id,
-        dataset_id=dataset_id,
-        table_id=table_id,
-        session=session,
+        config.benchmark_suffix,
+        project_id=config.project_id,
+        dataset_id=config.dataset_id,
+        table_id=config.table_id,
+        session=config.session,
     )
diff --git a/tests/benchmark/read_gbq_colab/first_page.py b/tests/benchmark/read_gbq_colab/first_page.py
index 2df9990d22..eba60297e4 100644
--- a/tests/benchmark/read_gbq_colab/first_page.py
+++ b/tests/benchmark/read_gbq_colab/first_page.py
@@ -33,21 +33,15 @@ def first_page(*, project_id, dataset_id, table_id, session: bigframes.session.S
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         first_page,
         current_path,
-        suffix,
-        project_id=project_id,
-        dataset_id=dataset_id,
-        table_id=table_id,
-        session=session,
+        config.benchmark_suffix,
+        project_id=config.project_id,
+        dataset_id=config.dataset_id,
+        table_id=config.table_id,
+        session=config.session,
     )
diff --git a/tests/benchmark/read_gbq_colab/last_page.py b/tests/benchmark/read_gbq_colab/last_page.py
index ad785a29e8..d973c84bce 100644
--- a/tests/benchmark/read_gbq_colab/last_page.py
+++ b/tests/benchmark/read_gbq_colab/last_page.py
@@ -34,21 +34,15 @@ def last_page(*, project_id, dataset_id, table_id, session: bigframes.session.Se
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         last_page,
         current_path,
-        suffix,
-        project_id=project_id,
-        dataset_id=dataset_id,
-        table_id=table_id,
-        session=session,
+        config.benchmark_suffix,
+        project_id=config.project_id,
+        dataset_id=config.dataset_id,
+        table_id=config.table_id,
+        session=config.session,
     )
diff --git a/tests/benchmark/read_gbq_colab/sort_output.py b/tests/benchmark/read_gbq_colab/sort_output.py
index 997de5683d..7e1db368c5 100644
--- a/tests/benchmark/read_gbq_colab/sort_output.py
+++ b/tests/benchmark/read_gbq_colab/sort_output.py
@@ -44,21 +44,15 @@ def sort_output(
 
 
 if __name__ == "__main__":
-    (
-        project_id,
-        dataset_id,
-        table_id,
-        session,
-        suffix,
-    ) = utils.get_configuration(include_table_id=True)
+    config = utils.get_configuration(include_table_id=True)
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
         sort_output,
         current_path,
-        suffix,
-        project_id=project_id,
-        dataset_id=dataset_id,
-        table_id=table_id,
-        session=session,
+        config.benchmark_suffix,
+        project_id=config.project_id,
+        dataset_id=config.dataset_id,
+        table_id=config.table_id,
+        session=config.session,
     )
diff --git a/tests/benchmark/tpch/q1.py b/tests/benchmark/tpch/q1.py
index a672103931..beacaa436b 100644
--- a/tests/benchmark/tpch/q1.py
+++ b/tests/benchmark/tpch/q1.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q1 as vendored_tpch_q1
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q1.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q1.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q10.py b/tests/benchmark/tpch/q10.py
index d468a90156..27262ff210 100644
--- a/tests/benchmark/tpch/q10.py
+++ b/tests/benchmark/tpch/q10.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q10 as vendored_tpch_q10
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q10.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q10.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q11.py b/tests/benchmark/tpch/q11.py
index dbf3fd94de..45a0168bb1 100644
--- a/tests/benchmark/tpch/q11.py
+++ b/tests/benchmark/tpch/q11.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q11 as vendored_tpch_q11
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q11.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q11.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q12.py b/tests/benchmark/tpch/q12.py
index 57774457ae..d055cd1c0b 100644
--- a/tests/benchmark/tpch/q12.py
+++ b/tests/benchmark/tpch/q12.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q12 as vendored_tpch_q12
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q12.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q12.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q13.py b/tests/benchmark/tpch/q13.py
index a7f2780e4b..f74ef26448 100644
--- a/tests/benchmark/tpch/q13.py
+++ b/tests/benchmark/tpch/q13.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q13 as vendored_tpch_q13
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q13.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q13.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q14.py b/tests/benchmark/tpch/q14.py
index e9599f3bd8..01ee0add39 100644
--- a/tests/benchmark/tpch/q14.py
+++ b/tests/benchmark/tpch/q14.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q14 as vendored_tpch_q14
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q14.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q14.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q15.py b/tests/benchmark/tpch/q15.py
index ff200384a8..b19141797a 100644
--- a/tests/benchmark/tpch/q15.py
+++ b/tests/benchmark/tpch/q15.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q15 as vendored_tpch_q15
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q15.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q15.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q16.py b/tests/benchmark/tpch/q16.py
index 69fc1b9523..5947bb6ed1 100644
--- a/tests/benchmark/tpch/q16.py
+++ b/tests/benchmark/tpch/q16.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q16 as vendored_tpch_q16
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q16.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q16.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q17.py b/tests/benchmark/tpch/q17.py
index 14707f4a93..e80f7b23f9 100644
--- a/tests/benchmark/tpch/q17.py
+++ b/tests/benchmark/tpch/q17.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q17 as vendored_tpch_q17
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q17.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q17.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q18.py b/tests/benchmark/tpch/q18.py
index 54cf0d0432..7e9d6c00c4 100644
--- a/tests/benchmark/tpch/q18.py
+++ b/tests/benchmark/tpch/q18.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q18 as vendored_tpch_q18
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q18.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q18.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q19.py b/tests/benchmark/tpch/q19.py
index 1ec44391ff..f2c1cfc623 100644
--- a/tests/benchmark/tpch/q19.py
+++ b/tests/benchmark/tpch/q19.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q19 as vendored_tpch_q19
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q19.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q19.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q2.py b/tests/benchmark/tpch/q2.py
index da8064b400..64907d0d25 100644
--- a/tests/benchmark/tpch/q2.py
+++ b/tests/benchmark/tpch/q2.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q2 as vendored_tpch_q2
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q2.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q2.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q20.py b/tests/benchmark/tpch/q20.py
index 33e4f72ef6..8a405280ef 100644
--- a/tests/benchmark/tpch/q20.py
+++ b/tests/benchmark/tpch/q20.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q20 as vendored_tpch_q20
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q20.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q20.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git a/tests/benchmark/tpch/q21.py b/tests/benchmark/tpch/q21.py
index f73f87725f..29b364b387 100644
--- a/tests/benchmark/tpch/q21.py
+++ b/tests/benchmark/tpch/q21.py
@@ -17,9 +17,14 @@
 import bigframes_vendored.tpch.queries.q21 as vendored_tpch_q21
 
 if __name__ == "__main__":
-    project_id, dataset_id, session, suffix = utils.get_configuration()
+    config = utils.get_configuration()
     current_path = pathlib.Path(__file__).absolute()
 
     utils.get_execution_time(
-        vendored_tpch_q21.q, current_path, suffix, project_id, dataset_id, session
+        vendored_tpch_q21.q,
+        current_path,
+        config.benchmark_suffix,
+        config.project_id,
+        config.dataset_id,
+        config.session,
     )
diff --git
a/tests/benchmark/tpch/q22.py b/tests/benchmark/tpch/q22.py index 0a6f6d923c..9147115097 100644 --- a/tests/benchmark/tpch/q22.py +++ b/tests/benchmark/tpch/q22.py @@ -17,9 +17,14 @@ import bigframes_vendored.tpch.queries.q22 as vendored_tpch_q22 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_configuration() + config = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q22.q, current_path, suffix, project_id, dataset_id, session + vendored_tpch_q22.q, + current_path, + config.benchmark_suffix, + config.project_id, + config.dataset_id, + config.session, ) diff --git a/tests/benchmark/tpch/q3.py b/tests/benchmark/tpch/q3.py index 92322eea21..e4eee0630b 100644 --- a/tests/benchmark/tpch/q3.py +++ b/tests/benchmark/tpch/q3.py @@ -17,9 +17,14 @@ import bigframes_vendored.tpch.queries.q3 as vendored_tpch_q3 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_configuration() + config = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q3.q, current_path, suffix, project_id, dataset_id, session + vendored_tpch_q3.q, + current_path, + config.benchmark_suffix, + config.project_id, + config.dataset_id, + config.session, ) diff --git a/tests/benchmark/tpch/q4.py b/tests/benchmark/tpch/q4.py index 2d6931d6b1..f0aa3b77a0 100644 --- a/tests/benchmark/tpch/q4.py +++ b/tests/benchmark/tpch/q4.py @@ -17,9 +17,14 @@ import bigframes_vendored.tpch.queries.q4 as vendored_tpch_q4 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_configuration() + config = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q4.q, current_path, suffix, project_id, dataset_id, session + vendored_tpch_q4.q, + current_path, + config.benchmark_suffix, + config.project_id, + config.dataset_id, + config.session, ) diff --git a/tests/benchmark/tpch/q5.py b/tests/benchmark/tpch/q5.py index e8fd83e193..5f82638278 100644 --- a/tests/benchmark/tpch/q5.py +++ b/tests/benchmark/tpch/q5.py @@ -17,9 +17,14 @@ import bigframes_vendored.tpch.queries.q5 as vendored_tpch_q5 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_configuration() + config = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q5.q, current_path, suffix, project_id, dataset_id, session + vendored_tpch_q5.q, + current_path, + config.benchmark_suffix, + config.project_id, + config.dataset_id, + config.session, ) diff --git a/tests/benchmark/tpch/q6.py b/tests/benchmark/tpch/q6.py index 152d6c663e..bf06f8d31c 100644 --- a/tests/benchmark/tpch/q6.py +++ b/tests/benchmark/tpch/q6.py @@ -17,9 +17,14 @@ import bigframes_vendored.tpch.queries.q6 as vendored_tpch_q6 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_configuration() + config = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q6.q, current_path, suffix, project_id, dataset_id, session + vendored_tpch_q6.q, + current_path, + config.benchmark_suffix, + config.project_id, + config.dataset_id, + config.session, ) diff --git a/tests/benchmark/tpch/q7.py b/tests/benchmark/tpch/q7.py index 1c3e455e1c..f9575dd4d6 100644 --- a/tests/benchmark/tpch/q7.py +++ b/tests/benchmark/tpch/q7.py @@ -17,9 +17,14 @@ import bigframes_vendored.tpch.queries.q7 as 
vendored_tpch_q7 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_configuration() + config = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q7.q, current_path, suffix, project_id, dataset_id, session + vendored_tpch_q7.q, + current_path, + config.benchmark_suffix, + config.project_id, + config.dataset_id, + config.session, ) diff --git a/tests/benchmark/tpch/q8.py b/tests/benchmark/tpch/q8.py index 8d23194834..0af13eaeeb 100644 --- a/tests/benchmark/tpch/q8.py +++ b/tests/benchmark/tpch/q8.py @@ -17,9 +17,14 @@ import bigframes_vendored.tpch.queries.q8 as vendored_tpch_q8 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_configuration() + config = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q8.q, current_path, suffix, project_id, dataset_id, session + vendored_tpch_q8.q, + current_path, + config.benchmark_suffix, + config.project_id, + config.dataset_id, + config.session, ) diff --git a/tests/benchmark/tpch/q9.py b/tests/benchmark/tpch/q9.py index 329e315c2c..61a319377a 100644 --- a/tests/benchmark/tpch/q9.py +++ b/tests/benchmark/tpch/q9.py @@ -17,9 +17,14 @@ import bigframes_vendored.tpch.queries.q9 as vendored_tpch_q9 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_configuration() + config = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q9.q, current_path, suffix, project_id, dataset_id, session + vendored_tpch_q9.q, + current_path, + config.benchmark_suffix, + config.project_id, + config.dataset_id, + config.session, ) diff --git a/tests/benchmark/utils.py b/tests/benchmark/utils.py index 48357ddde7..5dfd8d74bd 100644 --- a/tests/benchmark/utils.py +++ b/tests/benchmark/utils.py @@ -13,6 +13,7 @@ # limitations under the License. import argparse +import dataclasses import time import bigframes @@ -20,7 +21,16 @@ READ_GBQ_COLAB_PAGE_SIZE = 100 -def get_configuration(include_table_id=False): +@dataclasses.dataclass(frozen=True) +class BenchmarkConfig: + project_id: str + dataset_id: str + session: bigframes.Session + benchmark_suffix: str | None + table_id: str | None = None + + +def get_configuration(include_table_id=False) -> BenchmarkConfig: parser = argparse.ArgumentParser() parser.add_argument( "--project_id", @@ -57,21 +67,13 @@ def get_configuration(include_table_id=False): args = parser.parse_args() session = _initialize_session(_str_to_bool(args.ordered)) - if include_table_id: - return ( - args.project_id, - args.dataset_id, - args.table_id, - session, - args.benchmark_suffix, - ) - else: - return ( - args.project_id, - args.dataset_id, - session, - args.benchmark_suffix, - ) + return BenchmarkConfig( + project_id=args.project_id, + dataset_id=args.dataset_id, + table_id=args.table_id if include_table_id else None, + session=session, + benchmark_suffix=args.benchmark_suffix, + ) def get_execution_time(func, current_path, suffix, *args, **kwargs): diff --git a/tests/system/small/engines/test_windowing.py b/tests/system/small/engines/test_windowing.py new file mode 100644 index 0000000000..f4c2b61e6f --- /dev/null +++ b/tests/system/small/engines/test_windowing.py @@ -0,0 +1,33 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from bigframes.core import array_value +from bigframes.session import polars_executor +from bigframes.testing.engine_utils import assert_equivalence_execution + +pytest.importorskip("polars") + +# Polars is used as the reference engine because it's fast and local. Generally, though, prefer the gbq engine where they disagree. +REFERENCE_ENGINE = polars_executor.PolarsExecutor() + + +@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True) +def test_engines_with_offsets( + scalars_array_value: array_value.ArrayValue, + engine, +): + result, _ = scalars_array_value.promote_offsets() + assert_equivalence_execution(result.node, REFERENCE_ENGINE, engine) diff --git a/tests/system/small/session/test_read_gbq_colab.py b/tests/system/small/session/test_read_gbq_colab.py index af78117262..9ace2dbed7 100644 --- a/tests/system/small/session/test_read_gbq_colab.py +++ b/tests/system/small/session/test_read_gbq_colab.py @@ -19,6 +19,11 @@ import pandas.testing import pytest +import bigframes +import bigframes.pandas + +pytest.importorskip("polars") + def test_read_gbq_colab_to_pandas_batches_preserves_order_by(maybe_ordered_session): # This query should return enough results to be too big to fit in a single @@ -59,6 +64,31 @@ def test_read_gbq_colab_to_pandas_batches_preserves_order_by(maybe_ordered_sessi assert executions_after == executions_before_python == executions_before_sql + 1 +def test_read_gbq_colab_fresh_session_is_hybrid(): + bigframes.close_session() + df = bigframes.pandas._read_gbq_colab( + """ + SELECT + name, + SUM(number) AS total + FROM + `bigquery-public-data.usa_names.usa_1910_2013` + WHERE state LIKE 'W%' + GROUP BY name + ORDER BY total DESC + LIMIT 300 + """ + ) + session = df._session + executions_before_python = session._metrics.execution_count + result = df.sort_values("name").peek(100) + executions_after = session._metrics.execution_count + + assert len(result) == 100 + assert session._executor._enable_polars_execution is True # type: ignore + assert executions_after == executions_before_python == 1 + + def test_read_gbq_colab_peek_avoids_requery(maybe_ordered_session): executions_before_sql = maybe_ordered_session._metrics.execution_count df = maybe_ordered_session._read_gbq_colab( diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index b6dfb22934..8a91176dd9 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -167,35 +167,31 @@ def test_widget_display_should_show_first_page_on_load( _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) -def test_widget_navigation_should_display_second_page( - table_widget, paginated_pandas_df: pd.DataFrame -): - """ - Given a widget, when the page is set to 1, then it should display - the second page of data.
- """ - expected_slice = paginated_pandas_df.iloc[2:4] - - table_widget.page = 1 - html = table_widget.table_html - - assert table_widget.page == 1 - _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) - - -def test_widget_navigation_should_display_last_page( - table_widget, paginated_pandas_df: pd.DataFrame +@pytest.mark.parametrize( + "page_number, start_row, end_row", + [ + (1, 2, 4), # Second page + (2, 4, 6), # Last page + ], + ids=["second_page", "last_page"], +) +def test_widget_navigation_should_display_correct_page( + table_widget, + paginated_pandas_df: pd.DataFrame, + page_number: int, + start_row: int, + end_row: int, ): """ - Given a widget, when the page is set to the last page (2), - then it should display the final page of data. + Given a widget, when the page is set, then it should display the correct + slice of data. """ - expected_slice = paginated_pandas_df.iloc[4:6] + expected_slice = paginated_pandas_df.iloc[start_row:end_row] - table_widget.page = 2 + table_widget.page = page_number html = table_widget.table_html - assert table_widget.page == 2 + assert table_widget.page == page_number _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) @@ -338,6 +334,108 @@ def test_empty_widget_should_render_table_headers(empty_bf_df: bf.dataframe.Data assert "id" in html +def test_page_size_change_should_reset_current_page_to_zero(table_widget): + """ + Given a widget on a non-default page, When the page_size is changed, + Then the current page attribute should reset to 0. + """ + # Start on page 1 with an initial page size of 2. + table_widget.page = 1 + assert table_widget.page == 1 + + # Change the page size. + table_widget.page_size = 3 + + # The page number is reset to 0. + assert table_widget.page == 0 + + +def test_page_size_change_should_render_html_with_new_size( + table_widget, paginated_pandas_df: pd.DataFrame +): + """ + Given a widget, when the page_size is changed, + the rendered HTML should immediately reflect the new page size. + """ + # The widget is in its initial state with page_size=2. + # We expect the first 3 rows after the change. + expected_slice = paginated_pandas_df.iloc[0:3] + + # Change the page size. + table_widget.page_size = 3 + + # The HTML now contains the first 3 rows. + html = table_widget.table_html + _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) + + +def test_navigation_after_page_size_change_should_use_new_size( + table_widget, paginated_pandas_df: pd.DataFrame +): + """ + Given a widget whose page size has been changed, When we navigate to the + next page, Then the pagination should use the new page size. + """ + # Change the page size to 3. + table_widget.page_size = 3 + # We expect the second page to contain rows 4-6 (indices 3-6). + expected_slice = paginated_pandas_df.iloc[3:6] + + # Navigate to the next page. + table_widget.page = 1 + + # The second page's HTML correctly reflects the new page size. + html = table_widget.table_html + _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) + + +@pytest.mark.parametrize("invalid_size", [0, -5], ids=["zero", "negative"]) +def test_setting_invalid_page_size_should_be_ignored(table_widget, invalid_size: int): + """When the page size is set to an invalid number (<=0), the change should + be ignored.""" + # Set the initial page to 2. + initial_size = table_widget.page_size + assert initial_size == 2 + + # Attempt to set the page size to a invlaid size. 
+ table_widget.page_size = invalid_size + + # The page size remains unchanged. + assert table_widget.page_size == initial_size + + +def test_setting_page_size_above_max_should_be_clamped(table_widget): + """ + Given a widget, when the page size is set to a value greater than the + allowed maximum, the page size should be clamped to the maximum value. + """ + # The maximum is hardcoded to 1000 in the implementation. + expected_clamped_size = 1000 + + # Attempt to set a very large page size. + table_widget.page_size = 9001 + + # The page size is clamped to the maximum. + assert table_widget.page_size == expected_clamped_size + + +def test_widget_creation_should_load_css_for_rendering(table_widget): + """ + Given a TableWidget is created, when its resources are accessed, + it should contain the CSS content required for styling. + """ + # The table_widget fixture creates the widget. + # No additional setup is needed. + + # Access the CSS content. + css_content = table_widget._css + + # The content is a non-empty string containing a known selector. + assert isinstance(css_content, str) + assert len(css_content) > 0 + assert ".bigframes-widget .footer" in css_content + + # TODO(shuowei): Add tests for custom index and multiindex # This may not be necessary for the SQL Cell use case but should be # considered for completeness. diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py index c8da85dca1..a82bdf7635 100644 --- a/tests/system/small/test_index.py +++ b/tests/system/small/test_index.py @@ -32,6 +32,110 @@ def test_index_construct_from_list(): pd.testing.assert_index_equal(bf_result, pd_result) +@pytest.mark.parametrize("key, expected_loc", [("a", 0), ("b", 1), ("c", 2)]) +def test_get_loc_should_return_int_for_unique_index(key, expected_loc): + """Behavior: get_loc on a unique index returns an integer position.""" + # The parametrized positions mirror pandas.Index.get_loc for the same labels. + # We assert our implementation returns the same value and an int type.
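+ # For reference, pd.Index(list("abc")).get_loc("b") returns 1 in pandas; + # the expectations below mirror that behavior.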
+ bf_index = bpd.Index(["a", "b", "c"]) + + result = bf_index.get_loc(key) + + assert result == expected_loc + assert isinstance(result, int) + + +def test_get_loc_should_return_slice_for_monotonic_duplicates(): + """Behavior: get_loc on a monotonic string index with duplicates returns a slice.""" + bf_index = bpd.Index(["a", "b", "b", "c"]) + pd_index = pd.Index(["a", "b", "b", "c"]) + + bf_result = bf_index.get_loc("b") + pd_result = pd_index.get_loc("b") + + assert isinstance(bf_result, slice) + assert bf_result == pd_result # Should be slice(1, 3, None) + + +def test_get_loc_should_return_slice_for_monotonic_numeric_duplicates(): + """Behavior: get_loc on a monotonic numeric index with duplicates returns a slice.""" + bf_index = bpd.Index([1, 2, 2, 3]) + pd_index = pd.Index([1, 2, 2, 3]) + + bf_result = bf_index.get_loc(2) + pd_result = pd_index.get_loc(2) + + assert isinstance(bf_result, slice) + assert bf_result == pd_result # Should be slice(1, 3, None) + + +def test_get_loc_should_return_mask_for_non_monotonic_duplicates(): + """Behavior: get_loc on a non-monotonic string index returns a boolean array.""" + bf_index = bpd.Index(["a", "b", "c", "b"]) + pd_index = pd.Index(["a", "b", "c", "b"]) + + pd_result = pd_index.get_loc("b") + bf_result = bf_index.get_loc("b") + + assert not isinstance(bf_result, (int, slice)) + + if hasattr(bf_result, "to_numpy"): + bf_array = bf_result.to_numpy() + else: + bf_array = bf_result.to_pandas().to_numpy() + numpy.testing.assert_array_equal(bf_array, pd_result) + + +def test_get_loc_should_return_mask_for_non_monotonic_numeric_duplicates(): + """Behavior: get_loc on a non-monotonic numeric index returns a boolean array.""" + bf_index = bpd.Index([1, 2, 3, 2]) + pd_index = pd.Index([1, 2, 3, 2]) + + pd_result = pd_index.get_loc(2) + bf_result = bf_index.get_loc(2) + + assert not isinstance(bf_result, (int, slice)) + + if hasattr(bf_result, "to_numpy"): + bf_array = bf_result.to_numpy() + else: + bf_array = bf_result.to_pandas().to_numpy() + numpy.testing.assert_array_equal(bf_array, pd_result) + + +def test_get_loc_should_raise_error_for_missing_key(): + """Behavior: get_loc raises KeyError when a string key is not found.""" + bf_index = bpd.Index(["a", "b", "c"]) + + with pytest.raises(KeyError): + bf_index.get_loc("d") + + +def test_get_loc_should_raise_error_for_missing_numeric_key(): + """Behavior: get_loc raises KeyError when a numeric key is not found.""" + bf_index = bpd.Index([1, 2, 3]) + + with pytest.raises(KeyError): + bf_index.get_loc(4) + + +def test_get_loc_should_work_for_single_element_index(): + """Behavior: get_loc on a single-element index returns 0.""" + assert bpd.Index(["a"]).get_loc("a") == pd.Index(["a"]).get_loc("a") + + +def test_get_loc_should_return_slice_when_all_elements_are_duplicates(): + """Behavior: get_loc returns a full slice if all elements match the key.""" + bf_index = bpd.Index(["a", "a", "a"]) + pd_index = pd.Index(["a", "a", "a"]) + + bf_result = bf_index.get_loc("a") + pd_result = pd_index.get_loc("a") + + assert isinstance(bf_result, slice) + assert bf_result == pd_result # Should be slice(0, 3, None) + + def test_index_construct_from_series(): bf_result = bpd.Index( bpd.Series([3, 14, 159], dtype=pd.Float64Dtype(), name="series_name"), diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_area/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_area/out.sql new file mode 100644 index 0000000000..9b4b6894e0 --- /dev/null +++ 
b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_area/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `geography_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + ST_AREA(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `geography_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_astext/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_astext/out.sql new file mode 100644 index 0000000000..9557e2f1d6 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_astext/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `geography_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + ST_ASTEXT(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `geography_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_x/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_x/out.sql new file mode 100644 index 0000000000..d4c0370ca8 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_x/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `geography_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + SAFE.ST_X(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `geography_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_y/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_y/out.sql new file mode 100644 index 0000000000..196c2fcad6 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_y/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `geography_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + SAFE.ST_Y(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `geography_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_iso_year/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_iso_year/out.sql new file mode 100644 index 0000000000..13b56f709c --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_iso_year/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `timestamp_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + EXTRACT(ISOYEAR FROM `bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `timestamp_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_len/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_len/out.sql new file mode 100644 index 0000000000..35fd087bc7 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_len/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM 
`bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + LENGTH(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_ln/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_ln/out.sql new file mode 100644 index 0000000000..1372c088d9 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_ln/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN `bfcol_0` < 0 THEN CAST('NaN' AS FLOAT64) ELSE LN(`bfcol_0`) END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log10/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log10/out.sql new file mode 100644 index 0000000000..b4cced439b --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log10/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN `bfcol_0` < 0 THEN CAST('NaN' AS FLOAT64) ELSE LOG(10, `bfcol_0`) END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log1p/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log1p/out.sql new file mode 100644 index 0000000000..c3902ec174 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log1p/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN `bfcol_0` < -1 THEN CAST('NaN' AS FLOAT64) ELSE LN(1 + `bfcol_0`) END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lower/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lower/out.sql new file mode 100644 index 0000000000..e730cdee15 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lower/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + LOWER(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lstrip/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lstrip/out.sql new file mode 100644 index 0000000000..49ed89b40b --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lstrip/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + 
TRIM(`bfcol_0`, ' ') AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_minute/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_minute/out.sql new file mode 100644 index 0000000000..4ef9b8142f --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_minute/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `timestamp_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + EXTRACT(MINUTE FROM `bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `timestamp_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_month/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_month/out.sql new file mode 100644 index 0000000000..4912622898 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_month/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `timestamp_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + EXTRACT(MONTH FROM `bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `timestamp_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_neg/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_neg/out.sql new file mode 100644 index 0000000000..46c58f766d --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_neg/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + -`bfcol_0` AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_normalize/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_normalize/out.sql new file mode 100644 index 0000000000..3c7efd3098 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_normalize/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `timestamp_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + TIMESTAMP_TRUNC(`bfcol_0`, DAY) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `timestamp_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_pos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_pos/out.sql new file mode 100644 index 0000000000..2d6322a182 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_pos/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_0` AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_quarter/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_quarter/out.sql new file mode 100644 index 0000000000..2be2866661 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_quarter/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `timestamp_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + EXTRACT(QUARTER FROM `bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `timestamp_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_reverse/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_reverse/out.sql new file mode 100644 index 0000000000..1ef1074149 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_reverse/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + REVERSE(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_rstrip/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_rstrip/out.sql new file mode 100644 index 0000000000..49ed89b40b --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_rstrip/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + TRIM(`bfcol_0`, ' ') AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_second/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_second/out.sql new file mode 100644 index 0000000000..144b704788 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_second/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `timestamp_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + EXTRACT(SECOND FROM `bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `timestamp_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sqrt/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sqrt/out.sql new file mode 100644 index 0000000000..e6a93e5e6c --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sqrt/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN `bfcol_0` < 0 THEN CAST('NaN' AS FLOAT64) ELSE SQRT(`bfcol_0`) END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains/out.sql 
b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains/out.sql new file mode 100644 index 0000000000..a1aa0539ee --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_0` LIKE '%e%' AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains_regex/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains_regex/out.sql new file mode 100644 index 0000000000..d0383172cb --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains_regex/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + REGEXP_CONTAINS(`bfcol_0`, 'e') AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_get/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_get/out.sql new file mode 100644 index 0000000000..1278c3435d --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_get/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + SUBSTRING(`bfcol_0`, 2, 1) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_slice/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_slice/out.sql new file mode 100644 index 0000000000..4f97ab3ac6 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_slice/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + SUBSTRING(`bfcol_0`, 2, 2) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_strip/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_strip/out.sql new file mode 100644 index 0000000000..311f2c1727 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_strip/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + TRIM(' ', `bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_time/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_time/out.sql new file mode 100644 index 0000000000..6b74efafd5 --- 
/dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_time/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `timestamp_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + TIME(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `timestamp_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_timedelta_floor/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_timedelta_floor/out.sql new file mode 100644 index 0000000000..1a8b9f4e39 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_timedelta_floor/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + FLOOR(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `int64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_upper/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_upper/out.sql new file mode 100644 index 0000000000..d22c8cff5a --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_upper/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + UPPER(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_year/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_year/out.sql new file mode 100644 index 0000000000..8352a65e9e --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_year/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `timestamp_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + EXTRACT(YEAR FROM `bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `timestamp_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py index 9f04450d38..236f94045f 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py @@ -146,6 +146,34 @@ def test_floor(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_geo_area(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["geography_col"]] + sql = _apply_unary_op(bf_df, ops.geo_area_op, "geography_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["geography_col"]] + sql = _apply_unary_op(bf_df, ops.geo_st_astext_op, "geography_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_x(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["geography_col"]] + sql = _apply_unary_op(bf_df, ops.geo_x_op, "geography_col") + + snapshot.assert_match(sql, "out.sql") + + +def 
test_geo_y(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["geography_col"]] + sql = _apply_unary_op(bf_df, ops.geo_y_op, "geography_col") + + snapshot.assert_match(sql, "out.sql") + + def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot): bf_df = repeated_types_df[["string_list_col"]] sql = _apply_unary_op(bf_df, ops.ArrayToStringOp(delimiter="."), "string_list_col") @@ -265,6 +293,153 @@ def test_isupper(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_len(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.len_op, "string_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_ln(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.ln_op, "float64_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_log10(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.log10_op, "float64_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_log1p(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.log1p_op, "float64_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_lower(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.lower_op, "string_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_lstrip(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.StrLstripOp(" "), "string_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_minute(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["timestamp_col"]] + sql = _apply_unary_op(bf_df, ops.minute_op, "timestamp_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_month(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["timestamp_col"]] + sql = _apply_unary_op(bf_df, ops.month_op, "timestamp_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_neg(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.neg_op, "float64_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_normalize(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["timestamp_col"]] + sql = _apply_unary_op(bf_df, ops.normalize_op, "timestamp_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_pos(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.pos_op, "float64_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_quarter(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["timestamp_col"]] + sql = _apply_unary_op(bf_df, ops.quarter_op, "timestamp_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_reverse(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.reverse_op, "string_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_second(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["timestamp_col"]] + sql = _apply_unary_op(bf_df, ops.second_op, "timestamp_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_rstrip(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = 
scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.StrRstripOp(" "), "string_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_sqrt(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.sqrt_op, "float64_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_str_get(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.StrGetOp(1), "string_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_str_slice(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.StrSliceOp(1, 3), "string_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_str_contains(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.StrContainsOp("e"), "string_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.StrContainsRegexOp("e"), "string_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_strip(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.StrStripOp(" "), "string_col") + + snapshot.assert_match(sql, "out.sql") + + def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["timestamp_col"]] sql = _apply_unary_op(bf_df, ops.iso_day_op, "timestamp_col") @@ -279,6 +454,13 @@ def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_iso_year(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["timestamp_col"]] + sql = _apply_unary_op(bf_df, ops.iso_year_op, "timestamp_col") + + snapshot.assert_match(sql, "out.sql") + + def test_isnull(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.isnull_op, "float64_col") @@ -321,6 +503,20 @@ def test_tanh(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_time(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["timestamp_col"]] + sql = _apply_unary_op(bf_df, ops.time_op, "timestamp_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_timedelta_floor(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col"]] + sql = _apply_unary_op(bf_df, ops.timedelta_floor_op, "int64_col") + + snapshot.assert_match(sql, "out.sql") + + def test_json_extract(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_unary_op(bf_df, ops.JSONExtract(json_path="$"), "json_col") @@ -375,3 +571,17 @@ def test_to_json_string(json_types_df: bpd.DataFrame, snapshot): sql = _apply_unary_op(bf_df, ops.ToJSONString(), "json_col") snapshot.assert_match(sql, "out.sql") + + +def test_upper(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.upper_op, "string_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_year(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["timestamp_col"]] + sql = _apply_unary_op(bf_df, ops.year_op, "timestamp_col") + + snapshot.assert_match(sql, "out.sql") diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py 
b/third_party/bigframes_vendored/pandas/core/indexes/base.py index 6a6bb96897..035eba74fd 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -4,6 +4,7 @@ from collections.abc import Hashable import typing +import bigframes from bigframes import constants @@ -741,6 +742,47 @@ def argmin(self) -> int: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def get_loc( + self, key: typing.Any + ) -> typing.Union[int, slice, bigframes.series.Series]: + """ + Get integer location, slice or boolean mask for requested label. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> unique_index = bpd.Index(list('abc')) + >>> unique_index.get_loc('b') + 1 + + >>> monotonic_index = bpd.Index(list('abbc')) + >>> monotonic_index.get_loc('b') + slice(1, 3, None) + + >>> non_monotonic_index = bpd.Index(list('abcb')) + >>> non_monotonic_index.get_loc('b') + 0 False + 1 True + 2 False + 3 True + Name: nan, dtype: boolean + + Args: + key: Label to get the location for. + + Returns: + Union[int, slice, bigframes.pandas.Series]: + Integer position of the label for unique indexes. + Slice object for monotonic indexes with duplicates. + Boolean Series mask for non-monotonic indexes with duplicates. + + Raises: + KeyError: If the key is not found in the index. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def argmax(self) -> int: """ Return int position of the largest value in the Series. diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py index 4eec2e8af7..71fc4e35e0 100644 --- a/third_party/bigframes_vendored/version.py +++ b/third_party/bigframes_vendored/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.12.0" +__version__ = "2.13.0" # {x-release-please-start-date} -__release_date__ = "2025-07-23" +__release_date__ = "2025-07-25" # {x-release-please-end}