diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 3983728ba1..51b21a62b7 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5581906b957284864632cde4e9c51d1cc66b0094990b27e689132fe5cd036046 -# created: 2025-01-16T15:24:11.364245182Z + digest: sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb +# created: 2025-04-10T17:00:10.042601326Z diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 37874a6888..80bfd5f951 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -20,6 +20,23 @@ branchProtectionRules: - 'cover' - 'Kokoro presubmit' - 'Kokoro windows' +- pattern: v1 + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: false + requiredStatusCheckContexts: + - 'OwlBot Post Processor' + - 'conventionalcommits.org' + - 'cla/google' + - 'docs' + - 'lint' + - 'mypy' + - 'unit (3.9)' + - 'unit (3.10)' + - 'unit (3.11)' + - 'unit (3.12)' + - 'cover' + - 'Kokoro presubmit' + - 'Kokoro windows' permissionRules: - team: actools-python permission: admin diff --git a/.gitignore b/.gitignore index d083ea1ddc..f7c77e4d3e 100644 --- a/.gitignore +++ b/.gitignore @@ -60,5 +60,6 @@ coverage.xml system_tests/local_test_setup # Make sure a generated file isn't accidentally committed. +demo.ipynb pylintrc pylintrc.test diff --git a/.kokoro/release-nightly.sh b/.kokoro/release-nightly.sh index 7da0881bbe..124e4b8b48 100755 --- a/.kokoro/release-nightly.sh +++ b/.kokoro/release-nightly.sh @@ -57,8 +57,7 @@ git config --global --add safe.directory "${PROJECT_ROOT}" # Workaround for older pip not able to resolve dependencies. See internal # issue 316909553. -python3.10 -m pip install pip==23.3.2 -python3.10 -m pip install --require-hashes -r .kokoro/requirements.txt +python3.10 -m pip install pip==25.0.1 # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ca120bd07..863a345da1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,11 +31,11 @@ repos: hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 6.1.0 + rev: 7.1.2 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 + rev: v1.15.0 hooks: - id: mypy additional_dependencies: [types-requests, types-tabulate, pandas-stubs<=2.2.3.241126] diff --git a/CHANGELOG.md b/CHANGELOG.md index bebe139c72..667273167b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,86 @@ [1]: https://pypi.org/project/bigframes/#history +## [2.0.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.42.0...v2.0.0) (2025-04-17) + + +### ⚠ BREAKING CHANGES + +* make `dataset` and `name` params mandatory in `udf` ([#1619](https://github.com/googleapis/python-bigquery-dataframes/issues/1619)) +* Locational endpoints support is not available in BigFrames 2.0. 
+* change default LLM model to gemini-2.0-flash-001, drop PaLM2TextGenerator and PaLM2TextEmbeddingGenerator ([#1558](https://github.com/googleapis/python-bigquery-dataframes/issues/1558)) +* change default ingress setting for `remote_function` to internal-only ([#1544](https://github.com/googleapis/python-bigquery-dataframes/issues/1544)) +* make `remote_function` params keyword only ([#1537](https://github.com/googleapis/python-bigquery-dataframes/issues/1537)) +* make `remote_function` default service account explicit ([#1537](https://github.com/googleapis/python-bigquery-dataframes/issues/1537)) +* set `allow_large_results=False` by default ([#1541](https://github.com/googleapis/python-bigquery-dataframes/issues/1541)) + +### Features + +* Add `on` parameter in `dataframe.rolling()` and `dataframe.groupby.rolling()` ([#1556](https://github.com/googleapis/python-bigquery-dataframes/issues/1556)) ([45c9d9f](https://github.com/googleapis/python-bigquery-dataframes/commit/45c9d9fd1c5c13a8692435aa22861820fc11e347)) +* Add component to manage temporary tables ([#1559](https://github.com/googleapis/python-bigquery-dataframes/issues/1559)) ([0a4e245](https://github.com/googleapis/python-bigquery-dataframes/commit/0a4e245670e678f4ead0aec8f8b534e7fe97d112)) +* Add Series.to_pandas_batches() method ([#1592](https://github.com/googleapis/python-bigquery-dataframes/issues/1592)) ([09ce979](https://github.com/googleapis/python-bigquery-dataframes/commit/09ce97999cfc1ded72906b1c7307da5950978ae6)) +* Add support for creating a Matrix Factorization model ([#1330](https://github.com/googleapis/python-bigquery-dataframes/issues/1330)) ([b5297f9](https://github.com/googleapis/python-bigquery-dataframes/commit/b5297f909b08928b97d887764d6e5142c763a5a3)) +* Allow `input_types`, `output_type`, and `dataset` to be used positionally in `remote_function` ([#1560](https://github.com/googleapis/python-bigquery-dataframes/issues/1560)) ([bcac8c6](https://github.com/googleapis/python-bigquery-dataframes/commit/bcac8c6ed0b40902d0ccaef3f907e6acbe6a52ed)) +* Allow pandas.cut 'labels' parameter to accept a list of string ([#1549](https://github.com/googleapis/python-bigquery-dataframes/issues/1549)) ([af842b1](https://github.com/googleapis/python-bigquery-dataframes/commit/af842b174de7eef4908b397d6a745caf8eda7b3d)) +* Change default ingress setting for `remote_function` to internal-only ([#1544](https://github.com/googleapis/python-bigquery-dataframes/issues/1544)) ([c848a80](https://github.com/googleapis/python-bigquery-dataframes/commit/c848a80766ff68ea92c05a5dc5c26508e6755381)) +* Detect duplicate column/index names in read_gbq before send query. 
([#1615](https://github.com/googleapis/python-bigquery-dataframes/issues/1615)) ([40d6960](https://github.com/googleapis/python-bigquery-dataframes/commit/40d696088114fb08e68df74be261144350b785c8)) +* Drop support for locational endpoints ([#1542](https://github.com/googleapis/python-bigquery-dataframes/issues/1542)) ([4bf2e43](https://github.com/googleapis/python-bigquery-dataframes/commit/4bf2e43ef4498b11f32086231fc4cc749fde966a)) +* Enable time range rolling for DataFrame, DataFrameGroupBy and SeriesGroupBy ([#1605](https://github.com/googleapis/python-bigquery-dataframes/issues/1605)) ([b4b7073](https://github.com/googleapis/python-bigquery-dataframes/commit/b4b7073da8348b6597bd3d90d1a758cd29586533)) +* Improve local data validation ([#1598](https://github.com/googleapis/python-bigquery-dataframes/issues/1598)) ([815e471](https://github.com/googleapis/python-bigquery-dataframes/commit/815e471b904d4bd708afc4bfbf1db945e76f75c9)) +* Make `remote_function` default service account explicit ([#1537](https://github.com/googleapis/python-bigquery-dataframes/issues/1537)) ([9eb9089](https://github.com/googleapis/python-bigquery-dataframes/commit/9eb9089ce3f1dad39761ba8ebc2d6f76261bd243)) +* Set `allow_large_results=False` by default ([#1541](https://github.com/googleapis/python-bigquery-dataframes/issues/1541)) ([e9fb712](https://github.com/googleapis/python-bigquery-dataframes/commit/e9fb7129a05e8ac7c938ffe30e86902950316f20)) +* Support bigquery connection in managed function ([#1554](https://github.com/googleapis/python-bigquery-dataframes/issues/1554)) ([f6f697a](https://github.com/googleapis/python-bigquery-dataframes/commit/f6f697afc167e0fa7ea923c0aed85a9ef257d61f)) +* Support bq connection path format ([#1550](https://github.com/googleapis/python-bigquery-dataframes/issues/1550)) ([e7eb918](https://github.com/googleapis/python-bigquery-dataframes/commit/e7eb918dd9df3569febe695f57c1a5909844fd3c)) +* Support gemini-2.0-X models ([#1558](https://github.com/googleapis/python-bigquery-dataframes/issues/1558)) ([3104fab](https://github.com/googleapis/python-bigquery-dataframes/commit/3104fab019d20b0cbc06cd81d43b3f34fd1dd987)) +* Support inlining small list, struct, json data ([#1589](https://github.com/googleapis/python-bigquery-dataframes/issues/1589)) ([2ce891f](https://github.com/googleapis/python-bigquery-dataframes/commit/2ce891fcd5bfd9f093fbcbb1ea35158d2bf9d8b9)) +* Support time range rolling on Series. 
([#1590](https://github.com/googleapis/python-bigquery-dataframes/issues/1590)) ([6e98a2c](https://github.com/googleapis/python-bigquery-dataframes/commit/6e98a2cf53dd130963a9c5ba07e21ce6c32b7c6d)) +* Use session temp tables for all ephemeral storage ([#1569](https://github.com/googleapis/python-bigquery-dataframes/issues/1569)) ([9711b83](https://github.com/googleapis/python-bigquery-dataframes/commit/9711b830a7bdc6740f4ebeaaab6f37082ae5dfd9)) +* Use validated local storage for data uploads ([#1612](https://github.com/googleapis/python-bigquery-dataframes/issues/1612)) ([aee4159](https://github.com/googleapis/python-bigquery-dataframes/commit/aee4159807401d7432bb8c0c41859ada3291599b)) +* Warn the deprecated `max_download_size`, `random_state` and `sampling_method` parameters in `(DataFrame|Series).to_pandas()` ([#1573](https://github.com/googleapis/python-bigquery-dataframes/issues/1573)) ([b9623da](https://github.com/googleapis/python-bigquery-dataframes/commit/b9623daa847805abf420f0f11e173674fb147193)) + + +### Bug Fixes + +* `to_pandas_batches()` respects `page_size` and `max_results` again ([#1572](https://github.com/googleapis/python-bigquery-dataframes/issues/1572)) ([27c5905](https://github.com/googleapis/python-bigquery-dataframes/commit/27c59051549b83fdac954eaa3d257803c6f9133d)) +* Ensure `page_size` works correctly in `to_pandas_batches` when `max_results` is not set ([#1588](https://github.com/googleapis/python-bigquery-dataframes/issues/1588)) ([570cff3](https://github.com/googleapis/python-bigquery-dataframes/commit/570cff3c2efe3a47535bb3c931a345856d256a19)) +* Include role and service account in IAM exception ([#1564](https://github.com/googleapis/python-bigquery-dataframes/issues/1564)) ([8c50755](https://github.com/googleapis/python-bigquery-dataframes/commit/8c507556c5f61fab95c6389a8ad04d731df1df7b)) +* Make `dataset` and `name` params mandatory in `udf` ([#1619](https://github.com/googleapis/python-bigquery-dataframes/issues/1619)) ([637e860](https://github.com/googleapis/python-bigquery-dataframes/commit/637e860d3cea0a36b1e58a45ec9b9ab0059fb3b1)) +* Pandas.cut returns labels index for numeric breaks when labels=False ([#1548](https://github.com/googleapis/python-bigquery-dataframes/issues/1548)) ([b2375de](https://github.com/googleapis/python-bigquery-dataframes/commit/b2375decedbf1a793eedbbc9dc2efc2296f8cc6e)) +* Prevent `KeyError` in `bpd.concat` with empty DF and struct/array types DF ([#1568](https://github.com/googleapis/python-bigquery-dataframes/issues/1568)) ([b4da1cf](https://github.com/googleapis/python-bigquery-dataframes/commit/b4da1cf3c0fb94a2bb21e6039896accab85742d4)) +* Read_csv supports for tilde local paths and includes index for bigquery_stream write engine ([#1580](https://github.com/googleapis/python-bigquery-dataframes/issues/1580)) ([352e8e4](https://github.com/googleapis/python-bigquery-dataframes/commit/352e8e4b05cf19e970b47b017f958a1c6fc89bea)) +* Use dictionaries to avoid problematic google.iam namespace ([#1611](https://github.com/googleapis/python-bigquery-dataframes/issues/1611)) ([b03e44f](https://github.com/googleapis/python-bigquery-dataframes/commit/b03e44f7fca429a6de41c42ec28504b688cd84f0)) + + +### Performance Improvements + +* Directly read gbq table for simple plans ([#1607](https://github.com/googleapis/python-bigquery-dataframes/issues/1607)) ([6ad38e8](https://github.com/googleapis/python-bigquery-dataframes/commit/6ad38e8287354f62b0c5cad1f3d5b897256860ca)) + + +### Dependencies + +* Remove jellyfish dependency 
([#1604](https://github.com/googleapis/python-bigquery-dataframes/issues/1604)) ([1ac0e1e](https://github.com/googleapis/python-bigquery-dataframes/commit/1ac0e1e82c097717338a6816f27c01b67736f51c)) +* Remove parsy dependency ([#1610](https://github.com/googleapis/python-bigquery-dataframes/issues/1610)) ([293f676](https://github.com/googleapis/python-bigquery-dataframes/commit/293f676e98446c417c12c345d5db875dd4c438df)) +* Remove test dependency on pytest-mock package ([#1622](https://github.com/googleapis/python-bigquery-dataframes/issues/1622)) ([1ba72ea](https://github.com/googleapis/python-bigquery-dataframes/commit/1ba72ead256178afee6f1d3303b0556bec1c4a9b)) +* Support a shapely versions 1.8.5+ ([#1621](https://github.com/googleapis/python-bigquery-dataframes/issues/1621)) ([e39ee3b](https://github.com/googleapis/python-bigquery-dataframes/commit/e39ee3bcf37f2a4f5e6ce981d248c24c6f5d770b)) + + +### Documentation + +* Add details for `bigquery_connection` in `[@bpd](https://github.com/bpd).udf` docstring ([#1609](https://github.com/googleapis/python-bigquery-dataframes/issues/1609)) ([ef63772](https://github.com/googleapis/python-bigquery-dataframes/commit/ef6377277bc9c354385c83ceba9e00094c0a6cc6)) +* Add explain forecast snippet to multiple time series tutorial ([#1586](https://github.com/googleapis/python-bigquery-dataframes/issues/1586)) ([40c55a0](https://github.com/googleapis/python-bigquery-dataframes/commit/40c55a06a529ca49d203227ccf36c12427d0cd5b)) +* Add message to remove default model for version 3.0 ([#1563](https://github.com/googleapis/python-bigquery-dataframes/issues/1563)) ([910be2b](https://github.com/googleapis/python-bigquery-dataframes/commit/910be2b5b2bfaf0e21cdc4fd775c1605a864c1aa)) +* Add samples for ArimaPlus `time_series_id_col` feature ([#1577](https://github.com/googleapis/python-bigquery-dataframes/issues/1577)) ([1e4cd9c](https://github.com/googleapis/python-bigquery-dataframes/commit/1e4cd9cf69f98d4af6b2a70bd8189c619b19baaa)) +* Add warning for bigframes 2.0 ([#1557](https://github.com/googleapis/python-bigquery-dataframes/issues/1557)) ([3f0eaa1](https://github.com/googleapis/python-bigquery-dataframes/commit/3f0eaa1c6b02d086270421f91dbb6aa2f117317d)) +* Deprecate default model in `TextEmbedddingGenerator`, `GeminiTextGenerator`, and other `bigframes.ml.llm` classes ([#1570](https://github.com/googleapis/python-bigquery-dataframes/issues/1570)) ([89ab33e](https://github.com/googleapis/python-bigquery-dataframes/commit/89ab33e1179aef142415fd5c9073671903bf1d45)) +* Include all licenses for vendored packages in the root LICENSE file ([#1626](https://github.com/googleapis/python-bigquery-dataframes/issues/1626)) ([8116ed0](https://github.com/googleapis/python-bigquery-dataframes/commit/8116ed0938634d301a153613f8a9cd8053ddf026)) +* Remove gemini-1.5 deprecation warning for `GeminiTextGenerator` ([#1562](https://github.com/googleapis/python-bigquery-dataframes/issues/1562)) ([0cc6784](https://github.com/googleapis/python-bigquery-dataframes/commit/0cc678448fdec1eaa3acfbb563a018325a8c85bc)) +* Use restructured text to allow publishing to PyPI ([#1565](https://github.com/googleapis/python-bigquery-dataframes/issues/1565)) ([d1e9ec2](https://github.com/googleapis/python-bigquery-dataframes/commit/d1e9ec2936d270ec4035014ea3ddd335a5747ade)) + + +### Miscellaneous Chores + +* Make `remote_function` params keyword only ([#1537](https://github.com/googleapis/python-bigquery-dataframes/issues/1537)) 
([9eb9089](https://github.com/googleapis/python-bigquery-dataframes/commit/9eb9089ce3f1dad39761ba8ebc2d6f76261bd243)) + ## [1.42.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.41.0...v1.42.0) (2025-03-27) diff --git a/LICENSE b/LICENSE index d645695673..c7807337dc 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,6 @@ +Files: All files not covered by another license. Notably: the bigframes module, +tests/*, bigframes_vendored.google_cloud_bigquery module, +bigframes_vendored.ibis module, and bigframes_vendored.xgboost module. Apache License Version 2.0, January 2004 @@ -200,3 +203,118 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +--- + +Files: For the bigframes_vendored.cpython module. + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 + +1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. +2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright , i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. +3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. +4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. +6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. +7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. +8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. + +--- + +Files: for the bigframes_vendored.geopandas module. + +Copyright (c) 2013-2022, GeoPandas developers. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of GeoPandas nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +--- + +Files: The bigframes_vendored.pandas module. + +BSD 3-Clause License + +Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team +All rights reserved. + +Copyright (c) 2011-2023, Open source contributors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +--- + +Files: The bigframes_vendored.sklearn module. + +BSD 3-Clause License + +Copyright (c) 2007-2023 The scikit-learn developers. +All rights reserved. 
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.rst b/README.rst
index 185c50c14a..7f487b9077 100644
--- a/README.rst
+++ b/README.rst
@@ -12,6 +12,31 @@ powered by the BigQuery engine.
 BigQuery DataFrames is an open-source package. You can run
 ``pip install --upgrade bigframes`` to install the latest version.
 
+⚠️ Warning: Breaking Changes in BigQuery DataFrames v2.0
+--------------------------------------------------------
+
+Version 2.0 introduces breaking changes for improved security and performance. Key default behaviors have changed, including:
+
+* **Large Results (>10GB):** The default value for ``allow_large_results`` has changed to ``False``.
+  Methods like ``to_pandas()`` will now fail if the query result's compressed data size exceeds 10GB,
+  unless large results are explicitly permitted.
+* **Remote Function Security:** The library no longer uses the Compute Engine default service account
+  as the identity of the Cloud Run functions it creates; to keep that behavior, pass
+  ``cloud_function_service_account="default"`` explicitly. Network ingress now defaults to ``"internal-only"``.
+* **@remote_function Argument Passing:** Arguments other than ``input_types``, ``output_type``, and ``dataset``
+  to ``remote_function`` must now be passed by keyword; positional arguments are no longer supported.
+* **@udf Argument Passing:** The ``dataset`` and ``name`` arguments to ``udf`` are now mandatory.
+* **Endpoint Connections:** Automatic fallback to locational endpoints in certain regions is removed.
+* **LLM Updates (Gemini Integration):** Integrations now default to the ``gemini-2.0-flash-001`` model.
+  PaLM2 support has been removed; please migrate any existing PaLM2 usage to Gemini. **Note:** The current default
+  model will be removed in Version 3.0.
+
+**Important:** If you are not ready to adapt to these changes, pin your dependency to a version below 2.0
+(for example, ``bigframes==1.42.0``) to avoid disruption.
+
+To learn about these changes and how to migrate to version 2.0, see the
+`updated introduction guide `_.
+
 .. |GA| image:: https://img.shields.io/badge/support-GA-gold.svg
    :target: https://github.com/googleapis/google-cloud-python/blob/main/README.rst#general-availability
 .. |pypi| image:: https://img.shields.io/pypi/v/bigframes.svg
diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py
index 84bc4f6d01..5155b09063 100644
--- a/bigframes/_config/bigquery_options.py
+++ b/bigframes/_config/bigquery_options.py
@@ -21,7 +21,6 @@
 
 import google.api_core.exceptions
 import google.auth.credentials
-import jellyfish
 
 import bigframes.constants
 import bigframes.enums
@@ -37,6 +36,7 @@
 
 
 def _get_validated_location(value: Optional[str]) -> Optional[str]:
+    import bigframes._tools.strings
 
     if value is None or value in bigframes.constants.ALL_BIGQUERY_LOCATIONS:
         return value
@@ -53,7 +53,7 @@ def _get_validated_location(value: Optional[str]) -> Optional[str]:
 
     possibility = min(
         bigframes.constants.ALL_BIGQUERY_LOCATIONS,
-        key=lambda item: jellyfish.levenshtein_distance(location, item),
+        key=lambda item: bigframes._tools.strings.levenshtein_distance(location, item),
     )
     # There are many layers before we get to (possibly) the user's code:
     # -> bpd.options.bigquery.location = "us-central-1"
@@ -89,7 +89,7 @@ def __init__(
         kms_key_name: Optional[str] = None,
         skip_bq_connection_check: bool = False,
         *,
-        allow_large_results: bool = True,
+        allow_large_results: bool = False,
         ordering_mode: Literal["strict", "partial"] = "strict",
         client_endpoints_override: Optional[dict] = None,
     ):
@@ -258,7 +258,8 @@ def allow_large_results(self, value: bool):
 
     @property
     def use_regional_endpoints(self) -> bool:
-        """Flag to connect to regional API endpoints.
+        """Flag to connect to regional API endpoints for BigQuery API and
+        BigQuery Storage API.
 
         .. note::
            Use of regional endpoints is a feature in Preview and available only
@@ -267,18 +268,16 @@ def use_regional_endpoints(self) -> bool:
            "us-east5", "us-east7", "us-south1", "us-west1", "us-west2",
            "us-west3" and "us-west4".
 
-        .. deprecated:: 0.13.0
-            Use of locational endpoints is available only in selected projects.
-
-        Requires that ``location`` is set. For supported regions, for example
-        ``europe-west3``, you need to specify ``location='europe-west3'`` and
-        ``use_regional_endpoints=True``, and then BigQuery DataFrames would
-        connect to the BigQuery endpoint ``bigquery.europe-west3.rep.googleapis.com``.
-        For not supported regions, for example ``asia-northeast1``, when you
-        specify ``location='asia-northeast1'`` and ``use_regional_endpoints=True``,
-        a different endpoint (called locational endpoint, now deprecated, used
-        to provide weaker promise on the request remaining within the location
-        during transit) ``europe-west3-bigquery.googleapis.com`` would be used.
+        Requires that ``location`` is set. For [supported regions](https://cloud.google.com/bigquery/docs/regional-endpoints),
+        for example ``europe-west3``, you need to specify
+        ``location='europe-west3'`` and ``use_regional_endpoints=True``, and
+        then BigQuery DataFrames would connect to the BigQuery endpoint
+        ``bigquery.europe-west3.rep.googleapis.com``. For unsupported regions,
+        for example ``asia-northeast1``, when you specify
+        ``location='asia-northeast1'`` and ``use_regional_endpoints=True``,
+        the global endpoint ``bigquery.googleapis.com`` would be used, which
+        provides no guarantee that the request remains within the location
+        during transit.
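+
+        For example, to opt in to regional endpoints in a supported region
+        (illustrative; assumes your project has access to the Preview)::
+
+            import bigframes.pandas as bpd
+
+            bpd.options.bigquery.location = "europe-west3"
+            bpd.options.bigquery.use_regional_endpoints = True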
 
         Returns:
             bool:
diff --git a/bigframes/_tools/__init__.py b/bigframes/_tools/__init__.py
new file mode 100644
index 0000000000..ea3bc209d0
--- /dev/null
+++ b/bigframes/_tools/__init__.py
@@ -0,0 +1,19 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""_tools is a collection of helper functions with minimal dependencies.
+
+Please keep the dependencies used in this subpackage to a minimum to avoid the
+risk of circular dependencies.
+"""
diff --git a/bigframes/_tools/strings.py b/bigframes/_tools/strings.py
new file mode 100644
index 0000000000..3d9402c68f
--- /dev/null
+++ b/bigframes/_tools/strings.py
@@ -0,0 +1,66 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Helper methods for processing strings with minimal dependencies.
+
+Please keep the dependencies used in this subpackage to a minimum to avoid the
+risk of circular dependencies.
+"""
+
+import numpy
+
+
+def levenshtein_distance(left: str, right: str) -> int:
+    """Compute the edit distance between two strings.
+
+    This is the minimum number of substitutions, insertions, and deletions
+    needed to transform the left string into the right string. See:
+    https://en.wikipedia.org/wiki/Levenshtein_distance
+    """
+    # TODO(tswast): accelerate with numba (if available) if we end up using this
+    # function in contexts other than when raising an exception or there are too
+    # many values to compare even in that context.
+
+    distances0 = numpy.zeros(len(right) + 1)
+    distances1 = numpy.zeros(len(right) + 1)
+
+    # Maximum distance is to drop all characters and then add the other string.
+    distances0[:] = range(len(right) + 1)
+
+    for left_index in range(len(left)):
+        # Calculate distance from distances0 to distances1.
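+        # Invariant: distances0[j] holds the edit distance between
+        # left[:left_index] and right[:j]; this pass fills distances1 with the
+        # distances for left[:left_index + 1].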
+
+        # Edit distance is to delete (left_index + 1) chars from left to match empty right
+        distances1[0] = left_index + 1
+        # Compare left[left_index] against each character of right.
+        for right_index in range(len(right)):
+            left_char = left[left_index]
+            right_char = right[right_index]
+
+            deletion_cost = distances0[right_index + 1] + 1
+            insertion_cost = distances1[right_index] + 1
+            if left_char == right_char:
+                substitution_cost = distances0[right_index]
+            else:
+                substitution_cost = distances0[right_index] + 1
+
+            distances1[right_index + 1] = min(
+                deletion_cost, insertion_cost, substitution_cost
+            )
+
+        temp = distances0
+        distances0 = distances1
+        distances1 = temp
+
+    # Cast to int: the working arrays are float64, but the distance is integral.
+    return int(distances0[len(right)])
diff --git a/bigframes/blob/_functions.py b/bigframes/blob/_functions.py
index 6b9fa308d8..f7f035bff4 100644
--- a/bigframes/blob/_functions.py
+++ b/bigframes/blob/_functions.py
@@ -69,7 +69,7 @@ def _output_bq_type(self):
     def _create_udf(self):
         """Create Python UDF in BQ. Return name of the UDF."""
         udf_name = str(
-            self._session._loader._storage_manager.generate_unique_resource_id()
+            self._session._anon_dataset_manager.generate_unique_resource_id()
         )
 
         func_body = inspect.getsource(self._func)
@@ -102,6 +102,9 @@ def _create_udf(self):
     def udf(self):
         """Create and return the UDF object."""
         udf_name = self._create_udf()
+
+        # TODO(b/404605969): remove cleanups when UDF fixes dataset deletion.
+        self._session._function_session._update_temp_artifacts(udf_name, "")
         return self._session.read_gbq_function(udf_name)
diff --git a/bigframes/clients.py b/bigframes/clients.py
index 1b8212377d..f1f6d686fd 100644
--- a/bigframes/clients.py
+++ b/bigframes/clients.py
@@ -17,32 +17,57 @@
 from __future__ import annotations
 
 import logging
+import textwrap
 import time
 from typing import cast, Optional
 
 import google.api_core.exceptions
 import google.api_core.retry
 from google.cloud import bigquery_connection_v1, resourcemanager_v3
-from google.iam.v1 import iam_policy_pb2, policy_pb2
 
 logger = logging.getLogger(__name__)
 
 
-def resolve_full_bq_connection_name(
-    connection_name: str, default_project: str, default_location: str
+def get_canonical_bq_connection_id(
+    connection_id: str, default_project: str, default_location: str
 ) -> str:
-    """Retrieve the full connection name of the form <project>.<location>.<connection_id>.
-    Use default project, location or connection_id when any of them are missing."""
-    if connection_name.count(".") == 2:
-        return connection_name
-
-    if connection_name.count(".") == 1:
-        return f"{default_project}.{connection_name}"
-
-    if connection_name.count(".") == 0:
-        return f"{default_project}.{default_location}.{connection_name}"
-
-    raise ValueError(f"Invalid connection name format: {connection_name}.")
+    """
+    Retrieve the full connection id of the form
+    <project>.<location>.<connection_id>.
+    Use default project, location or connection_id when any of them are missing.
+    """
+
+    if "/" in connection_id:
+        fields = connection_id.split("/")
+        if (
+            len(fields) == 6
+            and fields[0] == "projects"
+            and fields[2] == "locations"
+            and fields[4] == "connections"
+        ):
+            return ".".join((fields[1], fields[3], fields[5]))
+    else:
+        if connection_id.count(".") == 2:
+            return connection_id
+
+        if connection_id.count(".") == 1:
+            return f"{default_project}.{connection_id}"
+
+        if connection_id.count(".") == 0:
+            return f"{default_project}.{default_location}.{connection_id}"
+
+    raise ValueError(
+        textwrap.dedent(
+            f"""
+            Invalid connection id format: {connection_id}.
+            Only the following formats are supported:
+                <project-id>.<location>.<connection-id>,
+                <location>.<connection-id>,
+                <connection-id>,
+                projects/<project-id>/locations/<location>/connections/<connection-id>
+            """
+        ).strip()
+    )
 
 
 class BqConnectionManager:
@@ -60,7 +85,11 @@ def __init__(
         self._cloud_resource_manager_client = cloud_resource_manager_client
 
     def create_bq_connection(
-        self, project_id: str, location: str, connection_id: str, iam_role: str
+        self,
+        project_id: str,
+        location: str,
+        connection_id: str,
+        iam_role: Optional[str] = None,
     ):
         """Create the BQ connection if not exist. In addition, try to add the
         IAM role to the connection to ensure required permissions.
@@ -80,7 +109,7 @@ def create_bq_connection(
             )
             if service_account_id:
                 logger.info(
-                    f"Connector {project_id}.{location}.{connection_id} already exists"
+                    f"BQ connection {project_id}.{location}.{connection_id} already exists"
                 )
             else:
                 connection_name, service_account_id = self._create_bq_connection(
@@ -90,9 +119,15 @@ def create_bq_connection(
                 )
                 logger.info(
                     f"Created BQ connection {connection_name} with service account id: {service_account_id}"
                 )
             service_account_id = cast(str, service_account_id)
+
         # Ensure IAM role on the BQ connection
         # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#grant_permission_on_function
-        self._ensure_iam_binding(project_id, service_account_id, iam_role)
+        if iam_role:
+            try:
+                self._ensure_iam_binding(project_id, service_account_id, iam_role)
+            except google.api_core.exceptions.PermissionDenied as ex:
+                ex.message = f"Failed ensuring IAM binding (role={iam_role}, service-account={service_account_id}). {ex.message}"
+                raise
 
     # Introduce retries to accommodate transient errors like:
     # (1) Etag mismatch,
@@ -125,7 +160,9 @@ def _ensure_iam_binding(
         project = f"projects/{project_id}"
         service_account = f"serviceAccount:{service_account_id}"
         role = f"roles/{iam_role}"
-        request = iam_policy_pb2.GetIamPolicyRequest(resource=project)
+        request = {
+            "resource": project
+        }  # Use a dictionary to avoid problematic google.iam namespace package.
         policy = self._cloud_resource_manager_client.get_iam_policy(request=request)
 
         # Check if the binding already exists, and if does, do nothing more
@@ -135,9 +172,15 @@ def _ensure_iam_binding(
             return
 
         # Create a new binding
-        new_binding = policy_pb2.Binding(role=role, members=[service_account])
+        new_binding = {
+            "role": role,
+            "members": [service_account],
+        }  # Use a dictionary to avoid problematic google.iam namespace package.
         policy.bindings.append(new_binding)
-        request = iam_policy_pb2.SetIamPolicyRequest(resource=project, policy=policy)
+        request = {
+            "resource": project,
+            "policy": policy,
+        }  # Use a dictionary to avoid problematic google.iam namespace package.
         self._cloud_resource_manager_client.set_iam_policy(request=request)
 
         # We would wait for the IAM policy change to take effect
diff --git a/bigframes/constants.py b/bigframes/constants.py
index 8f5ed95e1a..89f27afd78 100644
--- a/bigframes/constants.py
+++ b/bigframes/constants.py
@@ -96,22 +96,27 @@
     }
 )
 
-# https://cloud.google.com/storage/docs/locational-endpoints
-LEP_ENABLED_BIGQUERY_LOCATIONS = frozenset(
+REP_NOT_ENABLED_BIGQUERY_LOCATIONS = frozenset(
     ALL_BIGQUERY_LOCATIONS - REP_ENABLED_BIGQUERY_LOCATIONS
 )
 
-LEP_DEPRECATION_WARNING_MESSAGE = textwrap.dedent(
+LOCATION_NEEDED_FOR_REP_MESSAGE = textwrap.dedent(
     """
-    Support for regional endpoints is not yet available in the location
-    {location} for BigQuery and BigQuery Storage APIs. For the supported
-    locations and APIs see https://cloud.google.com/bigquery/docs/regional-endpoints.
-    For other locations and APIs, currently an older, now deprecated locational
-    endpoints are being used, which requires your project to be allowlisted. In
-    future version 2.0 onwards the locational endpoints will no longer be
-    supported automatically when you enable regional endpoints. However, if you
-    still need them, you will be able to override the endpoints directly by
-    doing the following:
+    Must set location to use regional endpoints.
+    You can set it via bigframes.pandas.options.bigquery.location.
+    The supported locations can be found at
+    https://cloud.google.com/bigquery/docs/regional-endpoints#supported-locations.
+    """
+).strip()
+
+REP_NOT_SUPPORTED_MESSAGE = textwrap.dedent(
+    """
+    Support for regional endpoints for BigQuery and BigQuery Storage APIs may
+    not be available in the location {location}. For the supported APIs and
+    locations see https://cloud.google.com/bigquery/docs/regional-endpoints.
+    If you have the (deprecated) locational endpoints enabled in your project
+    (which requires your project to be allowlisted), you can override the
+    endpoints directly by doing the following:
     bigframes.pandas.options.bigquery.client_endpoints_override = {{
         "bqclient": "https://{location}-bigquery.googleapis.com",
         "bqconnectionclient": "{location}-bigqueryconnection.googleapis.com",
diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py
index 7ede7b7e65..eba63ad72e 100644
--- a/bigframes/core/array_value.py
+++ b/bigframes/core/array_value.py
@@ -16,7 +16,6 @@
 from dataclasses import dataclass
 import datetime
 import functools
-import io
 import typing
 from typing import Iterable, List, Mapping, Optional, Sequence, Tuple
 import warnings
@@ -24,7 +23,6 @@
 import google.cloud.bigquery
 import pandas
 import pyarrow as pa
-import pyarrow.feather as pa_feather
 
 import bigframes.core.expression as ex
 import bigframes.core.guid
@@ -60,24 +58,20 @@ class ArrayValue:
 
     @classmethod
     def from_pyarrow(cls, arrow_table: pa.Table, session: Session):
-        adapted_table = local_data.adapt_pa_table(arrow_table)
-        schema = local_data.arrow_schema_to_bigframes(adapted_table.schema)
+        data_source = local_data.ManagedArrowTable.from_pyarrow(arrow_table)
+        return cls.from_managed(source=data_source, session=session)
 
-        iobytes = io.BytesIO()
-        pa_feather.write_feather(adapted_table, iobytes)
-        # Scan all columns by default, we define this list as it can be pruned while preserving source_def
+    @classmethod
+    def from_managed(cls, source: local_data.ManagedArrowTable, session: Session):
         scan_list = nodes.ScanList(
             tuple(
                 nodes.ScanItem(ids.ColumnId(item.column), item.dtype, item.column)
-                for item in schema.items
+                for item in source.schema.items
            )
        )
-
        node = nodes.ReadLocalNode(
-            iobytes.getvalue(),
-            data_schema=schema,
+            source,
             session=session,
-            n_rows=arrow_table.num_rows,
             scan_list=scan_list,
         )
         return cls(node)
@@ -103,6 +97,7 @@ def from_table(
         at_time: Optional[datetime.datetime] = None,
         primary_key: Sequence[str] = (),
         offsets_col: Optional[str] = None,
+        n_rows: Optional[int] = None,
     ):
         if offsets_col and primary_key:
             raise ValueError("must set at most one of 'offsets', 'primary_key'")
@@ -132,7 +127,11 @@ def from_table(
             )
         )
         source_def = nodes.BigqueryDataSource(
-            table=table_def, at_time=at_time, sql_predicate=predicate, ordering=ordering
+            table=table_def,
+            at_time=at_time,
+            sql_predicate=predicate,
+            ordering=ordering,
+            n_rows=n_rows,
         )
         node = nodes.ReadTableNode(
             source=source_def,
@@ -182,7 +181,9 @@ def as_cached(
         Replace the node with an equivalent one that
references a table where the value has been materialized to. """ table = nodes.GbqTable.from_table(cache_table) - source = nodes.BigqueryDataSource(table, ordering=ordering) + source = nodes.BigqueryDataSource( + table, ordering=ordering, n_rows=cache_table.num_rows + ) # Assumption: GBQ cached table uses field name as bq column name scan_list = nodes.ScanList( tuple( @@ -412,7 +413,7 @@ def project_window_op( skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection """ # TODO: Support non-deterministic windowing - if window_spec.row_bounded or not op.order_independent: + if window_spec.is_row_bounded or not op.order_independent: if self.node.order_ambiguous and not self.session._strictly_ordered: if not self.session._allows_ambiguity: raise ValueError( diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 2992718412..c53f392417 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -53,6 +53,7 @@ from bigframes import session from bigframes._config import sampling_options import bigframes.constants +from bigframes.core import local_data import bigframes.core as core import bigframes.core.compile.googlesql as googlesql import bigframes.core.expression as ex @@ -187,8 +188,8 @@ def from_local( pd_data = pd_data.set_axis(column_ids, axis=1) pd_data = pd_data.reset_index(names=index_ids) - as_pyarrow = pa.Table.from_pandas(pd_data, preserve_index=False) - array_value = core.ArrayValue.from_pyarrow(as_pyarrow, session=session) + managed_data = local_data.ManagedArrowTable.from_pandas(pd_data) + array_value = core.ArrayValue.from_managed(managed_data, session=session) block = cls( array_value, column_labels=column_labels, @@ -590,6 +591,7 @@ def to_pandas_batches( page_size: Optional[int] = None, max_results: Optional[int] = None, allow_large_results: Optional[bool] = None, + squeeze: Optional[bool] = False, ): """Download results one message at a time. @@ -605,7 +607,10 @@ def to_pandas_batches( for record_batch in execute_result.arrow_batches(): df = io_pandas.arrow_to_pandas(record_batch, self.expr.schema) self._copy_index_to_pandas(df) - yield df + if squeeze: + yield df.squeeze(axis=1) + else: + yield df def _copy_index_to_pandas(self, df: pd.DataFrame): """Set the index on pandas DataFrame to match this block. 
@@ -987,7 +992,7 @@ def apply_nary_op( def multi_apply_window_op( self, columns: typing.Sequence[str], - op: agg_ops.WindowOp, + op: agg_ops.UnaryWindowOp, window_spec: windows.WindowSpec, *, skip_null_groups: bool = False, @@ -1058,7 +1063,7 @@ def project_exprs( def apply_window_op( self, column: str, - op: agg_ops.WindowOp, + op: agg_ops.UnaryWindowOp, window_spec: windows.WindowSpec, *, result_label: Label = None, @@ -2708,11 +2713,13 @@ def _get_rows_as_json_values(self) -> Block: ) ) + dest_table = self.session.bqclient.get_table(destination) expr = core.ArrayValue.from_table( - self.session.bqclient.get_table(destination), + dest_table, schema=new_schema, session=self.session, offsets_col=ordering_column_name, + n_rows=dest_table.num_rows, ).drop_columns([ordering_column_name]) block = Block( expr, diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py index f96471e200..0d31798f25 100644 --- a/bigframes/core/compile/aggregate_compiler.py +++ b/bigframes/core/compile/aggregate_compiler.py @@ -360,69 +360,73 @@ def _( if isinstance(op.bins, int): col_min = _apply_window_if_present(x.min(), window) col_max = _apply_window_if_present(x.max(), window) + adj = (col_max - col_min) * 0.001 bin_width = (col_max - col_min) / op.bins - if op.labels is False: - for this_bin in range(op.bins - 1): - if op.right: - case_expr = x <= (col_min + (this_bin + 1) * bin_width) - else: - case_expr = x < (col_min + (this_bin + 1) * bin_width) - out = out.when( - case_expr, - compile_ibis_types.literal_to_ibis_scalar( - this_bin, force_dtype=pd.Int64Dtype() - ), + for this_bin in range(op.bins): + if op.labels is False: + value = compile_ibis_types.literal_to_ibis_scalar( + this_bin, + force_dtype=pd.Int64Dtype(), ) - out = out.when(x.notnull(), op.bins - 1) - else: - interval_struct = None - adj = (col_max - col_min) * 0.001 - for this_bin in range(op.bins): - left_edge_adj = adj if this_bin == 0 and op.right else 0 - right_edge_adj = adj if this_bin == op.bins - 1 and not op.right else 0 + elif isinstance(op.labels, typing.Iterable): + value = compile_ibis_types.literal_to_ibis_scalar( + list(op.labels)[this_bin], + force_dtype=pd.StringDtype(storage="pyarrow"), + ) + else: + left_adj = adj if this_bin == 0 and op.right else 0 + right_adj = adj if this_bin == op.bins - 1 and not op.right else 0 - left_edge = col_min + this_bin * bin_width - left_edge_adj - right_edge = col_min + (this_bin + 1) * bin_width + right_edge_adj + left = col_min + this_bin * bin_width - left_adj + right = col_min + (this_bin + 1) * bin_width + right_adj if op.right: - interval_struct = ibis_types.struct( - { - "left_exclusive": left_edge, - "right_inclusive": right_edge, - } + value = ibis_types.struct( + {"left_exclusive": left, "right_inclusive": right} ) else: - interval_struct = ibis_types.struct( - { - "left_inclusive": left_edge, - "right_exclusive": right_edge, - } + value = ibis_types.struct( + {"left_inclusive": left, "right_exclusive": right} ) - - if this_bin < op.bins - 1: - if op.right: - case_expr = x <= (col_min + (this_bin + 1) * bin_width) - else: - case_expr = x < (col_min + (this_bin + 1) * bin_width) - out = out.when(case_expr, interval_struct) + if this_bin == op.bins - 1: + case_expr = x.notnull() + else: + if op.right: + case_expr = x <= (col_min + (this_bin + 1) * bin_width) else: - out = out.when(x.notnull(), interval_struct) + case_expr = x < (col_min + (this_bin + 1) * bin_width) + out = out.when(case_expr, value) else: # Interpret as intervals - for 
interval in op.bins: + for this_bin, interval in enumerate(op.bins): left = compile_ibis_types.literal_to_ibis_scalar(interval[0]) right = compile_ibis_types.literal_to_ibis_scalar(interval[1]) if op.right: condition = (x > left) & (x <= right) - interval_struct = ibis_types.struct( - {"left_exclusive": left, "right_inclusive": right} - ) else: condition = (x >= left) & (x < right) - interval_struct = ibis_types.struct( - {"left_inclusive": left, "right_exclusive": right} + + if op.labels is False: + value = compile_ibis_types.literal_to_ibis_scalar( + this_bin, + force_dtype=pd.Int64Dtype(), ) - out = out.when(condition, interval_struct) + elif isinstance(op.labels, typing.Iterable): + value = compile_ibis_types.literal_to_ibis_scalar( + list(op.labels)[this_bin], + force_dtype=pd.StringDtype(storage="pyarrow"), + ) + else: + if op.right: + value = ibis_types.struct( + {"left_exclusive": left, "right_inclusive": right} + ) + else: + value = ibis_types.struct( + {"left_inclusive": left, "right_exclusive": right} + ) + + out = out.when(condition, value) return out.end() diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 4443c495d7..6202a34ce2 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -26,15 +26,16 @@ from bigframes_vendored.ibis.expr.operations import window as ibis_expr_window import bigframes_vendored.ibis.expr.operations as ibis_ops import bigframes_vendored.ibis.expr.types as ibis_types -import pandas +from google.cloud import bigquery +import pyarrow as pa +from bigframes.core import utils import bigframes.core.compile.aggregate_compiler as agg_compiler import bigframes.core.compile.googlesql import bigframes.core.compile.ibis_types import bigframes.core.compile.scalar_op_compiler as op_compilers import bigframes.core.compile.scalar_op_compiler as scalar_op_compiler import bigframes.core.expression as ex -import bigframes.core.guid from bigframes.core.ordering import OrderingExpression import bigframes.core.sql from bigframes.core.window_spec import RangeWindowBounds, RowsWindowBounds, WindowSpec @@ -231,7 +232,7 @@ def aggregate( col_out: agg_compiler.compile_aggregate( aggregate, bindings, - order_by=_convert_ordering_to_table_values(table, order_by), + order_by=_convert_row_ordering_to_table_values(table, order_by), ) for aggregate, col_out in aggregations } @@ -279,11 +280,8 @@ def _reproject_to_table(self) -> UnorderedIR: ) @classmethod - def from_pandas( - cls, - pd_df: pandas.DataFrame, - scan_cols: bigframes.core.nodes.ScanList, - offsets: typing.Optional[str] = None, + def from_polars( + cls, pa_table: pa.Table, schema: Sequence[bigquery.SchemaField] ) -> UnorderedIR: # TODO: add offsets """ @@ -292,37 +290,16 @@ def from_pandas( Assumed that the dataframe has unique string column names and bigframes-suppported dtypes. 
""" + import bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes - # ibis memtable cannot handle NA, must convert to None - # this destroys the schema however - ibis_values = pd_df.astype("object").where(pandas.notnull(pd_df), None) # type: ignore - if offsets: - ibis_values = ibis_values.assign(**{offsets: range(len(pd_df))}) # derive the ibis schema from the original pandas schema - ibis_schema = [ - ( - local_label, - bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype(dtype), - ) - for id, dtype, local_label in scan_cols.items - ] - if offsets: - ibis_schema.append((offsets, ibis_dtypes.int64)) - keys_memtable = bigframes_vendored.ibis.memtable( - ibis_values, schema=bigframes_vendored.ibis.schema(ibis_schema) + pa_table, + schema=third_party_ibis_bqtypes.BigQuerySchema.to_ibis(list(schema)), ) - - columns = [ - keys_memtable[local_label].name(col_id.sql) - for col_id, _, local_label in scan_cols.items - ] - if offsets: - columns.append(keys_memtable[offsets].name(offsets)) - return cls( keys_memtable, - columns=columns, + columns=tuple(keys_memtable[key] for key in keys_memtable.columns), ) def join( @@ -463,7 +440,7 @@ def project_window_op( never_skip_nulls=never_skip_nulls, ) - if expression.op.order_independent and not window_spec.row_bounded: + if expression.op.order_independent and window_spec.is_unbounded: # notably percentile_cont does not support ordering clause window_spec = window_spec.without_order() window = self._ibis_window_from_spec(window_spec) @@ -541,16 +518,30 @@ def _ibis_window_from_spec(self, window_spec: WindowSpec): # 1. Order-independent op (aggregation, cut, rank) with unbound window - no ordering clause needed # 2. Order-independent op (aggregation, cut, rank) with range window - use ordering clause, ties allowed # 3. Order-depedenpent op (navigation functions, array_agg) or rows bounds - use total row order to break ties. - if window_spec.ordering: - order_by = _convert_ordering_to_table_values( + if window_spec.is_row_bounded: + if not window_spec.ordering: + # If window spec has following or preceding bounds, we need to apply an unambiguous ordering. + raise ValueError("No ordering provided for ordered analytic function") + order_by = _convert_row_ordering_to_table_values( self._column_names, window_spec.ordering, ) - elif window_spec.row_bounded: - # If window spec has following or preceding bounds, we need to apply an unambiguous ordering. - raise ValueError("No ordering provided for ordered analytic function") - else: + + elif window_spec.is_range_bounded: + order_by = [ + _convert_range_ordering_to_table_value( + self._column_names, + window_spec.ordering[0], + ) + ] + # The rest if branches are for unbounded windows + elif window_spec.ordering: # Unbound grouping window. Suitable for aggregations but not for analytic function application. 
+ order_by = _convert_row_ordering_to_table_values( + self._column_names, + window_spec.ordering, + ) + else: order_by = None window = bigframes_vendored.ibis.window(order_by=order_by, group_by=group_by) @@ -575,7 +566,7 @@ def is_window(column: ibis_types.Value) -> bool: return any(isinstance(op, ibis_ops.WindowFunction) for op in matches) -def _convert_ordering_to_table_values( +def _convert_row_ordering_to_table_values( value_lookup: typing.Mapping[str, ibis_types.Value], ordering_columns: typing.Sequence[OrderingExpression], ) -> typing.Sequence[ibis_types.Value]: @@ -603,6 +594,30 @@ def _convert_ordering_to_table_values( return ordering_values +def _convert_range_ordering_to_table_value( + value_lookup: typing.Mapping[str, ibis_types.Value], + ordering_column: OrderingExpression, +) -> ibis_types.Value: + """Converts the ordering for range windows to Ibis references. + + Note that this method is different from `_convert_row_ordering_to_table_values` in + that it does not arrange null values. There are two reasons: + 1. Manipulating null positions requires more than one ordering key, which is forbidden + by SQL window syntax for range rolling. + 2. Pandas does not allow range rolling on timeseries with nulls. + + Therefore, we opt for the simplest approach here: generate the simplest SQL and follow + the BigQuery engine behavior. + """ + expr = op_compiler.compile_expression( + ordering_column.scalar_expression, value_lookup + ) + + if ordering_column.direction.is_ascending: + return bigframes_vendored.ibis.asc(expr) # type: ignore + return bigframes_vendored.ibis.desc(expr) # type: ignore + + def _string_cast_join_cond( lvalue: ibis_types.Column, rvalue: ibis_types.Column ) -> ibis_types.BooleanColumn: @@ -692,8 +707,14 @@ def _add_boundary( ) -> ibis_expr_builders.LegacyWindowBuilder: if isinstance(bounds, RangeWindowBounds): return ibis_window.range( - start=_to_ibis_boundary(bounds.start), - end=_to_ibis_boundary(bounds.end), + start=_to_ibis_boundary( + None + if bounds.start is None + else utils.timedelta_to_micros(bounds.start) + ), + end=_to_ibis_boundary( + None if bounds.end is None else utils.timedelta_to_micros(bounds.end) + ), ) if isinstance(bounds, RowsWindowBounds): if bounds.start is not None or bounds.end is not None: diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index 3d9bf19f76..04d3ea1bf9 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -14,7 +14,6 @@ from __future__ import annotations import functools -import io import typing import bigframes_vendored.ibis.backends.bigquery as ibis_bigquery @@ -22,16 +21,13 @@ import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes import bigframes_vendored.ibis.expr.types as ibis_types import google.cloud.bigquery -import pandas as pd +import pyarrow as pa from bigframes import dtypes, operations -from bigframes.core import utils import bigframes.core.compile.compiled as compiled import bigframes.core.compile.concat as concat_impl import bigframes.core.compile.explode -import bigframes.core.compile.ibis_types import bigframes.core.compile.scalar_op_compiler as compile_scalar -import bigframes.core.compile.schema_translator import bigframes.core.nodes as nodes import bigframes.core.ordering as bf_ordering import bigframes.core.rewrite as rewrites @@ -86,6 +82,7 @@ def _replace_unsupported_ops(node: nodes.BigFrameNode): # TODO: Run all replacement rules as single bottom-up pass node = nodes.bottom_up(node, rewrites.rewrite_slice) node = 
nodes.bottom_up(node, rewrites.rewrite_timedelta_expressions) + node = nodes.bottom_up(node, rewrites.rewrite_range_rolling) return node @@ -161,19 +158,22 @@ def compile_fromrange( @_compile_node.register def compile_readlocal(node: nodes.ReadLocalNode, *args): - array_as_pd = pd.read_feather( - io.BytesIO(node.feather_bytes), - columns=[item.source_id for item in node.scan_list.items], - ) - - # Convert timedeltas to microseconds for compatibility with BigQuery - _ = utils.replace_timedeltas_with_micros(array_as_pd) - offsets = node.offsets_col.sql if node.offsets_col else None - return compiled.UnorderedIR.from_pandas( - array_as_pd, node.scan_list, offsets=offsets + pa_table = node.local_data_source.data + bq_schema = node.schema.to_bigquery() + + pa_table = pa_table.select(list(item.source_id for item in node.scan_list.items)) + pa_table = pa_table.rename_columns( + {item.source_id: item.id.sql for item in node.scan_list.items} ) + if offsets: + pa_table = pa_table.append_column( + offsets, pa.array(range(pa_table.num_rows), type=pa.int64()) + ) + bq_schema = (*bq_schema, google.cloud.bigquery.SchemaField(offsets, "INT64")) + return compiled.UnorderedIR.from_polars(pa_table, bq_schema) + @_compile_node.register def compile_readtable(node: nodes.ReadTableNode, *args): diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py index 54b0a1408a..d5f9b5c5f9 100644 --- a/bigframes/core/compile/ibis_types.py +++ b/bigframes/core/compile/ibis_types.py @@ -388,7 +388,8 @@ def literal_to_ibis_scalar( # Ibis has bug for casting nulltype to geospatial, so we perform intermediate cast first geotype = ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True) return bigframes_vendored.ibis.literal(None, geotype) - ibis_dtype = BIGFRAMES_TO_IBIS[force_dtype] if force_dtype else None + + ibis_dtype = bigframes_dtype_to_ibis_dtype(force_dtype) if force_dtype else None if pd.api.types.is_list_like(literal): if validate: diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py index 6fac3c9b92..baa19eb990 100644 --- a/bigframes/core/compile/polars/compiler.py +++ b/bigframes/core/compile/polars/compiler.py @@ -16,7 +16,7 @@ import dataclasses import functools import itertools -from typing import cast, Optional, Sequence, Tuple, TYPE_CHECKING, Union +from typing import cast, Optional, Sequence, Tuple, TYPE_CHECKING import bigframes.core from bigframes.core import window_spec @@ -205,11 +205,10 @@ def compile_readlocal(self, node: nodes.ReadLocalNode): cols_to_read = { scan_item.source_id: scan_item.id.sql for scan_item in node.scan_list.items } - return ( - pl.read_ipc(node.feather_bytes, columns=list(cols_to_read.keys())) - .lazy() - .rename(cols_to_read) - ) + lazy_frame = cast( + pl.DataFrame, pl.from_arrow(node.local_data_source.data) + ).lazy() + return lazy_frame.select(cols_to_read.keys()).rename(cols_to_read) @compile_node.register def compile_filter(self, node: nodes.FilterNode): @@ -360,6 +359,7 @@ def compile_window(self, node: nodes.WindowOpNode): return df.with_columns([agg_expr]) else: # row-bounded window + assert isinstance(window.bounds, window_spec.RowsWindowBounds) # Polars API semi-bounded, and any grouped rolling window challenging # https://github.com/pola-rs/polars/issues/4799 # https://github.com/pola-rs/polars/issues/8976 @@ -383,9 +383,7 @@ def compile_window(self, node: nodes.WindowOpNode): return pl.concat([df, results], how="horizontal") -def _get_period( - bounds: 
Union[window_spec.RowsWindowBounds, window_spec.RangeWindowBounds] -) -> Optional[int]: +def _get_period(bounds: window_spec.RowsWindowBounds) -> Optional[int]: """Returns None if the boundary is infinite.""" if bounds.start is None or bounds.end is None: return None diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 0296762447..eda70f5cf1 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -747,12 +747,12 @@ def date_diff_op_impl(x: ibis_types.DateValue, y: ibis_types.DateValue): @scalar_op_compiler.register_binary_op(ops.date_add_op) def date_add_op_impl(x: ibis_types.DateValue, y: ibis_types.IntegerValue): - return x.cast("timestamp") + y.to_interval("us") # type: ignore + return x.cast(ibis_dtypes.timestamp()) + y.to_interval("us") # type: ignore @scalar_op_compiler.register_binary_op(ops.date_sub_op) def date_sub_op_impl(x: ibis_types.DateValue, y: ibis_types.IntegerValue): - return x.cast("timestamp") - y.to_interval("us") # type: ignore + return x.cast(ibis_dtypes.timestamp()) - y.to_interval("us") # type: ignore @scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True) diff --git a/bigframes/core/compile/sqlglot/__init__.py b/bigframes/core/compile/sqlglot/__init__.py new file mode 100644 index 0000000000..0a2669d7a2 --- /dev/null +++ b/bigframes/core/compile/sqlglot/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/bigframes/core/compile/sqlglot/sqlglot_types.py b/bigframes/core/compile/sqlglot/sqlglot_types.py new file mode 100644 index 0000000000..06c78c1435 --- /dev/null +++ b/bigframes/core/compile/sqlglot/sqlglot_types.py @@ -0,0 +1,84 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
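
For orientation, the new compile_readlocal path above is plain pyarrow table surgery: select the scanned columns, rename their source ids to the SQL-facing ids, and optionally append an INT64 offsets column. A minimal, self-contained sketch of that pattern follows; the column names are illustrative stand-ins, not real scan-list ids, and dict-based rename_columns assumes a reasonably recent pyarrow (the same capability this diff itself relies on):

import pyarrow as pa

# Local data, standing in for node.local_data_source.data.
table = pa.table({"src_a": [10, 20], "src_b": ["x", "y"]})

# Keep only the scanned columns, then rename source ids to SQL ids.
table = table.select(["src_a", "src_b"])
table = table.rename_columns({"src_a": "col_a", "src_b": "col_b"})

# Optionally materialize row offsets as an INT64 column.
table = table.append_column(
    "offsets", pa.array(range(table.num_rows), type=pa.int64())
)
print(table.schema)
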
+ +from __future__ import annotations + +import typing + +import bigframes_vendored.constants as constants +import numpy as np +import pandas as pd +import pyarrow as pa +import sqlglot as sg + +import bigframes.dtypes + + +class SQLGlotType: + @classmethod + def from_bigframes_dtype( + cls, + bigframes_dtype: typing.Union[ + bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype, np.dtype[typing.Any] + ], + ): + if bigframes_dtype == bigframes.dtypes.INT_DTYPE: + return "INT64" + elif bigframes_dtype == bigframes.dtypes.FLOAT_DTYPE: + return "FLOAT64" + elif bigframes_dtype == bigframes.dtypes.STRING_DTYPE: + return "STRING" + elif bigframes_dtype == bigframes.dtypes.BOOL_DTYPE: + return "BOOLEAN" + elif bigframes_dtype == bigframes.dtypes.DATE_DTYPE: + return "DATE" + elif bigframes_dtype == bigframes.dtypes.TIME_DTYPE: + return "TIME" + elif bigframes_dtype == bigframes.dtypes.DATETIME_DTYPE: + return "DATETIME" + elif bigframes_dtype == bigframes.dtypes.TIMESTAMP_DTYPE: + return "TIMESTAMP" + elif bigframes_dtype == bigframes.dtypes.BYTES_DTYPE: + return "BYTES" + elif bigframes_dtype == bigframes.dtypes.NUMERIC_DTYPE: + return "NUMERIC" + elif bigframes_dtype == bigframes.dtypes.BIGNUMERIC_DTYPE: + return "BIGNUMERIC" + elif bigframes_dtype == bigframes.dtypes.JSON_DTYPE: + return "JSON" + elif bigframes_dtype == bigframes.dtypes.GEO_DTYPE: + return "GEOGRAPHY" + elif isinstance(bigframes_dtype, pd.ArrowDtype): + if pa.types.is_list(bigframes_dtype.pyarrow_dtype): + inner_bigframes_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype( + bigframes_dtype.pyarrow_dtype.value_type + ) + return ( + f"ARRAY<{SQLGlotType.from_bigframes_dtype(inner_bigframes_dtype)}>" + ) + elif pa.types.is_struct(bigframes_dtype.pyarrow_dtype): + struct_type = typing.cast(pa.StructType, bigframes_dtype.pyarrow_dtype) + inner_fields: list[str] = [] + for i in range(struct_type.num_fields): + field = struct_type.field(i) + key = sg.to_identifier(field.name).sql("bigquery") + dtype = SQLGlotType.from_bigframes_dtype( + bigframes.dtypes.arrow_dtype_to_bigframes_dtype(field.type) + ) + inner_fields.append(f"{key} {dtype}") + return "STRUCT<{}>".format(", ".join(inner_fields)) + + raise ValueError( + f"Unsupported type for {bigframes_dtype}. 
{constants.FEEDBACK_LINK}" + ) diff --git a/bigframes/core/groupby/dataframe_group_by.py b/bigframes/core/groupby/dataframe_group_by.py index b97a5f4c48..f234bad126 100644 --- a/bigframes/core/groupby/dataframe_group_by.py +++ b/bigframes/core/groupby/dataframe_group_by.py @@ -14,12 +14,13 @@ from __future__ import annotations +import datetime import typing from typing import Literal, Sequence, Tuple, Union import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby -import jellyfish +import numpy import pandas as pd from bigframes import session @@ -31,6 +32,7 @@ import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.validations as validations +from bigframes.core.window import rolling import bigframes.core.window as windows import bigframes.core.window_spec as window_specs import bigframes.dataframe as df @@ -87,6 +89,8 @@ def __getitem__( typing.Sequence[blocks.Label], ], ): + import bigframes._tools.strings + if utils.is_list_like(key): keys = list(key) else: @@ -101,7 +105,7 @@ def __getitem__( possible_key.append( min( self._block.column_labels, - key=lambda item: jellyfish.damerau_levenshtein_distance( + key=lambda item: bigframes._tools.strings.levenshtein_distance( bad_key, item ), ) @@ -308,20 +312,41 @@ def diff(self, periods=1) -> series.Series: @validations.requires_ordering() def rolling( self, - window: int, + window: int | pd.Timedelta | numpy.timedelta64 | datetime.timedelta | str, min_periods=None, + on: str | None = None, closed: Literal["right", "left", "both", "neither"] = "right", ) -> windows.Window: - window_spec = window_specs.WindowSpec( - bounds=window_specs.RowsWindowBounds.from_window_size(window, closed), - min_periods=min_periods if min_periods is not None else window, - grouping_keys=tuple(ex.deref(col) for col in self._by_col_ids), - ) - block = self._block.order_by( - [order.ascending_over(col) for col in self._by_col_ids], - ) - return windows.Window( - block, window_spec, self._selected_cols, drop_null_groups=self._dropna + if isinstance(window, int): + window_spec = window_specs.WindowSpec( + bounds=window_specs.RowsWindowBounds.from_window_size(window, closed), + min_periods=min_periods if min_periods is not None else window, + grouping_keys=tuple(ex.deref(col) for col in self._by_col_ids), + ) + block = self._block.order_by( + [order.ascending_over(col) for col in self._by_col_ids], + ) + skip_agg_col_id = ( + None if on is None else self._block.resolve_label_exact_or_error(on) + ) + return windows.Window( + block, + window_spec, + self._selected_cols, + drop_null_groups=self._dropna, + skip_agg_column_id=skip_agg_col_id, + ) + + return rolling.create_range_window( + self._block, + window, + min_periods=min_periods, + value_column_ids=self._selected_cols, + on=on, + closed=closed, + is_series=False, + grouping_keys=self._by_col_ids, + drop_null_groups=self._dropna, ) @validations.requires_ordering() @@ -511,7 +536,7 @@ def _aggregate_all( def _apply_window_op( self, - op: agg_ops.WindowOp, + op: agg_ops.UnaryWindowOp, window: typing.Optional[window_specs.WindowSpec] = None, numeric_only: bool = False, ): diff --git a/bigframes/core/groupby/series_group_by.py b/bigframes/core/groupby/series_group_by.py index 761a02bd34..a29bb45a32 100644 --- a/bigframes/core/groupby/series_group_by.py +++ b/bigframes/core/groupby/series_group_by.py @@ -14,11 +14,14 @@ from __future__ import annotations +import datetime import typing from typing import Literal, 
Sequence, Union import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby +import numpy +import pandas from bigframes import session from bigframes.core import expression as ex @@ -29,6 +32,7 @@ import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.validations as validations +from bigframes.core.window import rolling import bigframes.core.window as windows import bigframes.core.window_spec as window_specs import bigframes.dataframe as df @@ -246,24 +250,36 @@ def diff(self, periods=1) -> series.Series: @validations.requires_ordering() def rolling( self, - window: int, + window: int | pandas.Timedelta | numpy.timedelta64 | datetime.timedelta | str, min_periods=None, closed: Literal["right", "left", "both", "neither"] = "right", ) -> windows.Window: - window_spec = window_specs.WindowSpec( - bounds=window_specs.RowsWindowBounds.from_window_size(window, closed), - min_periods=min_periods if min_periods is not None else window, - grouping_keys=tuple(ex.deref(col) for col in self._by_col_ids), - ) - block = self._block.order_by( - [order.ascending_over(col) for col in self._by_col_ids], - ) - return windows.Window( - block, - window_spec, - [self._value_column], - drop_null_groups=self._dropna, + if isinstance(window, int): + window_spec = window_specs.WindowSpec( + bounds=window_specs.RowsWindowBounds.from_window_size(window, closed), + min_periods=min_periods if min_periods is not None else window, + grouping_keys=tuple(ex.deref(col) for col in self._by_col_ids), + ) + block = self._block.order_by( + [order.ascending_over(col) for col in self._by_col_ids], + ) + return windows.Window( + block, + window_spec, + [self._value_column], + drop_null_groups=self._dropna, + is_series=True, + ) + + return rolling.create_range_window( + self._block, + window, + min_periods=min_periods, + value_column_ids=[self._value_column], + closed=closed, is_series=True, + grouping_keys=self._by_col_ids, + drop_null_groups=self._dropna, ) @validations.requires_ordering() @@ -294,7 +310,7 @@ def _aggregate(self, aggregate_op: agg_ops.UnaryAggregateOp) -> series.Series: def _apply_window_op( self, - op: agg_ops.WindowOp, + op: agg_ops.UnaryWindowOp, discard_name=False, window: typing.Optional[window_specs.WindowSpec] = None, never_skip_nulls: bool = False, diff --git a/bigframes/core/local_data.py b/bigframes/core/local_data.py index d891e385d5..70b1741af7 100644 --- a/bigframes/core/local_data.py +++ b/bigframes/core/local_data.py @@ -16,42 +16,281 @@ from __future__ import annotations +import dataclasses +import functools +import io +import itertools +import json +from typing import Any, Callable, cast, Generator, Iterable, Literal, Optional, Union +import uuid + +import geopandas # type: ignore +import numpy as np +import pandas import pyarrow as pa +import pyarrow.parquet # type: ignore import bigframes.core.schema as schemata import bigframes.dtypes -def arrow_schema_to_bigframes(arrow_schema: pa.Schema) -> schemata.ArraySchema: - """Infer the corresponding bigframes schema given a pyarrow schema.""" - schema_items = tuple( - schemata.SchemaItem( - field.name, - bigframes_type_for_arrow_type(field.type), +@dataclasses.dataclass(frozen=True) +class LocalTableMetadata: + total_bytes: int + row_count: int + + @classmethod + def from_arrow(cls, table: pa.Table) -> LocalTableMetadata: + return cls(total_bytes=table.nbytes, row_count=table.num_rows) + + +_MANAGED_STORAGE_TYPES_OVERRIDES: 
dict[bigframes.dtypes.Dtype, pa.DataType] = { + # wkt to be precise + bigframes.dtypes.GEO_DTYPE: pa.string(), + # Just json as string + bigframes.dtypes.JSON_DTYPE: pa.string(), +} + + +@dataclasses.dataclass(frozen=True) +class ManagedArrowTable: + data: pa.Table = dataclasses.field(hash=False) + schema: schemata.ArraySchema = dataclasses.field(hash=False) + id: uuid.UUID = dataclasses.field(default_factory=uuid.uuid4) + + def __post_init__(self): + self.validate() + + @functools.cached_property + def metadata(self) -> LocalTableMetadata: + return LocalTableMetadata.from_arrow(self.data) + + @classmethod + def from_pandas(cls, dataframe: pandas.DataFrame) -> ManagedArrowTable: + """Creates managed table from pandas. Ignores index, col names must be unique strings""" + columns: list[pa.ChunkedArray] = [] + fields: list[schemata.SchemaItem] = [] + column_names = list(dataframe.columns) + assert len(column_names) == len(set(column_names)) + + for name, col in dataframe.items(): + new_arr, bf_type = _adapt_pandas_series(col) + columns.append(new_arr) + fields.append(schemata.SchemaItem(str(name), bf_type)) + + return ManagedArrowTable( + pa.table(columns, names=column_names), schemata.ArraySchema(tuple(fields)) ) - for field in arrow_schema - ) - return schemata.ArraySchema(schema_items) + @classmethod + def from_pyarrow(self, table: pa.Table) -> ManagedArrowTable: + columns: list[pa.ChunkedArray] = [] + fields: list[schemata.SchemaItem] = [] + for name, arr in zip(table.column_names, table.columns): + new_arr, bf_type = _adapt_arrow_array(arr) + columns.append(new_arr) + fields.append(schemata.SchemaItem(name, bf_type)) -def adapt_pa_table(arrow_table: pa.Table) -> pa.Table: - """Adapt a pyarrow table to one that can be handled by bigframes. Converts tz to UTC and unit to us for temporal types.""" - new_schema = pa.schema( - [ - pa.field(field.name, arrow_type_replacements(field.type)) - for field in arrow_table.schema - ] - ) - return arrow_table.cast(new_schema) + return ManagedArrowTable( + pa.table(columns, names=table.column_names), + schemata.ArraySchema(tuple(fields)), + ) + + def to_parquet( + self, + dst: Union[str, io.IOBase], + *, + offsets_col: Optional[str] = None, + geo_format: Literal["wkb", "wkt"] = "wkt", + duration_type: Literal["int", "duration"] = "duration", + json_type: Literal["string"] = "string", + ): + pa_table = self.data + if offsets_col is not None: + pa_table = pa_table.append_column( + offsets_col, pa.array(range(pa_table.num_rows), type=pa.int64()) + ) + if geo_format != "wkt": + raise NotImplementedError(f"geo format {geo_format} not yet implemented") + if duration_type != "duration": + raise NotImplementedError( + f"duration as {duration_type} not yet implemented" + ) + assert json_type == "string" + pyarrow.parquet.write_table(pa_table, where=dst) + + def itertuples( + self, + *, + geo_format: Literal["wkb", "wkt"] = "wkt", + duration_type: Literal["int", "timedelta"] = "timedelta", + json_type: Literal["string", "object"] = "string", + ) -> Iterable[tuple]: + """ + Yield each row as an unlabeled tuple. + + Row-wise iteration of columnar data is slow, avoid if possible. 
+ """ + for row_dict in _iter_table( + self.data, + self.schema, + geo_format=geo_format, + duration_type=duration_type, + json_type=json_type, + ): + yield tuple(row_dict.values()) + + def validate(self): + # TODO: Content-based validation for some datatypes (eg json, wkt, list) where logical domain is smaller than pyarrow type + for bf_field, arrow_field in zip(self.schema.items, self.data.schema): + expected_arrow_type = _get_managed_storage_type(bf_field.dtype) + arrow_type = arrow_field.type + if expected_arrow_type != arrow_type: + raise TypeError( + f"Field {bf_field} has arrow array type: {arrow_type}, expected type: {expected_arrow_type}" + ) + + +# Sequential iterator, but could split into batches and leverage parallelism for speed +def _iter_table( + table: pa.Table, + schema: schemata.ArraySchema, + *, + geo_format: Literal["wkb", "wkt"] = "wkt", + duration_type: Literal["int", "timedelta"] = "timedelta", + json_type: Literal["string", "object"] = "string", +) -> Generator[dict[str, Any], None, None]: + """For when you feel like iterating row-wise over a column store. Don't expect speed.""" + + if geo_format != "wkt": + raise NotImplementedError(f"geo format {geo_format} not yet implemented") + @functools.singledispatch + def iter_array( + array: pa.Array, dtype: bigframes.dtypes.Dtype + ) -> Generator[Any, None, None]: + values = array.to_pylist() + if dtype == bigframes.dtypes.JSON_DTYPE: + if json_type == "object": + yield from map(lambda x: json.loads(x) if x is not None else x, values) + else: + yield from values + elif dtype == bigframes.dtypes.TIMEDELTA_DTYPE: + if duration_type == "int": + yield from map( + lambda x: ((x.days * 3600 * 24) + x.seconds) * 1_000_000 + + x.microseconds + if x is not None + else x, + values, + ) + else: + yield from values + else: + yield from values -def bigframes_type_for_arrow_type(pa_type: pa.DataType) -> bigframes.dtypes.Dtype: - return bigframes.dtypes.arrow_dtype_to_bigframes_dtype( - arrow_type_replacements(pa_type) + @iter_array.register + def _( + array: pa.ListArray, dtype: bigframes.dtypes.Dtype + ) -> Generator[Any, None, None]: + value_generator = iter_array( + array.flatten(), bigframes.dtypes.get_array_inner_type(dtype) + ) + for (start, end) in itertools.pairwise(array.offsets): + arr_size = end.as_py() - start.as_py() + yield list(itertools.islice(value_generator, arr_size)) + + @iter_array.register + def _( + array: pa.StructArray, dtype: bigframes.dtypes.Dtype + ) -> Generator[Any, None, None]: + # yield from each subarray + sub_generators: dict[str, Generator[Any, None, None]] = {} + for field_name, dtype in bigframes.dtypes.get_struct_fields(dtype).items(): + sub_generators[field_name] = iter_array(array.field(field_name), dtype) + + keys = list(sub_generators.keys()) + for row_values in zip(*sub_generators.values()): + yield {key: value for key, value in zip(keys, row_values)} + + for batch in table.to_batches(): + sub_generators: dict[str, Generator[Any, None, None]] = {} + for field in schema.items: + sub_generators[field.column] = iter_array( + batch.column(field.column), field.dtype + ) + + keys = list(sub_generators.keys()) + for row_values in zip(*sub_generators.values()): + yield {key: value for key, value in zip(keys, row_values)} + + +def _adapt_pandas_series( + series: pandas.Series, +) -> tuple[Union[pa.ChunkedArray, pa.Array], bigframes.dtypes.Dtype]: + # Mostly rely on pyarrow conversions, but have to convert geo without its help. 
+ if series.dtype == bigframes.dtypes.GEO_DTYPE: + series = geopandas.GeoSeries(series).to_wkt(rounding_precision=-1) + return pa.array(series, type=pa.string()), bigframes.dtypes.GEO_DTYPE + try: + return _adapt_arrow_array(pa.array(series)) + except pa.ArrowInvalid as e: + if series.dtype == np.dtype("O"): + try: + return _adapt_pandas_series(series.astype(bigframes.dtypes.GEO_DTYPE)) + except TypeError: + # Prefer original error + pass + raise e + + +def _adapt_arrow_array( + array: Union[pa.ChunkedArray, pa.Array] +) -> tuple[Union[pa.ChunkedArray, pa.Array], bigframes.dtypes.Dtype]: + target_type = _logical_type_replacements(array.type) + if target_type != array.type: + # TODO: Maybe warn if lossy conversion? + array = array.cast(target_type) + bf_type = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(target_type) + + storage_type = _get_managed_storage_type(bf_type) + if storage_type != array.type: + array = array.cast(storage_type) + return array, bf_type + + +def _get_managed_storage_type(dtype: bigframes.dtypes.Dtype) -> pa.DataType: + if dtype in _MANAGED_STORAGE_TYPES_OVERRIDES.keys(): + return _MANAGED_STORAGE_TYPES_OVERRIDES[dtype] + return _physical_type_replacements( + bigframes.dtypes.bigframes_dtype_to_arrow_dtype(dtype) ) -def arrow_type_replacements(type: pa.DataType) -> pa.DataType: +def _recursive_map_types( + f: Callable[[pa.DataType], pa.DataType] +) -> Callable[[pa.DataType], pa.DataType]: + @functools.wraps(f) + def recursive_f(type: pa.DataType) -> pa.DataType: + if pa.types.is_list(type): + new_field_t = recursive_f(type.value_type) + if new_field_t != type.value_type: + return pa.list_(new_field_t) + return type + if pa.types.is_struct(type): + struct_type = cast(pa.StructType, type) + new_fields: list[pa.Field] = [] + for i in range(struct_type.num_fields): + field = struct_type.field(i) + new_fields.append(field.with_type(recursive_f(field.type))) + return pa.struct(new_fields) + return f(type) + + return recursive_f + + +@_recursive_map_types +def _logical_type_replacements(type: pa.DataType) -> pa.DataType: if pa.types.is_timestamp(type): # This is potentially lossy, but BigFrames doesn't support ns new_tz = "UTC" if (type.tz is not None) else None @@ -66,10 +305,27 @@ def arrow_type_replacements(type: pa.DataType) -> pa.DataType: return pa.decimal128(38, 9) if pa.types.is_decimal256(type): return pa.decimal256(76, 38) - if pa.types.is_dictionary(type): - return arrow_type_replacements(type.value_type) if pa.types.is_large_string(type): # simple string type can handle the largest strings needed return pa.string() + if pa.types.is_dictionary(type): + return _logical_type_replacements(type.value_type) + if pa.types.is_null(type): + # null as a type not allowed, default type is float64 for bigframes + return pa.float64() else: return type + + +_ARROW_MANAGED_STORAGE_OVERRIDES = { + bigframes.dtypes._BIGFRAMES_TO_ARROW[bf_dtype]: arrow_type + for bf_dtype, arrow_type in _MANAGED_STORAGE_TYPES_OVERRIDES.items() + if bf_dtype in bigframes.dtypes._BIGFRAMES_TO_ARROW +} + + +@_recursive_map_types +def _physical_type_replacements(dtype: pa.DataType) -> pa.DataType: + if dtype in _ARROW_MANAGED_STORAGE_OVERRIDES: + return _ARROW_MANAGED_STORAGE_OVERRIDES[dtype] + return dtype diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py index 714a522183..8be46f531c 100644 --- a/bigframes/core/log_adapter.py +++ b/bigframes/core/log_adapter.py @@ -110,25 +110,42 @@ def submit_pandas_labels( bq_client.query(query, job_config=job_config) -def 
class_logger(decorated_cls): +def class_logger(decorated_cls=None, /, *, include_internal_calls=False): """Decorator that adds logging functionality to each method of the class.""" - for attr_name, attr_value in decorated_cls.__dict__.items(): - if callable(attr_value) and (attr_name not in _excluded_methods): - if isinstance(attr_value, staticmethod): - # TODO(b/390244171) support for staticmethod - pass - else: + + def wrap(cls): + for attr_name, attr_value in cls.__dict__.items(): + if callable(attr_value) and (attr_name not in _excluded_methods): + if isinstance(attr_value, staticmethod): + # TODO(b/390244171) support for staticmethod + pass + else: + setattr( + cls, + attr_name, + method_logger( + attr_value, + cls, + include_internal_calls, + ), + ) + elif isinstance(attr_value, property): setattr( - decorated_cls, attr_name, method_logger(attr_value, decorated_cls) + cls, + attr_name, + property_logger(attr_value, cls, include_internal_calls), ) - elif isinstance(attr_value, property): - setattr( - decorated_cls, attr_name, property_logger(attr_value, decorated_cls) - ) - return decorated_cls + return cls + + if decorated_cls is None: + # The logger is used with parentheses + return wrap + + # The logger is used without parentheses + return wrap(decorated_cls) -def method_logger(method, decorated_cls): +def method_logger(method, decorated_cls, include_internal_calls: bool): """Decorator that adds logging functionality to a method.""" @functools.wraps(method) @@ -138,7 +155,7 @@ def wrapper(self, *args, **kwargs): full_method_name = f"{class_name.lower()}-{api_method_name}" # Track directly called methods - if len(_call_stack) == 0: + if len(_call_stack) == 0 or include_internal_calls: add_api_method(full_method_name) _call_stack.append(full_method_name) @@ -167,7 +184,7 @@ def wrapper(self, *args, **kwargs): return wrapper -def property_logger(prop, decorated_cls): +def property_logger(prop, decorated_cls, include_internal_calls: bool): """Decorator that adds logging functionality to a property.""" def shared_wrapper(f): @@ -177,7 +194,7 @@ def wrapped(*args, **kwargs): property_name = f.__name__ full_property_name = f"{class_name.lower()}-{property_name.lower()}" - if len(_call_stack) == 0: + if len(_call_stack) == 0 or include_internal_calls: add_api_method(full_property_name) _call_stack.append(full_property_name) diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index fbc43e033a..99c8f09bc0 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -20,16 +20,23 @@ import functools import itertools import typing -from typing import Callable, cast, Iterable, Mapping, Optional, Sequence, Tuple +from typing import ( + AbstractSet, + Callable, + cast, + Iterable, + Mapping, + Optional, + Sequence, + Tuple, +) import google.cloud.bigquery as bq -from bigframes.core import identifiers +from bigframes.core import identifiers, local_data from bigframes.core.bigframe_node import BigFrameNode, COLUMN_SET, Field import bigframes.core.expression as ex -import bigframes.core.guid from bigframes.core.ordering import OrderingExpression -import bigframes.core.schema as schemata import bigframes.core.slices as slices import bigframes.core.window_spec as window import bigframes.dtypes @@ -574,16 +581,44 @@ def with_id(self, id: identifiers.ColumnId) -> ScanItem: @dataclasses.dataclass(frozen=True) class ScanList: + """ + Defines the set of columns to scan from a source, along with the variable to bind the columns to. + """ + items: typing.Tuple[ScanItem, ...] 
+ def filter_cols( + self, + ids: AbstractSet[identifiers.ColumnId], + ) -> ScanList: + """Drop columns from the scan except those in the 'ids' arg.""" + result = ScanList(tuple(item for item in self.items if item.id in ids)) + if len(result.items) == 0: + # We need to select something, or sql syntax breaks + result = ScanList(self.items[:1]) + return result + + def project( + self, + selections: Mapping[identifiers.ColumnId, identifiers.ColumnId], + ) -> ScanList: + """Project given ids from the scanlist, dropping previous bindings.""" + by_id = {item.id: item for item in self.items} + result = ScanList( + tuple( + by_id[old_id].with_id(new_id) for old_id, new_id in selections.items() + ) + ) + if len(result.items) == 0: + # We need to select something, or sql syntax breaks + result = ScanList((self.items[:1])) + return result + @dataclasses.dataclass(frozen=True, eq=False) class ReadLocalNode(LeafNode): - # TODO: Combine feather_bytes, data_schema, n_rows into a LocalDataDef struct # TODO: Track nullability for local data - feather_bytes: bytes - data_schema: schemata.ArraySchema - n_rows: int + local_data_source: local_data.ManagedArrowTable # Mapping of local ids to bfet id. scan_list: ScanList # Offsets are generated only if this is non-null @@ -623,7 +658,7 @@ def explicitly_ordered(self) -> bool: @property def row_count(self) -> typing.Optional[int]: - return self.n_rows + return self.local_data_source.metadata.row_count @property def node_defined_ids(self) -> Tuple[identifiers.ColumnId, ...]: @@ -659,7 +694,6 @@ class GbqTable: dataset_id: str = dataclasses.field() table_id: str = dataclasses.field() physical_schema: Tuple[bq.SchemaField, ...] = dataclasses.field() - n_rows: int = dataclasses.field() is_physically_stored: bool = dataclasses.field() cluster_cols: typing.Optional[Tuple[str, ...]] @@ -675,13 +709,17 @@ def from_table(table: bq.Table, columns: Sequence[str] = ()) -> GbqTable: dataset_id=table.dataset_id, table_id=table.table_id, physical_schema=schema, - n_rows=table.num_rows, is_physically_stored=(table.table_type in ["TABLE", "MATERIALIZED_VIEW"]), cluster_cols=None if table.clustering_fields is None else tuple(table.clustering_fields), ) + def get_table_ref(self) -> bq.TableReference: + return bq.TableReference( + bq.DatasetReference(self.project_id, self.dataset_id), self.table_id + ) + @property @functools.cache def schema_by_id(self): @@ -701,6 +739,7 @@ class BigqueryDataSource: # Added for backwards compatibility, not validated sql_predicate: typing.Optional[str] = None ordering: typing.Optional[orderings.RowOrdering] = None + n_rows: Optional[int] = None ## Put ordering in here or just add order_by node above? @@ -778,7 +817,7 @@ def variables_introduced(self) -> int: @property def row_count(self) -> typing.Optional[int]: if self.source.sql_predicate is None and self.source.table.is_physically_stored: - return self.source.table.n_rows + return self.source.n_rows return None @property @@ -1074,6 +1113,11 @@ def variables_introduced(self) -> int: # This operation only renames variables, doesn't actually create new ones return 0 + @property + def has_multi_referenced_ids(self) -> bool: + referenced = tuple(ref.ref.id for ref in self.input_output_pairs) + return len(referenced) != len(set(referenced)) + # TODO: Reuse parent namespace # Currently, Selection node allows renaming and reusing existing names, so it must establish a # new namespace.
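
The two ScanList helpers above share one invariant that is easy to miss: a pruned scan must never end up empty, because the generated SQL needs at least one column to select. A toy sketch of that contract, using simplified stand-in classes rather than the real bigframes node types:

from dataclasses import dataclass

@dataclass(frozen=True)
class Item:
    id: str

@dataclass(frozen=True)
class Scan:
    items: tuple

    def filter_cols(self, ids):
        kept = tuple(item for item in self.items if item.id in ids)
        # An empty projection would break SQL generation, so keep one column.
        return Scan(kept if kept else self.items[:1])

scan = Scan((Item("a"), Item("b"), Item("c")))
assert scan.filter_cols({"b"}) == Scan((Item("b"),))
assert scan.filter_cols(set()) == Scan((Item("a"),))
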
@@ -1358,7 +1402,7 @@ def _validate(self): """Validate the local data in the node.""" # Since inner order and row bounds are coupled, rank ops can't be row bounded assert ( - not self.window_spec.row_bounded + not self.window_spec.is_row_bounded ) or self.expression.op.implicitly_inherits_order assert all(ref in self.child.ids for ref in self.expression.column_references) @@ -1420,7 +1464,9 @@ def inherits_order(self) -> bool: op_inherits_order = ( not self.expression.op.order_independent ) and self.expression.op.implicitly_inherits_order - return op_inherits_order or self.window_spec.row_bounded + # range-bounded windows do not inherit orders because their ordering are + # already defined before rewrite time. + return op_inherits_order or self.window_spec.is_row_bounded @property def additive_base(self) -> BigFrameNode: diff --git a/bigframes/core/reshape/tile.py b/bigframes/core/reshape/tile.py index d9a5a87145..86ccf52408 100644 --- a/bigframes/core/reshape/tile.py +++ b/bigframes/core/reshape/tile.py @@ -20,6 +20,7 @@ import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile import pandas as pd +import bigframes.constants import bigframes.core.expression as ex import bigframes.core.ordering as order import bigframes.core.utils as utils @@ -41,15 +42,37 @@ def cut( right: typing.Optional[bool] = True, labels: typing.Union[typing.Iterable[str], bool, None] = None, ) -> bigframes.series.Series: - if labels is not None and labels is not False: + if ( + labels is not None + and labels is not False + and not isinstance(labels, typing.Iterable) + ): + raise ValueError( + "Bin labels must either be False, None or passed in as a list-like argument" + ) + if ( + isinstance(labels, typing.Iterable) + and len(list(labels)) > 0 + and not isinstance(list(labels)[0], str) + ): raise NotImplementedError( - "The 'labels' parameter must be either False or None. " - "Please provide a valid value for 'labels'." + "When using an iterable for labels, only iterables of strings are supported " + f"but found {type(list(labels)[0])}. 
{constants.FEEDBACK_LINK}" ) + if x.size == 0: + raise ValueError("Cannot cut empty array.") + if isinstance(bins, int): if bins <= 0: raise ValueError("`bins` should be a positive integer.") + if isinstance(labels, typing.Iterable): + labels = tuple(labels) + if len(labels) != bins: + raise ValueError( + f"Bin labels({len(labels)}) must be the same as the value of bins({bins})" + ) + op = agg_ops.CutOp(bins, right=right, labels=labels) return x._apply_window_op(op, window_spec=window_specs.unbound()) elif isinstance(bins, typing.Iterable): @@ -58,6 +81,7 @@ def cut( bins = tuple((bin.left.item(), bin.right.item()) for bin in bins) # To maintain consistency with pandas' behavior right = True + labels = None elif len(list(bins)) == 0: as_index = pd.IntervalIndex.from_tuples(list(bins)) bins = tuple() @@ -66,6 +90,7 @@ def cut( bins = tuple(bins) # To maintain consistency with pandas' behavior right = True + labels = None elif pd.api.types.is_number(list(bins)[0]): bins_list = list(bins) as_index = pd.IntervalIndex.from_breaks(bins_list) @@ -81,11 +106,24 @@ def cut( raise ValueError("`bins` iterable should contain tuples or numerics.") if as_index.is_overlapping: - raise ValueError("Overlapping IntervalIndex is not accepted.") - elif len(as_index) == 0: - op = agg_ops.CutOp(bins, right=right, labels=labels) + raise ValueError("Overlapping IntervalIndex is not accepted.") # TODO: test + + if isinstance(labels, typing.Iterable): + labels = tuple(labels) + if len(labels) != len(as_index): + raise ValueError( + f"Bin labels({len(labels)}) must be the same as the number of bin edges" + f"({len(as_index)})" + ) + + if len(as_index) == 0: + dtype = agg_ops.CutOp(bins, right=right, labels=labels).output_type() return bigframes.series.Series( - [pd.NA] * len(x), dtype=op.output_type(), name=x.name + [pd.NA] * len(x), + dtype=dtype, + name=x.name, + index=x.index, + session=x._session, ) else: op = agg_ops.CutOp(bins, right=right, labels=labels) diff --git a/bigframes/core/rewrite/__init__.py b/bigframes/core/rewrite/__init__.py index e5f7578911..128cefe94c 100644 --- a/bigframes/core/rewrite/__init__.py +++ b/bigframes/core/rewrite/__init__.py @@ -17,8 +17,10 @@ from bigframes.core.rewrite.legacy_align import legacy_join_as_projection from bigframes.core.rewrite.order import pull_up_order from bigframes.core.rewrite.pruning import column_pruning +from bigframes.core.rewrite.scan_reduction import try_reduce_to_table_scan from bigframes.core.rewrite.slices import pullup_limit_from_slice, rewrite_slice from bigframes.core.rewrite.timedeltas import rewrite_timedelta_expressions +from bigframes.core.rewrite.windows import rewrite_range_rolling __all__ = [ "legacy_join_as_projection", @@ -29,4 +31,6 @@ "remap_variables", "pull_up_order", "column_pruning", + "rewrite_range_rolling", + "try_reduce_to_table_scan", ] diff --git a/bigframes/core/rewrite/pruning.py b/bigframes/core/rewrite/pruning.py index 5a94f2aa40..5f4990094c 100644 --- a/bigframes/core/rewrite/pruning.py +++ b/bigframes/core/rewrite/pruning.py @@ -170,7 +170,7 @@ def prune_readlocal( node: bigframes.core.nodes.ReadLocalNode, selection: AbstractSet[identifiers.ColumnId], ) -> bigframes.core.nodes.ReadLocalNode: - new_scan_list = filter_scanlist(node.scan_list, selection) + new_scan_list = node.scan_list.filter_cols(selection) return dataclasses.replace( node, scan_list=new_scan_list, @@ -183,18 +183,5 @@ def prune_readtable( node: bigframes.core.nodes.ReadTableNode, selection: AbstractSet[identifiers.ColumnId], ) -> 
bigframes.core.nodes.ReadTableNode: - new_scan_list = filter_scanlist(node.scan_list, selection) + new_scan_list = node.scan_list.filter_cols(selection) return dataclasses.replace(node, scan_list=new_scan_list) - - -def filter_scanlist( - scanlist: bigframes.core.nodes.ScanList, - ids: AbstractSet[identifiers.ColumnId], -): - result = bigframes.core.nodes.ScanList( - tuple(item for item in scanlist.items if item.id in ids) - ) - if len(result.items) == 0: - # We need to select something, or stuff breaks - result = bigframes.core.nodes.ScanList(scanlist.items[:1]) - return result diff --git a/bigframes/core/rewrite/scan_reduction.py b/bigframes/core/rewrite/scan_reduction.py new file mode 100644 index 0000000000..be8db4827c --- /dev/null +++ b/bigframes/core/rewrite/scan_reduction.py @@ -0,0 +1,47 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import dataclasses +import functools +from typing import Optional + +from bigframes.core import nodes + + +def try_reduce_to_table_scan(root: nodes.BigFrameNode) -> Optional[nodes.ReadTableNode]: + for node in root.unique_nodes(): + if not isinstance(node, (nodes.ReadTableNode, nodes.SelectionNode)): + return None + result = root.bottom_up(merge_scan) + if isinstance(result, nodes.ReadTableNode): + return result + return None + + +@functools.singledispatch +def merge_scan(node: nodes.BigFrameNode) -> nodes.BigFrameNode: + return node + + +@merge_scan.register +def _(node: nodes.SelectionNode) -> nodes.BigFrameNode: + if not isinstance(node.child, nodes.ReadTableNode): + return node + if node.has_multi_referenced_ids: + return node + + selection = { + aliased_ref.ref.id: aliased_ref.id for aliased_ref in node.input_output_pairs + } + new_scan_list = node.child.scan_list.project(selection) + return dataclasses.replace(node.child, scan_list=new_scan_list) diff --git a/bigframes/core/rewrite/windows.py b/bigframes/core/rewrite/windows.py new file mode 100644 index 0000000000..9f55db23af --- /dev/null +++ b/bigframes/core/rewrite/windows.py @@ -0,0 +1,45 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
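
scan_reduction.py above leans on functools.singledispatch to express per-node rewrites: the default handler is the identity, and a registered overload folds a Selection directly into the table scan beneath it. A runnable toy version of the same shape, where ReadTable and Selection are illustrative stand-ins rather than the bigframes node classes:

import functools
from dataclasses import dataclass

@dataclass(frozen=True)
class ReadTable:
    columns: tuple

@dataclass(frozen=True)
class Selection:
    child: object
    keep: tuple

@functools.singledispatch
def merge_scan(node):
    # Default case: leave unknown node types untouched.
    return node

@merge_scan.register
def _(node: Selection):
    if not isinstance(node.child, ReadTable):
        return node
    # Fold the projection into the scan itself.
    return ReadTable(node.keep)

assert merge_scan(Selection(ReadTable(("a", "b", "c")), ("a",))) == ReadTable(("a",))
assert merge_scan(ReadTable(("x",))) == ReadTable(("x",))
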
+ +from __future__ import annotations + +import dataclasses + +from bigframes import operations as ops +from bigframes.core import nodes + + +def rewrite_range_rolling(node: nodes.BigFrameNode) -> nodes.BigFrameNode: + if not isinstance(node, nodes.WindowOpNode): + return node + + if not node.window_spec.is_range_bounded: + return node + + if len(node.window_spec.ordering) != 1: + raise ValueError( + "Range rolling should only be performed on exactly one column." + ) + + ordering_expr = node.window_spec.ordering[0] + + new_ordering = dataclasses.replace( + ordering_expr, + scalar_expression=ops.UnixMicros().as_expr(ordering_expr.scalar_expression), + ) + + return dataclasses.replace( + node, + window_spec=dataclasses.replace(node.window_spec, ordering=(new_ordering,)), + ) diff --git a/bigframes/core/schema.py b/bigframes/core/schema.py index c379db72be..c4cbb51ef9 100644 --- a/bigframes/core/schema.py +++ b/bigframes/core/schema.py @@ -67,9 +67,13 @@ def dtypes(self) -> typing.Tuple[bigframes.dtypes.Dtype, ...]: def _mapping(self) -> typing.Dict[ColumnIdentifierType, bigframes.dtypes.Dtype]: return {item.column: item.dtype for item in self.items} - def to_bigquery(self) -> typing.Tuple[google.cloud.bigquery.SchemaField, ...]: + def to_bigquery( + self, overrides: dict[bigframes.dtypes.Dtype, str] = {} + ) -> typing.Tuple[google.cloud.bigquery.SchemaField, ...]: return tuple( - bigframes.dtypes.convert_to_schema_field(item.column, item.dtype) + bigframes.dtypes.convert_to_schema_field( + item.column, item.dtype, overrides=overrides + ) for item in self.items ) diff --git a/bigframes/core/sql.py b/bigframes/core/sql.py index f4de177f37..d197993305 100644 --- a/bigframes/core/sql.py +++ b/bigframes/core/sql.py @@ -23,7 +23,7 @@ import math from typing import cast, Collection, Iterable, Mapping, Optional, TYPE_CHECKING, Union -import shapely # type: ignore +import shapely.geometry.base # type: ignore import bigframes.core.compile.googlesql as googlesql @@ -33,9 +33,19 @@ import bigframes.core.ordering +# shapely.wkt.dumps was moved to shapely.io.to_wkt in 2.0. +try: + from shapely.io import to_wkt # type: ignore +except ImportError: + from shapely.wkt import dumps # type: ignore + + to_wkt = dumps + + ### Writing SQL Values (literals, column references, table references, etc.) 
def simple_literal(value: bytes | str | int | bool | float | datetime.datetime | None): """Return quoted input string.""" + # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#literals if value is None: return "NULL" @@ -65,8 +75,8 @@ def simple_literal(value: bytes | str | int | bool | float | datetime.datetime | return f"DATE('{value.isoformat()}')" elif isinstance(value, datetime.time): return f"TIME(DATETIME('1970-01-01 {value.isoformat()}'))" - elif isinstance(value, shapely.Geometry): - return f"ST_GEOGFROMTEXT({simple_literal(shapely.to_wkt(value))})" + elif isinstance(value, shapely.geometry.base.BaseGeometry): + return f"ST_GEOGFROMTEXT({simple_literal(to_wkt(value))})" elif isinstance(value, decimal.Decimal): # TODO: disambiguate BIGNUMERIC based on scale and/or precision return f"CAST('{str(value)}' AS NUMERIC)" diff --git a/bigframes/core/utils.py b/bigframes/core/utils.py index 684290bf81..ee09fc69cb 100644 --- a/bigframes/core/utils.py +++ b/bigframes/core/utils.py @@ -18,15 +18,11 @@ from typing import Hashable, Iterable, List import warnings -import bigframes_vendored.constants as constants import bigframes_vendored.pandas.io.common as vendored_pandas_io_common import numpy as np import pandas as pd -import pandas.api.types as pdtypes -import pyarrow as pa import typing_extensions -import bigframes.dtypes as dtypes import bigframes.exceptions as bfe UNNAMED_COLUMN_ID = "bigframes_unnamed_column" @@ -222,76 +218,3 @@ def timedelta_to_micros( ) * 1_000_000 + timedelta.microseconds raise TypeError(f"Unrecognized input type: {type(timedelta)}") - - -def replace_timedeltas_with_micros(dataframe: pd.DataFrame) -> List[str]: - """ - Replaces in-place timedeltas to integer values in microseconds. Nanosecond part is ignored. - - Returns: - The names of updated columns - """ - updated_columns = [] - - for col in dataframe.columns: - if pdtypes.is_timedelta64_dtype(dataframe[col].dtype): - dataframe[col] = dataframe[col].apply(timedelta_to_micros) - updated_columns.append(col) - - if pdtypes.is_timedelta64_dtype(dataframe.index.dtype): - dataframe.index = dataframe.index.map(timedelta_to_micros) - updated_columns.append(dataframe.index.name) - - return updated_columns - - -def _search_for_nested_json_type(arrow_type: pa.DataType) -> bool: - """ - Searches recursively for JSON array type within a PyArrow DataType. - """ - if arrow_type == dtypes.JSON_ARROW_TYPE: - return True - if pa.types.is_list(arrow_type): - return _search_for_nested_json_type(arrow_type.value_type) - if pa.types.is_struct(arrow_type): - for i in range(arrow_type.num_fields): - if _search_for_nested_json_type(arrow_type.field(i).type): - return True - return False - return False - - -def replace_json_with_string(dataframe: pd.DataFrame) -> List[str]: - """ - Due to a BigQuery IO limitation with loading JSON from Parquet files (b/374784249), - we're using a workaround: storing JSON as strings and then parsing them into JSON - objects. - TODO(b/395912450): Remove workaround solution once b/374784249 got resolved. - """ - updated_columns = [] - - for col in dataframe.columns: - column_type = dataframe[col].dtype - if column_type == dtypes.JSON_DTYPE: - dataframe[col] = dataframe[col].astype(dtypes.STRING_DTYPE) - updated_columns.append(col) - elif isinstance(column_type, pd.ArrowDtype) and _search_for_nested_json_type( - column_type.pyarrow_dtype - ): - raise NotImplementedError( - f"Nested JSON types, found in column `{col}`: `{column_type}`', " - f"are currently unsupported for upload. 
{constants.FEEDBACK_LINK}" - ) - - if dataframe.index.dtype == dtypes.JSON_DTYPE: - dataframe.index = dataframe.index.astype(dtypes.STRING_DTYPE) - updated_columns.append(dataframe.index.name) - elif isinstance( - dataframe.index.dtype, pd.ArrowDtype - ) and _search_for_nested_json_type(dataframe.index.dtype.pyarrow_dtype): - raise NotImplementedError( - f"Nested JSON types, found in the index: `{dataframe.index.dtype}`', " - f"are currently unsupported for upload. {constants.FEEDBACK_LINK}" - ) - - return updated_columns diff --git a/bigframes/core/window/__init__.py b/bigframes/core/window/__init__.py index 7758145fd4..1d888ca7e6 100644 --- a/bigframes/core/window/__init__.py +++ b/bigframes/core/window/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,86 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import annotations +from bigframes.core.window.rolling import Window -import typing - -import bigframes_vendored.pandas.core.window.rolling as vendored_pandas_rolling - -from bigframes.core import log_adapter, window_spec -import bigframes.core.blocks as blocks -import bigframes.operations.aggregations as agg_ops - - -@log_adapter.class_logger -class Window(vendored_pandas_rolling.Window): - __doc__ = vendored_pandas_rolling.Window.__doc__ - - def __init__( - self, - block: blocks.Block, - window_spec: window_spec.WindowSpec, - value_column_ids: typing.Sequence[str], - drop_null_groups: bool = True, - is_series: bool = False, - ): - self._block = block - self._window_spec = window_spec - self._value_column_ids = value_column_ids - self._drop_null_groups = drop_null_groups - self._is_series = is_series - - def count(self): - return self._apply_aggregate(agg_ops.count_op) - - def sum(self): - return self._apply_aggregate(agg_ops.sum_op) - - def mean(self): - return self._apply_aggregate(agg_ops.mean_op) - - def var(self): - return self._apply_aggregate(agg_ops.var_op) - - def std(self): - return self._apply_aggregate(agg_ops.std_op) - - def max(self): - return self._apply_aggregate(agg_ops.max_op) - - def min(self): - return self._apply_aggregate(agg_ops.min_op) - - def _apply_aggregate( - self, - op: agg_ops.UnaryAggregateOp, - ): - block = self._block - labels = [block.col_id_to_label[col] for col in self._value_column_ids] - block, result_ids = block.multi_apply_window_op( - self._value_column_ids, - op, - self._window_spec, - skip_null_groups=self._drop_null_groups, - never_skip_nulls=True, - ) - - if self._window_spec.grouping_keys: - original_index_ids = block.index_columns - block = block.reset_index(drop=False) - index_ids = ( - *[col.id.name for col in self._window_spec.grouping_keys], - *original_index_ids, - ) - block = block.set_index(col_ids=index_ids) - - if self._is_series: - from bigframes.series import Series - - return Series(block.select_columns(result_ids).with_column_labels(labels)) - else: - from bigframes.dataframe import DataFrame - - return DataFrame( - block.select_columns(result_ids).with_column_labels(labels) - ) +__all__ = ["Window"] diff --git a/bigframes/core/window/ordering.py b/bigframes/core/window/ordering.py new file mode 100644 index 0000000000..0bea585bb0 --- /dev/null +++ b/bigframes/core/window/ordering.py @@ -0,0 +1,86 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from functools import singledispatch + +from bigframes.core import expression as ex +from bigframes.core import nodes, ordering + + +@singledispatch +def find_order_direction( + root: nodes.BigFrameNode, column_id: str +) -> ordering.OrderingDirection | None: + """Returns the order of the given column with tree traversal. If the column cannot be found, + or the ordering information is not available, return None. + """ + return None + + +@find_order_direction.register +def _(root: nodes.OrderByNode, column_id: str): + if len(root.by) == 0: + # This is a no-op + return find_order_direction(root.child, column_id) + + # Make sure the window key is the prefix of sorting keys. + order_expr = root.by[0] + scalar_expr = order_expr.scalar_expression + if isinstance(scalar_expr, ex.DerefOp) and scalar_expr.id.name == column_id: + return order_expr.direction + + return None + + +@find_order_direction.register +def _(root: nodes.ReversedNode, column_id: str): + direction = find_order_direction(root.child, column_id) + + if direction is None: + return None + return direction.reverse() + + +@find_order_direction.register +def _(root: nodes.SelectionNode, column_id: str): + for alias_ref in root.input_output_pairs: + if alias_ref.id.name == column_id: + return find_order_direction(root.child, alias_ref.ref.id.name) + + +@find_order_direction.register +def _(root: nodes.FilterNode, column_id: str): + return find_order_direction(root.child, column_id) + + +@find_order_direction.register +def _(root: nodes.InNode, column_id: str): + return find_order_direction(root.left_child, column_id) + + +@find_order_direction.register +def _(root: nodes.WindowOpNode, column_id: str): + return find_order_direction(root.child, column_id) + + +@find_order_direction.register +def _(root: nodes.ProjectionNode, column_id: str): + for expr, ref in root.assignments: + if ref.name == column_id and isinstance(expr, ex.DerefOp): + # This source column is renamed. + return find_order_direction(root.child, expr.id.name) + + return find_order_direction(root.child, column_id) diff --git a/bigframes/core/window/rolling.py b/bigframes/core/window/rolling.py new file mode 100644 index 0000000000..a9c6dfdfa7 --- /dev/null +++ b/bigframes/core/window/rolling.py @@ -0,0 +1,185 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
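
The point of find_order_direction above is to let range rolling verify, from the expression tree alone, that the rolling key is already in a monotonic order, and in which direction. Conceptually it answers the same question as this pandas-level check; this is illustrative only, since the real implementation walks BigFrames nodes instead of touching data:

import pandas as pd

def order_direction(series: pd.Series, label: str) -> str:
    if series.is_monotonic_increasing:
        return "ascending"
    if series.is_monotonic_decreasing:
        return "descending"
    raise ValueError(
        f"The {label} might not be in a monotonic order. "
        f"Please sort by {label} before rolling."
    )

ts = pd.Series(pd.to_datetime(["2025-01-01", "2025-01-02", "2025-01-03"], utc=True))
assert order_direction(ts, "index") == "ascending"
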
+ +from __future__ import annotations + +import datetime +import typing + +import bigframes_vendored.pandas.core.window.rolling as vendored_pandas_rolling +import numpy +import pandas + +from bigframes import dtypes +from bigframes.core import expression as ex +from bigframes.core import log_adapter, ordering, window_spec +import bigframes.core.blocks as blocks +from bigframes.core.window import ordering as window_ordering +import bigframes.operations.aggregations as agg_ops + + +@log_adapter.class_logger +class Window(vendored_pandas_rolling.Window): + __doc__ = vendored_pandas_rolling.Window.__doc__ + + def __init__( + self, + block: blocks.Block, + window_spec: window_spec.WindowSpec, + value_column_ids: typing.Sequence[str], + drop_null_groups: bool = True, + is_series: bool = False, + skip_agg_column_id: str | None = None, + ): + self._block = block + self._window_spec = window_spec + self._value_column_ids = value_column_ids + self._drop_null_groups = drop_null_groups + self._is_series = is_series + # The column ID that won't be aggregated on. + # This is equivalent to pandas `on` parameter in rolling() + self._skip_agg_column_id = skip_agg_column_id + + def count(self): + return self._apply_aggregate(agg_ops.count_op) + + def sum(self): + return self._apply_aggregate(agg_ops.sum_op) + + def mean(self): + return self._apply_aggregate(agg_ops.mean_op) + + def var(self): + return self._apply_aggregate(agg_ops.var_op) + + def std(self): + return self._apply_aggregate(agg_ops.std_op) + + def max(self): + return self._apply_aggregate(agg_ops.max_op) + + def min(self): + return self._apply_aggregate(agg_ops.min_op) + + def _apply_aggregate( + self, + op: agg_ops.UnaryAggregateOp, + ): + agg_block = self._aggregate_block(op) + + if self._is_series: + from bigframes.series import Series + + return Series(agg_block) + else: + from bigframes.dataframe import DataFrame + + # Preserve column order. 
+ column_labels = [ + self._block.col_id_to_label[col_id] for col_id in self._value_column_ids + ] + return DataFrame(agg_block)._reindex_columns(column_labels) + + def _aggregate_block(self, op: agg_ops.UnaryAggregateOp) -> blocks.Block: + agg_col_ids = [ + col_id + for col_id in self._value_column_ids + if col_id != self._skip_agg_column_id + ] + block, result_ids = self._block.multi_apply_window_op( + agg_col_ids, + op, + self._window_spec, + skip_null_groups=self._drop_null_groups, + never_skip_nulls=True, + ) + + if self._window_spec.grouping_keys: + original_index_ids = block.index_columns + block = block.reset_index(drop=False) + index_ids = ( + *[col.id.name for col in self._window_spec.grouping_keys], + *original_index_ids, + ) + block = block.set_index(col_ids=index_ids) + + labels = [self._block.col_id_to_label[col] for col in agg_col_ids] + if self._skip_agg_column_id is not None: + result_ids = [self._skip_agg_column_id, *result_ids] + labels.insert(0, self._block.col_id_to_label[self._skip_agg_column_id]) + + return block.select_columns(result_ids).with_column_labels(labels) + + +def create_range_window( + block: blocks.Block, + window: pandas.Timedelta | numpy.timedelta64 | datetime.timedelta | str, + *, + value_column_ids: typing.Sequence[str] = tuple(), + min_periods: int | None, + on: str | None = None, + closed: typing.Literal["right", "left", "both", "neither"], + is_series: bool, + grouping_keys: typing.Sequence[str] = tuple(), + drop_null_groups: bool = True, +) -> Window: + + if on is None: + # Rolling on index + index_dtypes = block.index.dtypes + if len(index_dtypes) > 1: + raise ValueError("Range rolling on MultiIndex is not supported") + if index_dtypes[0] != dtypes.TIMESTAMP_DTYPE: + raise ValueError("Index type should be timestamps with timezones") + rolling_key_col_id = block.index_columns[0] + else: + # Rolling on a specific column + rolling_key_col_id = block.resolve_label_exact_or_error(on) + if block.expr.get_column_type(rolling_key_col_id) != dtypes.TIMESTAMP_DTYPE: + raise ValueError(f"Column {on} type should be timestamps with timezones") + + order_direction = window_ordering.find_order_direction( + block.expr.node, rolling_key_col_id + ) + if order_direction is None: + target_str = "index" if on is None else f"column {on}" + raise ValueError( + f"The {target_str} might not be in a monotonic order. Please sort by {target_str} before rolling." + ) + if isinstance(window, str): + window = pandas.Timedelta(window) + spec = window_spec.WindowSpec( + bounds=window_spec.RangeWindowBounds.from_timedelta_window(window, closed), + min_periods=1 if min_periods is None else min_periods, + ordering=( + ordering.OrderingExpression(ex.deref(rolling_key_col_id), order_direction), + ), + grouping_keys=tuple(ex.deref(col) for col in grouping_keys), + ) + + selected_value_col_ids = ( + value_column_ids if value_column_ids else block.value_columns + ) + # This step must be done after finding the order direction of the window key. 
+    if grouping_keys:
+        block = block.order_by([ordering.ascending_over(col) for col in grouping_keys])
+
+    return Window(
+        block,
+        spec,
+        value_column_ids=selected_value_col_ids,
+        is_series=is_series,
+        skip_agg_column_id=None if on is None else rolling_key_col_id,
+        drop_null_groups=drop_null_groups,
+    )
diff --git a/bigframes/core/window_spec.py b/bigframes/core/window_spec.py
index 142e3a7e00..d08ba3d12a 100644
--- a/bigframes/core/window_spec.py
+++ b/bigframes/core/window_spec.py
@@ -14,9 +14,13 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, replace
+import datetime
 import itertools
 from typing import Literal, Mapping, Optional, Set, Tuple, Union
 
+import numpy as np
+import pandas as pd
+
 import bigframes.core.expression as ex
 import bigframes.core.identifiers as ids
 import bigframes.core.ordering as orderings
@@ -168,9 +172,31 @@ def __post_init__(self):
 
 @dataclass(frozen=True)
 class RangeWindowBounds:
-    # TODO(b/388916840) Support range rolling on timeseries with timedeltas.
-    start: Optional[int] = None
-    end: Optional[int] = None
+    """Represents a time range window, inclusively bounded by start and end"""
+
+    start: pd.Timedelta | None = None
+    end: pd.Timedelta | None = None
+
+    @classmethod
+    def from_timedelta_window(
+        cls,
+        window: pd.Timedelta | np.timedelta64 | datetime.timedelta,
+        closed: Literal["right", "left", "both", "neither"],
+    ) -> RangeWindowBounds:
+        window = pd.Timedelta(window)
+        tick = pd.Timedelta("1us")
+        zero = pd.Timedelta(0)
+
+        if closed == "right":
+            return cls(-(window - tick), zero)
+        elif closed == "left":
+            return cls(-window, -tick)
+        elif closed == "both":
+            return cls(-window, zero)
+        elif closed == "neither":
+            return cls(-(window - tick), -tick)
+        else:
+            raise ValueError(f"Unsupported value for 'closed' parameter: {closed}")
 
     def __post_init__(self):
         if self.start is None:
@@ -187,10 +213,12 @@ class WindowSpec:
     """
     Specifies a window over which aggregate and analytic function may be applied.
-    grouping_keys: set of column ids to group on
-    preceding: Number of preceding rows in the window
-    following: Number of preceding rows in the window
-    ordering: List of columns ids and ordering direction to override base ordering
+
+    Attributes:
+        grouping_keys: A set of column ids to group on
+        bounds: The window boundaries
+        ordering: A list of column ids and ordering directions to override the base ordering
+        min_periods: The minimum number of observations in window required to have a value
     """
 
     grouping_keys: Tuple[ex.DerefOp, ...] = tuple()
@@ -199,7 +227,7 @@ class WindowSpec:
     min_periods: int = 0
 
     @property
-    def row_bounded(self):
+    def is_row_bounded(self):
        """
        Whether the window is bounded by row offsets.
 
@@ -208,6 +236,26 @@ def row_bounded(self):
        """
        return isinstance(self.bounds, RowsWindowBounds)
 
+    @property
+    def is_range_bounded(self):
+        """
+        Whether the window is bounded by range offsets.
+
+        This is relevant for determining whether the window requires a total order
+        to calculate deterministically.
+        """
+        return isinstance(self.bounds, RangeWindowBounds)
+
+    @property
+    def is_unbounded(self):
+        """
+        Whether the window is unbounded.
+
+        This is relevant for determining whether the window requires a total order
+        to calculate deterministically.
+ """ + return self.bounds is None + @property def all_referenced_columns(self) -> Set[ids.ColumnId]: """ @@ -220,7 +268,7 @@ def all_referenced_columns(self) -> Set[ids.ColumnId]: def without_order(self) -> WindowSpec: """Removes ordering clause if ordering isn't required to define bounds.""" - if self.row_bounded: + if self.is_row_bounded: raise ValueError("Cannot remove order from row-bounded window") return replace(self, ordering=()) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 7f9e62b7dd..95ea487786 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -67,12 +67,12 @@ import bigframes.core.utils as utils import bigframes.core.validations as validations import bigframes.core.window +from bigframes.core.window import rolling import bigframes.core.window_spec as windows import bigframes.dtypes import bigframes.exceptions as bfe import bigframes.formatting_helpers as formatter import bigframes.operations as ops -import bigframes.operations.aggregations import bigframes.operations.aggregations as agg_ops import bigframes.operations.ai import bigframes.operations.plotting as plotting @@ -1634,19 +1634,62 @@ def to_pandas( ) -> pandas.DataFrame | pandas.Series: """Write DataFrame to pandas DataFrame. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> df = bpd.DataFrame({'col': [4, 2, 2]}) + + Download the data from BigQuery and convert it into an in-memory pandas DataFrame. + + >>> df.to_pandas() + col + 0 4 + 1 2 + 2 2 + + Estimate job statistics without processing or downloading data by using `dry_run=True`. + + >>> df.to_pandas(dry_run=True) # doctest: +SKIP + columnCount 1 + columnDtypes {'col': Int64} + indexLevel 1 + indexDtypes [Int64] + projectId bigframes-dev + location US + jobType QUERY + destinationTable {'projectId': 'bigframes-dev', 'datasetId': '_... + useLegacySql False + referencedTables None + totalBytesProcessed 0 + cacheHit False + statementType SELECT + creationTime 2025-04-02 20:17:12.038000+00:00 + dtype: object + Args: max_download_size (int, default None): - Download size threshold in MB. If max_download_size is exceeded when downloading data - (e.g., to_pandas()), the data will be downsampled if - bigframes.options.sampling.enable_downsampling is True, otherwise, an error will be - raised. If set to a value other than None, this will supersede the global config. + .. deprecated:: 2.0.0 + ``max_download_size`` parameter is deprecated. Please use ``to_pandas_batches()`` + method instead. + + Download size threshold in MB. If ``max_download_size`` is exceeded when downloading data, + the data will be downsampled if ``bigframes.options.sampling.enable_downsampling`` is + ``True``, otherwise, an error will be raised. If set to a value other than ``None``, + this will supersede the global config. sampling_method (str, default None): + .. deprecated:: 2.0.0 + ``sampling_method`` parameter is deprecated. Please use ``sample()`` method instead. + Downsampling algorithms to be chosen from, the choices are: "head": This algorithm returns a portion of the data from the beginning. It is fast and requires minimal computations to perform the downsampling; "uniform": This algorithm returns uniform random samples of the data. If set to a value other than None, this will supersede the global config. random_state (int, default None): + .. deprecated:: 2.0.0 + ``random_state`` parameter is deprecated. Please use ``sample()`` method instead. + The seed for the uniform downsampling algorithm. 
                 If provided, the uniform method may take longer to execute and require more
                 computation. If set to a value other than None, this will supersede the global
                 config.
@@ -1666,8 +1709,19 @@ def to_pandas(
                 downsampled rows and all columns of this DataFrame. If dry_run is set, a pandas
                 Series containing dry run statistics will be returned.
         """
-
-        # TODO(orrbradford): Optimize this in future. Potentially some cases where we can return the stored query job
+        if max_download_size is not None:
+            msg = bfe.format_message(
+                "DEPRECATED: The `max_download_size` parameter for `DataFrame.to_pandas()` "
+                "is deprecated and will be removed soon. Please use `DataFrame.to_pandas_batches()`."
+            )
+            warnings.warn(msg, category=FutureWarning)
+        if sampling_method is not None or random_state is not None:
+            msg = bfe.format_message(
+                "DEPRECATED: The `sampling_method` and `random_state` parameters for "
+                "`DataFrame.to_pandas()` are deprecated and will be removed soon. "
+                "Please use `DataFrame.sample().to_pandas()` instead for sampling."
+            )
+            warnings.warn(msg, category=FutureWarning, stacklevel=2)
 
         if dry_run:
             dry_run_stats, dry_run_job = self._block._compute_dry_run(
@@ -1702,11 +1756,40 @@ def to_pandas_batches(
         page_size and max_results determine the size and number of batches,
         see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result
 
+        **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+        >>> df = bpd.DataFrame({'col': [4, 3, 2, 2, 3]})
+
+        Iterate through the results in batches, limiting the total rows yielded
+        across all batches via `max_results`:
+
+        >>> for df_batch in df.to_pandas_batches(max_results=3):
+        ...     print(df_batch)
+           col
+        0    4
+        1    3
+        2    2
+
+        Alternatively, control the approximate size of each batch using `page_size`
+        and fetch batches manually using `next()`:
+
+        >>> it = df.to_pandas_batches(page_size=2)
+        >>> next(it)
+           col
+        0    4
+        1    3
+        >>> next(it)
+           col
+        2    2
+        3    2
+
         Args:
             page_size (int, default None):
-                The size of each batch.
+                The maximum number of rows of each batch. Non-positive values are ignored.
             max_results (int, default None):
-                If given, only download this many rows at maximum.
+                The maximum total number of rows of all batches.
             allow_large_results (bool, default None):
                 If not None, overrides the global setting to allow or disallow large query results
                 over the default size limit of 10 GB.
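Given the guidance in the warnings above, a migration sketch for code that relied on the deprecated arguments (the fraction, seed, and page size are illustrative):

    # Before (deprecated in 2.0): sampling and size caps via to_pandas() arguments.
    # pdf = df.to_pandas(sampling_method="uniform", random_state=42)

    # After: sample explicitly, then download.
    pdf = df.sample(frac=0.1, random_state=42).to_pandas()

    # And stream large results in batches instead of relying on max_download_size.
    for batch in df.to_pandas_batches(page_size=10_000):
        ...  # each batch is an in-memory pandas DataFrame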
@@ -3310,16 +3393,33 @@ def _perform_join_by_index( @validations.requires_ordering() def rolling( self, - window: int, + window: int | pandas.Timedelta | numpy.timedelta64 | datetime.timedelta | str, min_periods=None, + on: str | None = None, closed: Literal["right", "left", "both", "neither"] = "right", ) -> bigframes.core.window.Window: - window_def = windows.WindowSpec( - bounds=windows.RowsWindowBounds.from_window_size(window, closed), - min_periods=min_periods if min_periods is not None else window, - ) - return bigframes.core.window.Window( - self._block, window_def, self._block.value_columns + if isinstance(window, int): + window_def = windows.WindowSpec( + bounds=windows.RowsWindowBounds.from_window_size(window, closed), + min_periods=min_periods if min_periods is not None else window, + ) + skip_agg_col_id = ( + None if on is None else self._block.resolve_label_exact_or_error(on) + ) + return bigframes.core.window.Window( + self._block, + window_def, + self._block.value_columns, + skip_agg_column_id=skip_agg_col_id, + ) + + return rolling.create_range_window( + self._block, + window, + min_periods=min_periods, + on=on, + closed=closed, + is_series=False, ) @validations.requires_ordering() @@ -3483,7 +3583,7 @@ def pct_change(self, periods: int = 1) -> DataFrame: def _apply_window_op( self, - op: agg_ops.WindowOp, + op: agg_ops.UnaryWindowOp, window_spec: windows.WindowSpec, ): block, result_ids = self._block.multi_apply_window_op( @@ -3768,7 +3868,7 @@ def to_gbq( # The client code owns this table reference now temp_table_ref = ( - self._session._temp_storage_manager.generate_unique_resource_id() + self._session._anon_dataset_manager.generate_unique_resource_id() ) destination_table = f"{temp_table_ref.project}.{temp_table_ref.dataset_id}.{temp_table_ref.table_id}" diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 22cc521e8e..47b128dae6 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -28,7 +28,7 @@ import numpy as np import pandas as pd import pyarrow as pa -import shapely # type: ignore +import shapely.geometry # type: ignore # Type hints for Pandas dtypes supported by BigQuery DataFrame Dtype = Union[ @@ -352,6 +352,24 @@ def is_comparable(type_: ExpressionType) -> bool: return (type_ is not None) and is_orderable(type_) +def get_struct_fields(type_: ExpressionType) -> dict[str, Dtype]: + assert isinstance(type_, pd.ArrowDtype) + assert isinstance(type_.pyarrow_dtype, pa.StructType) + struct_type = type_.pyarrow_dtype + result: dict[str, Dtype] = {} + for field_no in range(struct_type.num_fields): + field = struct_type.field(field_no) + result[field.name] = arrow_dtype_to_bigframes_dtype(field.type) + return result + + +def get_array_inner_type(type_: ExpressionType) -> Dtype: + assert isinstance(type_, pd.ArrowDtype) + assert isinstance(type_.pyarrow_dtype, pa.ListType) + list_type = type_.pyarrow_dtype + return arrow_dtype_to_bigframes_dtype(list_type.value_type) + + _ORDERABLE_SIMPLE_TYPES = set( mapping.dtype for mapping in SIMPLE_TYPES if mapping.orderable ) @@ -456,6 +474,8 @@ def bigframes_dtype_to_arrow_dtype( if bigframes_dtype in _BIGFRAMES_TO_ARROW: return _BIGFRAMES_TO_ARROW[bigframes_dtype] if isinstance(bigframes_dtype, pd.ArrowDtype): + if pa.types.is_duration(bigframes_dtype.pyarrow_dtype): + return bigframes_dtype.pyarrow_dtype if pa.types.is_list(bigframes_dtype.pyarrow_dtype): return bigframes_dtype.pyarrow_dtype if pa.types.is_struct(bigframes_dtype.pyarrow_dtype): @@ -486,7 +506,7 @@ def bigframes_dtype_to_literal( if 
isinstance(bigframes_dtype, pd.StringDtype):
         return "string"
     if isinstance(bigframes_dtype, gpd.array.GeometryDtype):
-        return shapely.Point((0, 0))
+        return shapely.geometry.Point((0, 0))
 
     raise TypeError(
         f"No literal conversion for {bigframes_dtype}. {constants.FEEDBACK_LINK}"
@@ -697,9 +717,10 @@ def convert_schema_field(
 
 
 def convert_to_schema_field(
-    name: str,
-    bigframes_dtype: Dtype,
+    name: str, bigframes_dtype: Dtype, overrides: dict[Dtype, str] = {}
 ) -> google.cloud.bigquery.SchemaField:
+    if bigframes_dtype in overrides:
+        return google.cloud.bigquery.SchemaField(name, overrides[bigframes_dtype])
     if bigframes_dtype in _BIGFRAMES_TO_TK:
         return google.cloud.bigquery.SchemaField(
             name, _BIGFRAMES_TO_TK[bigframes_dtype]
@@ -709,7 +730,7 @@ def convert_to_schema_field(
         inner_type = arrow_dtype_to_bigframes_dtype(
             bigframes_dtype.pyarrow_dtype.value_type
         )
-        inner_field = convert_to_schema_field(name, inner_type)
+        inner_field = convert_to_schema_field(name, inner_type, overrides)
         return google.cloud.bigquery.SchemaField(
             name, inner_field.field_type, mode="REPEATED", fields=inner_field.fields
         )
@@ -719,7 +740,9 @@ def convert_to_schema_field(
         for i in range(struct_type.num_fields):
             field = struct_type.field(i)
             inner_bf_type = arrow_dtype_to_bigframes_dtype(field.type)
-            inner_fields.append(convert_to_schema_field(field.name, inner_bf_type))
+            inner_fields.append(
+                convert_to_schema_field(field.name, inner_bf_type, overrides)
+            )
 
     return google.cloud.bigquery.SchemaField(
         name, "RECORD", fields=inner_fields
diff --git a/bigframes/functions/_function_client.py b/bigframes/functions/_function_client.py
index 44aea57898..8a591f6916 100644
--- a/bigframes/functions/_function_client.py
+++ b/bigframes/functions/_function_client.py
@@ -196,6 +196,7 @@ def provision_bq_managed_function(
         name,
         packages,
         is_row_processor,
+        bq_connection_id,
         *,
         capture_references=False,
     ):
@@ -273,12 +274,21 @@ def provision_bq_managed_function(
         udf_code = textwrap.dedent(inspect.getsource(func))
         udf_code = udf_code[udf_code.index("def") :]
 
+        with_connection_clause = (
+            (
+                f"WITH CONNECTION `{self._gcp_project_id}.{self._bq_location}.{bq_connection_id}`"
+            )
+            if bq_connection_id
+            else ""
+        )
+
         create_function_ddl = (
             textwrap.dedent(
                 f"""
                 CREATE OR REPLACE FUNCTION {persistent_func_id}({','.join(bq_function_args)})
                 RETURNS {bq_function_return_type}
                 LANGUAGE python
+                {with_connection_clause}
                 OPTIONS ({managed_function_options_str})
                 AS r'''
                 __UDF_PLACE_HOLDER__
@@ -365,7 +375,7 @@ def create_cloud_function(
         is_row_processor=False,
         vpc_connector=None,
         memory_mib=1024,
-        ingress_settings="all",
+        ingress_settings="internal-only",
     ):
         """Create a cloud function from the given user defined function."""
 
diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py
index c04de54be6..ec0e977782 100644
--- a/bigframes/functions/_function_session.py
+++ b/bigframes/functions/_function_session.py
@@ -167,7 +167,7 @@ def _resolve_bigquery_connection_id(
         if not bigquery_connection:
             bigquery_connection = session._bq_connection  # type: ignore
 
-        bigquery_connection = clients.resolve_full_bq_connection_name(
+        bigquery_connection = clients.get_canonical_bq_connection_id(
             bigquery_connection,
             default_project=dataset_ref.project,
             default_location=bq_location,
@@ -237,6 +237,7 @@ def _try_delattr(self, func: Callable, attr: str) -> None:
     # https://github.com/ibis-project/ibis/blob/master/ibis/backends/bigquery/udf/__init__.py
     def remote_function(
         self,
+        *,
         input_types: Union[None, type, Sequence[type]] = None,
         output_type: Optional[type] = None,
         session: Optional[Session] = None,
@@ -251,7 +252,7 @@ def remote_function(
         reuse: bool = True,
         name: Optional[str] = None,
         packages: Optional[Sequence[str]] = None,
-        cloud_function_service_account: Optional[str] = None,
+        cloud_function_service_account: str,
         cloud_function_kms_key_name: Optional[str] = None,
         cloud_function_docker_repository: Optional[str] = None,
         max_batching_rows: Optional[int] = 1000,
@@ -259,9 +260,9 @@ def remote_function(
         cloud_function_max_instances: Optional[int] = None,
         cloud_function_vpc_connector: Optional[str] = None,
         cloud_function_memory_mib: Optional[int] = 1024,
-        cloud_function_ingress_settings: Optional[
-            Literal["all", "internal-only", "internal-and-gclb"]
-        ] = None,
+        cloud_function_ingress_settings: Literal[
+            "all", "internal-only", "internal-and-gclb"
+        ] = "internal-only",
     ):
         """Decorator to turn a user defined function into a BigQuery remote function.
 
@@ -384,8 +385,8 @@ def remote_function(
                 Explicit name of the external package dependencies. Each dependency
                 is added to the `requirements.txt` as is, and can be of the form
                 supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/.
-            cloud_function_service_account (str, Optional):
-                Service account to use for the cloud functions. If not provided then
+            cloud_function_service_account (str):
+                Service account to use for the cloud functions. If "default" is provided then
                 the default service account would be used. See
                 https://cloud.google.com/functions/docs/securing/function-identity
                 for more details. Please make sure the service account has the
@@ -448,29 +449,20 @@ def remote_function(
                 https://cloud.google.com/functions/docs/configuring/memory.
             cloud_function_ingress_settings (str, Optional):
                 Ingress settings controls dictating what traffic can reach the
-                function. By default `all` will be used. It must be one of:
-                `all`, `internal-only`, `internal-and-gclb`. See for more details
+                function. Options are: `all`, `internal-only`, or `internal-and-gclb`.
+                If no setting is provided, `internal-only` will be used by default.
+                See for more details
                 https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings.
         """
         # Some defaults may be used from the session if not provided otherwise.
         session = self._resolve_session(session)
 
-        # raise a UserWarning if user does not explicitly set cloud_function_service_account to a
-        # user-managed cloud_function_service_account of to default
-        msg = bfe.format_message(
-            "You have not explicitly set a user-managed `cloud_function_service_account`. "
-            "Using the default Compute Engine service account. "
-            "In BigFrames 2.0 onwards, you would have to explicitly set `cloud_function_service_account` "
-            'either to a user-managed service account (preferred) or to `"default"` '
-            "to use the default Compute Engine service account (discouraged). "
-            "See, https://cloud.google.com/functions/docs/securing/function-identity."
-        )
-
+        # If the user forces the cloud function service account argument to
+        # None, throw an exception
        if cloud_function_service_account is None:
-            warnings.warn(msg, stacklevel=2, category=FutureWarning)
-
-        if cloud_function_service_account == "default":
-            cloud_function_service_account = None
+            raise ValueError(
+                'You must provide a user-managed cloud_function_service_account, or "default" if you would like to let the default service account be used.'
+            )
 
         # A BigQuery client is required to perform BQ operations.
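Under the new signature, a call now looks roughly like this sketch (the service-account value is a placeholder; passing the literal "default" opts into the default compute service account):

    # Illustrative only: parameters are keyword-only in 2.0, and
    # cloud_function_service_account must be supplied explicitly.
    import bigframes.pandas as bpd

    @bpd.remote_function(
        input_types=[int],
        output_type=int,
        cloud_function_service_account="default",  # or a user-managed SA email
        # cloud_function_ingress_settings now defaults to "internal-only".
    )
    def add_one(x: int) -> int:
        return x + 1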
bigquery_client = self._resolve_bigquery_client(session, bigquery_client) @@ -516,24 +508,11 @@ def remote_function( ) if cloud_function_ingress_settings is None: - cloud_function_ingress_settings = "all" + cloud_function_ingress_settings = "internal-only" msg = bfe.format_message( - "The `cloud_function_ingress_settings` are set to 'all' by default, " - "which will change to 'internal-only' for enhanced security in future version 2.0 onwards. " - "However, you will be able to explicitly pass cloud_function_ingress_settings='all' if you need. " - "See https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings for details." + "The `cloud_function_ingress_settings` is being set to 'internal-only' by default." ) - warnings.warn(msg, category=FutureWarning, stacklevel=2) - - if cloud_function_ingress_settings is None: - cloud_function_ingress_settings = "all" - msg = bfe.format_message( - "The `cloud_function_ingress_settings` are set to 'all' by default, " - "which will change to 'internal-only' for enhanced security in future version 2.0 onwards. " - "However, you will be able to explicitly pass cloud_function_ingress_settings='all' if you need. " - "See https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings for details." - ) - warnings.warn(msg, category=FutureWarning, stacklevel=2) + warnings.warn(msg, category=UserWarning, stacklevel=2) bq_connection_manager = session.bqconnectionmanager @@ -615,7 +594,9 @@ def wrapper(func): bq_connection_manager, cloud_function_region, cloud_functions_client, - cloud_function_service_account, + None + if cloud_function_service_account == "default" + else cloud_function_service_account, cloud_function_kms_key_name, cloud_function_docker_repository, session=session, # type: ignore @@ -794,6 +775,13 @@ def udf( Name of the BigQuery connection. It is used to provide an identity to the serverless instances running the user code. It helps BigQuery manage and track the resources used by the udf. + This connection is required for internet access and for + interacting with other GCP services. To access GCP services, the + appropriate IAM permissions must also be granted to the + connection's Service Account. When it defaults to None, the udf + will be created without any connection. A udf without a + connection has no internet access and no access to other GCP + services. name (str, Optional): Explicit name of the persisted BigQuery managed function. Use it with caution, because more than one users working in the same @@ -805,7 +793,7 @@ def udf( ``bigframes.pandas.reset_session``/ ``bigframes.pandas.clean_up_by_session_id``) does not clean up the function, and leaves it for the user to manage the function - and the associated cloud function directly. + directly. packages (str[], Optional): Explicit name of the external package dependencies. Each dependency is added to the `requirements.txt` as is, and can be @@ -826,9 +814,13 @@ def udf( bq_location, _ = _utils.get_remote_function_locations(bigquery_client.location) - # A connection is required for BQ managed function. - bq_connection_id = self._resolve_bigquery_connection_id( - session, dataset_ref, bq_location, bigquery_connection + # A connection is optional for BQ managed function. 
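To make the optional-connection behavior concrete, a sketch of both udf flavors (the dataset, function names, and connection id are placeholders; `dataset` and `name` are mandatory per this release's breaking changes):

    # Illustrative only: without a connection, the managed function has no
    # internet access and no access to other GCP services.
    import bigframes.pandas as bpd

    @bpd.udf(dataset="my_dataset", name="add_one")
    def add_one(x: int) -> int:
        return x + 1

    # With a connection, the generated DDL gains a
    # WITH CONNECTION `<project>.<location>.<connection_id>` clause.
    @bpd.udf(dataset="my_dataset", name="strlen", bigquery_connection="my-conn")
    def strlen(s: str) -> int:
        return len(s)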
+        bq_connection_id = (
+            self._resolve_bigquery_connection_id(
+                session, dataset_ref, bq_location, bigquery_connection
+            )
+            if bigquery_connection
+            else None
         )
 
         bq_connection_manager = session.bqconnectionmanager
@@ -926,6 +918,7 @@ def wrapper(func):
                 name=name,
                 packages=packages,
                 is_row_processor=is_row_processor,
+                bq_connection_id=bq_connection_id,
             )
 
             # TODO(shobs): Find a better way to support udfs with param named
diff --git a/bigframes/functions/function.py b/bigframes/functions/function.py
index 30b3d23056..858c25fada 100644
--- a/bigframes/functions/function.py
+++ b/bigframes/functions/function.py
@@ -27,9 +27,7 @@
     from bigframes.session import Session
 
 import google.api_core.exceptions
-import google.api_core.retry
 from google.cloud import bigquery
-import google.iam.v1
 
 import bigframes.core.compile.ibis_types
 import bigframes.dtypes
diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index 01917fd6d8..81637333b0 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -117,6 +117,12 @@ def model(self) -> bigquery.Model:
         """Get the BQML model associated with this wrapper"""
         return self._model
 
+    def recommend(self, input_data: bpd.DataFrame) -> bpd.DataFrame:
+        return self._apply_ml_tvf(
+            input_data,
+            self._model_manipulation_sql_generator.ml_recommend,
+        )
+
     def predict(self, input_data: bpd.DataFrame) -> bpd.DataFrame:
         return self._apply_ml_tvf(
             input_data,
diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py
index c98e18322a..ece950a5a2 100644
--- a/bigframes/ml/decomposition.py
+++ b/bigframes/ml/decomposition.py
@@ -19,6 +19,7 @@
 
 from typing import List, Literal, Optional, Union
 
+import bigframes_vendored.sklearn.decomposition._mf
 import bigframes_vendored.sklearn.decomposition._pca
 from google.cloud import bigquery
 
@@ -27,7 +28,15 @@
 import bigframes.pandas as bpd
 import bigframes.session
 
-_BQML_PARAMS_MAPPING = {"svd_solver": "pcaSolver"}
+_BQML_PARAMS_MAPPING = {
+    "svd_solver": "pcaSolver",
+    "feedback_type": "feedbackType",
+    "num_factors": "numFactors",
+    "user_col": "userColumn",
+    "item_col": "itemColumn",
+    "_input_label_columns": "inputLabelColumns",
+    "l2_reg": "l2Regularization",
+}
 
 
 @log_adapter.class_logger
@@ -197,3 +206,159 @@ def score(
         # TODO(b/291973741): X param is ignored. Update BQML supports input in ML.EVALUATE.
         return self._bqml_model.evaluate()
+
+
+@log_adapter.class_logger
+class MatrixFactorization(
+    base.UnsupervisedTrainablePredictor,
+    bigframes_vendored.sklearn.decomposition._mf.MatrixFactorization,
+):
+    __doc__ = bigframes_vendored.sklearn.decomposition._mf.MatrixFactorization.__doc__
+
+    def __init__(
+        self,
+        *,
+        feedback_type: Literal["explicit", "implicit"] = "explicit",
+        num_factors: int,
+        user_col: str,
+        item_col: str,
+        rating_col: str = "rating",
+        # TODO: Add support for hyperparameter tuning.
+        l2_reg: float = 1.0,
+    ):
+
+        feedback_type = feedback_type.lower()  # type: ignore
+        if feedback_type not in ("explicit", "implicit"):
+            raise ValueError("Expected feedback_type to be `explicit` or `implicit`.")
+
+        self.feedback_type = feedback_type
+
+        if not isinstance(num_factors, int):
+            raise TypeError(
+                f"Expected num_factors to be an int, but got {type(num_factors)}."
+            )
+
+        if num_factors <= 0:
+            raise ValueError(
+                f"Expected num_factors to be a positive integer, but got {num_factors}."
+            )
+
+        self.num_factors = num_factors
+
+        if not isinstance(user_col, str):
+            raise TypeError(f"Expected user_col to be a str, but got {type(user_col)}.")
+
+        self.user_col = user_col
+
+        if not isinstance(item_col, str):
+            raise TypeError(f"Expected item_col to be a str, but got {type(item_col)}.")
+
+        self.item_col = item_col
+
+        if not isinstance(rating_col, str):
+            raise TypeError(
+                f"Expected rating_col to be a str, but got {type(rating_col)}."
+            )
+
+        self._input_label_columns = [rating_col]
+
+        if not isinstance(l2_reg, (float, int)):
+            raise TypeError(
+                f"Expected l2_reg to be a float or int, but got {type(l2_reg)}."
+            )
+
+        self.l2_reg = l2_reg
+        self._bqml_model: Optional[core.BqmlModel] = None
+        self._bqml_model_factory = globals.bqml_model_factory()
+
+    @property
+    def rating_col(self) -> str:
+        """str: The rating column name. Defaults to 'rating'."""
+        return self._input_label_columns[0]
+
+    @classmethod
+    def _from_bq(
+        cls, session: bigframes.session.Session, bq_model: bigquery.Model
+    ) -> MatrixFactorization:
+        assert bq_model.model_type == "MATRIX_FACTORIZATION"
+
+        kwargs = utils.retrieve_params_from_bq_model(
+            cls, bq_model, _BQML_PARAMS_MAPPING
+        )
+
+        model = cls(**kwargs)
+        model._bqml_model = core.BqmlModel(session, bq_model)
+        return model
+
+    @property
+    def _bqml_options(self) -> dict:
+        """The model options as they will be set for BQML"""
+        options: dict = {
+            "model_type": "matrix_factorization",
+            "feedback_type": self.feedback_type,
+            "user_col": self.user_col,
+            "item_col": self.item_col,
+            "rating_col": self.rating_col,
+            "l2_reg": self.l2_reg,
+        }
+
+        if self.num_factors is not None:
+            options["num_factors"] = self.num_factors
+
+        return options
+
+    def _fit(
+        self,
+        X: utils.ArrayType,
+        y=None,
+        transforms: Optional[List[str]] = None,
+    ) -> MatrixFactorization:
+        if y is not None:
+            raise ValueError(
+                "Label column not supported for Matrix Factorization model but y was not `None`"
+            )
+
+        (X,) = utils.batch_convert_to_dataframe(X)
+
+        self._bqml_model = self._bqml_model_factory.create_model(
+            X_train=X,
+            transforms=transforms,
+            options=self._bqml_options,
+        )
+        return self
+
+    def predict(self, X: utils.ArrayType) -> bpd.DataFrame:
+        if not self._bqml_model:
+            raise RuntimeError("A model must be fitted before recommend")
+
+        (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session)
+
+        return self._bqml_model.recommend(X)
+
+    def to_gbq(self, model_name: str, replace: bool = False) -> MatrixFactorization:
+        """Save the model to BigQuery.
+
+        Args:
+            model_name (str):
+                The name of the model.
+            replace (bool, default False):
+                Determine whether to replace if the model already exists. Default to False.
+
+        Returns:
+            MatrixFactorization: Saved model."""
+        if not self._bqml_model:
+            raise RuntimeError("A model must be fitted before it can be saved")
+
+        new_model = self._bqml_model.copy(model_name, replace)
+        return new_model.session.read_gbq_model(model_name)
+
+    def score(
+        self,
+        X=None,
+        y=None,
+    ) -> bpd.DataFrame:
+        if not self._bqml_model:
+            raise RuntimeError("A model must be fitted before score")
+
+        # TODO(b/291973741): X param is ignored. Update BQML supports input in ML.EVALUATE.
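Putting the new estimator together, a usage sketch (the table and column names are placeholders; `fit()` is the public entry point supplied by the estimator base classes and delegates to `_fit`):

    # Illustrative only: train and apply a BQML MATRIX_FACTORIZATION model.
    import bigframes.pandas as bpd
    from bigframes.ml.decomposition import MatrixFactorization

    ratings = bpd.read_gbq("my_project.my_dataset.ratings")

    model = MatrixFactorization(
        feedback_type="explicit",
        num_factors=16,
        user_col="user_id",
        item_col="item_id",
        rating_col="rating",
    )
    model.fit(ratings)              # CREATE MODEL under the hood
    preds = model.predict(ratings)  # runs ML.RECOMMEND via BqmlModel.recommend
    model.to_gbq("my_project.my_dataset.mf_model", replace=True)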
+ return self._bqml_model.evaluate() diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 1fd9fbc4a7..cce05ea1f2 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -21,9 +21,8 @@ import bigframes_vendored.constants as constants from google.cloud import bigquery -import typing_extensions -from bigframes import clients, dtypes, exceptions +from bigframes import dtypes, exceptions import bigframes.bigquery as bbq from bigframes.core import blocks, global_session, log_adapter import bigframes.dataframe @@ -34,20 +33,6 @@ "max_iterations": "maxIterations", } -_TEXT_GENERATOR_BISON_ENDPOINT = "text-bison" -_TEXT_GENERATOR_BISON_32K_ENDPOINT = "text-bison-32k" -_TEXT_GENERATOR_ENDPOINTS = ( - _TEXT_GENERATOR_BISON_ENDPOINT, - _TEXT_GENERATOR_BISON_32K_ENDPOINT, -) - -_EMBEDDING_GENERATOR_GECKO_ENDPOINT = "textembedding-gecko" -_EMBEDDING_GENERATOR_GECKO_MULTILINGUAL_ENDPOINT = "textembedding-gecko-multilingual" -_PALM2_EMBEDDING_GENERATOR_ENDPOINTS = ( - _EMBEDDING_GENERATOR_GECKO_ENDPOINT, - _EMBEDDING_GENERATOR_GECKO_MULTILINGUAL_ENDPOINT, -) - _TEXT_EMBEDDING_005_ENDPOINT = "text-embedding-005" _TEXT_EMBEDDING_004_ENDPOINT = "text-embedding-004" _TEXT_MULTILINGUAL_EMBEDDING_002_ENDPOINT = "text-multilingual-embedding-002" @@ -59,7 +44,6 @@ _MULTIMODAL_EMBEDDING_001_ENDPOINT = "multimodalembedding@001" -_GEMINI_PRO_ENDPOINT = "gemini-pro" _GEMINI_1P5_PRO_PREVIEW_ENDPOINT = "gemini-1.5-pro-preview-0514" _GEMINI_1P5_PRO_FLASH_PREVIEW_ENDPOINT = "gemini-1.5-flash-preview-0514" _GEMINI_1P5_PRO_001_ENDPOINT = "gemini-1.5-pro-001" @@ -67,8 +51,9 @@ _GEMINI_1P5_FLASH_001_ENDPOINT = "gemini-1.5-flash-001" _GEMINI_1P5_FLASH_002_ENDPOINT = "gemini-1.5-flash-002" _GEMINI_2_FLASH_EXP_ENDPOINT = "gemini-2.0-flash-exp" +_GEMINI_2_FLASH_001_ENDPOINT = "gemini-2.0-flash-001" +_GEMINI_2_FLASH_LITE_001_ENDPOINT = "gemini-2.0-flash-lite-001" _GEMINI_ENDPOINTS = ( - _GEMINI_PRO_ENDPOINT, _GEMINI_1P5_PRO_PREVIEW_ENDPOINT, _GEMINI_1P5_PRO_FLASH_PREVIEW_ENDPOINT, _GEMINI_1P5_PRO_001_ENDPOINT, @@ -76,6 +61,8 @@ _GEMINI_1P5_FLASH_001_ENDPOINT, _GEMINI_1P5_FLASH_002_ENDPOINT, _GEMINI_2_FLASH_EXP_ENDPOINT, + _GEMINI_2_FLASH_001_ENDPOINT, + _GEMINI_2_FLASH_LITE_001_ENDPOINT, ) _GEMINI_PREVIEW_ENDPOINTS = ( _GEMINI_1P5_PRO_PREVIEW_ENDPOINT, @@ -83,7 +70,6 @@ _GEMINI_2_FLASH_EXP_ENDPOINT, ) _GEMINI_FINE_TUNE_SCORE_ENDPOINTS = ( - _GEMINI_PRO_ENDPOINT, _GEMINI_1P5_PRO_002_ENDPOINT, _GEMINI_1P5_FLASH_002_ENDPOINT, ) @@ -108,7 +94,6 @@ _ML_GENERATE_TEXT_STATUS = "ml_generate_text_status" -_ML_EMBED_TEXT_STATUS = "ml_embed_text_status" _ML_GENERATE_EMBEDDING_STATUS = "ml_generate_embedding_status" _MODEL_NOT_SUPPORTED_WARNING = ( @@ -118,514 +103,7 @@ "You should use this model name only if you are sure that it is supported in BigQuery." ) - -@typing_extensions.deprecated( - "PaLM2TextGenerator is going to be deprecated. Use GeminiTextGenerator(https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. ", - category=exceptions.ApiDeprecationWarning, -) -@log_adapter.class_logger -class PaLM2TextGenerator(base.BaseEstimator): - """PaLM2 text generator LLM model. - - .. note:: - PaLM2TextGenerator is going to be deprecated. Use GeminiTextGenerator(https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. - - Args: - model_name (str, Default to "text-bison"): - The model for natural language tasks. 
“text-bison” returns model fine-tuned to follow natural language instructions - and is suitable for a variety of language tasks. "text-bison-32k" supports up to 32k tokens per request. - Default to "text-bison". - session (bigframes.Session or None): - BQ session to create the model. If None, use the global default session. - connection_name (str or None): - Connection to connect with remote service. str of the format ... - If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach - permission if the connection isn't fully set up. - max_iterations (Optional[int], Default to 300): - The number of steps to run when performing supervised tuning. - """ - - def __init__( - self, - *, - model_name: Literal["text-bison", "text-bison-32k"] = "text-bison", - session: Optional[bigframes.Session] = None, - connection_name: Optional[str] = None, - max_iterations: int = 300, - ): - self.model_name = model_name - self.session = session or global_session.get_global_session() - self.max_iterations = max_iterations - self._bq_connection_manager = self.session.bqconnectionmanager - - connection_name = connection_name or self.session._bq_connection - self.connection_name = clients.resolve_full_bq_connection_name( - connection_name, - default_project=self.session._project, - default_location=self.session._location, - ) - - self._bqml_model_factory = globals.bqml_model_factory() - self._bqml_model: core.BqmlModel = self._create_bqml_model() - - def _create_bqml_model(self): - # Parse and create connection if needed. - if not self.connection_name: - raise ValueError( - "Must provide connection_name, either in constructor or through session options." - ) - - if self._bq_connection_manager: - connection_name_parts = self.connection_name.split(".") - if len(connection_name_parts) != 3: - raise ValueError( - f"connection_name must be of the format .., got {self.connection_name}." 
- ) - self._bq_connection_manager.create_bq_connection( - project_id=connection_name_parts[0], - location=connection_name_parts[1], - connection_id=connection_name_parts[2], - iam_role="aiplatform.user", - ) - - if self.model_name not in _TEXT_GENERATOR_ENDPOINTS: - msg = exceptions.format_message( - _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_TEXT_GENERATOR_ENDPOINTS), - ) - ) - warnings.warn(msg) - - options = { - "endpoint": self.model_name, - } - - return self._bqml_model_factory.create_remote_model( - session=self.session, connection_name=self.connection_name, options=options - ) - - @classmethod - def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model - ) -> PaLM2TextGenerator: - assert bq_model.model_type == "MODEL_TYPE_UNSPECIFIED" - assert "remoteModelInfo" in bq_model._properties - assert "endpoint" in bq_model._properties["remoteModelInfo"] - assert "connection" in bq_model._properties["remoteModelInfo"] - - # Parse the remote model endpoint - bqml_endpoint = bq_model._properties["remoteModelInfo"]["endpoint"] - model_connection = bq_model._properties["remoteModelInfo"]["connection"] - model_endpoint = bqml_endpoint.split("/")[-1] - - kwargs = utils.retrieve_params_from_bq_model( - cls, bq_model, _BQML_PARAMS_MAPPING - ) - - model = cls( - **kwargs, - session=session, - model_name=model_endpoint, - connection_name=model_connection, - ) - model._bqml_model = core.BqmlModel(session, bq_model) - return model - - @property - def _bqml_options(self) -> dict: - """The model options as they will be set for BQML""" - options = { - "max_iterations": self.max_iterations, - "data_split_method": "NO_SPLIT", - } - return options - - def fit( - self, - X: utils.ArrayType, - y: utils.ArrayType, - ) -> PaLM2TextGenerator: - """Fine tune PaLM2TextGenerator model. - - .. note:: - - This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - - Args: - X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): - DataFrame of shape (n_samples, n_features). Training data. - y (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): - Training labels. - - Returns: - PaLM2TextGenerator: Fitted estimator. - """ - X, y = utils.batch_convert_to_dataframe(X, y) - - options = self._bqml_options - options["endpoint"] = self.model_name + "@001" - options["prompt_col"] = X.columns.tolist()[0] - - self._bqml_model = self._bqml_model_factory.create_llm_remote_model( - X, - y, - options=options, - connection_name=self.connection_name, - ) - return self - - def predict( - self, - X: utils.ArrayType, - *, - temperature: float = 0.0, - max_output_tokens: int = 128, - top_k: int = 40, - top_p: float = 0.95, - ) -> bigframes.dataframe.DataFrame: - """Predict the result from input DataFrame. - - Args: - X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): - Input DataFrame or Series, can contain one or more columns. If multiple columns are in the DataFrame, it must contain a "prompt" column for prediction. 
- Prompts can include preamble, questions, suggestions, instructions, or examples. - - temperature (float, default 0.0): - The temperature is used for sampling during the response generation, which occurs when topP and topK are applied. - Temperature controls the degree of randomness in token selection. Lower temperatures are good for prompts that expect a true or correct response, - while higher temperatures can lead to more diverse or unexpected results. A temperature of 0 is deterministic: - the highest probability token is always selected. For most use cases, try starting with a temperature of 0.2. - Default 0. Possible values [0.0, 1.0]. - - max_output_tokens (int, default 128): - Maximum number of tokens that can be generated in the response. Specify a lower value for shorter responses and a higher value for longer responses. - A token may be smaller than a word. A token is approximately four characters. 100 tokens correspond to roughly 60-80 words. - Default 128. For the 'text-bison' model, possible values are in the range [1, 1024]. For the 'text-bison-32k' model, possible values are in the range [1, 8192]. - Please ensure that the specified value for max_output_tokens is within the appropriate range for the model being used. - - top_k (int, default 40): - Top-k changes how the model selects tokens for output. A top-k of 1 means the selected token is the most probable among all tokens - in the model's vocabulary (also called greedy decoding), while a top-k of 3 means that the next token is selected from among the 3 most probable tokens (using temperature). - For each token selection step, the top K tokens with the highest probabilities are sampled. Then tokens are further filtered based on topP with the final token selected using temperature sampling. - Specify a lower value for less random responses and a higher value for more random responses. - Default 40. Possible values [1, 40]. - - top_p (float, default 0.95):: - Top-p changes how the model selects tokens for output. Tokens are selected from most K (see topK parameter) probable to least until the sum of their probabilities equals the top-p value. - For example, if tokens A, B, and C have a probability of 0.3, 0.2, and 0.1 and the top-p value is 0.5, then the model will select either A or B as the next token (using temperature) - and not consider C at all. - Specify a lower value for less random responses and a higher value for more random responses. - Default 0.95. Possible values [0.0, 1.0]. - - - Returns: - bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. - """ - - # Params reference: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models - if temperature < 0.0 or temperature > 1.0: - raise ValueError(f"temperature must be [0.0, 1.0], but is {temperature}.") - - if ( - self.model_name == _TEXT_GENERATOR_BISON_ENDPOINT - and max_output_tokens not in range(1, 1025) - ): - raise ValueError( - f"max_output_token must be [1, 1024] for TextBison model, but is {max_output_tokens}." - ) - - if ( - self.model_name == _TEXT_GENERATOR_BISON_32K_ENDPOINT - and max_output_tokens not in range(1, 8193) - ): - raise ValueError( - f"max_output_token must be [1, 8192] for TextBison 32k model, but is {max_output_tokens}." 
- ) - - if top_k not in range(1, 41): - raise ValueError(f"top_k must be [1, 40], but is {top_k}.") - - if top_p < 0.0 or top_p > 1.0: - raise ValueError(f"top_p must be [0.0, 1.0], but is {top_p}.") - - (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) - - if len(X.columns) == 1: - # BQML identified the column by name - col_label = cast(blocks.Label, X.columns[0]) - X = X.rename(columns={col_label: "prompt"}) - - options = { - "temperature": temperature, - "max_output_tokens": max_output_tokens, - "top_k": top_k, - "top_p": top_p, - "flatten_json_output": True, - } - - df = self._bqml_model.generate_text(X, options) - - if (df[_ML_GENERATE_TEXT_STATUS] != "").any(): - msg = exceptions.format_message( - f"Some predictions failed. Check column {_ML_GENERATE_TEXT_STATUS} for " - "detailed status. You may want to filter the failed rows and retry." - ) - warnings.warn(msg, category=RuntimeWarning) - - return df - - def score( - self, - X: utils.ArrayType, - y: utils.ArrayType, - task_type: Literal[ - "text_generation", "classification", "summarization", "question_answering" - ] = "text_generation", - ) -> bigframes.dataframe.DataFrame: - """Calculate evaluation metrics of the model. - - .. note:: - - This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - - .. note:: - - Output matches that of the BigQuery ML.EVALUATE function. - See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#remote-model-llm - for the outputs relevant to this model type. - - Args: - X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): - A BigQuery DataFrame as evaluation data, which contains only one column of input_text - that contains the prompt text to use when evaluating the model. - y (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): - A BigQuery DataFrame as evaluation labels, which contains only one column of output_text - that you would expect to be returned by the model. - task_type (str): - The type of the task for LLM model. Default to "text_generation". - Possible values: "text_generation", "classification", "summarization", and "question_answering". - - Returns: - bigframes.dataframe.DataFrame: The DataFrame as evaluation result. - """ - if not self._bqml_model: - raise RuntimeError("A model must be fitted before score") - - X, y = utils.batch_convert_to_dataframe(X, y, session=self._bqml_model.session) - - if len(X.columns) != 1 or len(y.columns) != 1: - raise ValueError( - f"Only support one column as input for X and y. {constants.FEEDBACK_LINK}" - ) - - # BQML identified the column by name - X_col_label = cast(blocks.Label, X.columns[0]) - y_col_label = cast(blocks.Label, y.columns[0]) - X = X.rename(columns={X_col_label: "input_text"}) - y = y.rename(columns={y_col_label: "output_text"}) - - input_data = X.join(y, how="outer") - - return self._bqml_model.llm_evaluate(input_data, task_type) - - def to_gbq(self, model_name: str, replace: bool = False) -> PaLM2TextGenerator: - """Save the model to BigQuery. - - Args: - model_name (str): - The name of the model. 
- replace (bool, default False): - Determine whether to replace if the model already exists. Default to False. - - Returns: - PaLM2TextGenerator: Saved model.""" - - new_model = self._bqml_model.copy(model_name, replace) - return new_model.session.read_gbq_model(model_name) - - -@typing_extensions.deprecated( - "PaLM2TextEmbeddingGenerator has been deprecated. Use TextEmbeddingGenerator(https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator) instead. ", - category=exceptions.ApiDeprecationWarning, -) -@log_adapter.class_logger -class PaLM2TextEmbeddingGenerator(base.BaseEstimator): - """PaLM2 text embedding generator LLM model. - - .. note:: - PaLM2TextEmbeddingGenerator has been deprecated. Use TextEmbeddingGenerator(https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator) instead. - - - Args: - model_name (str, Default to "textembedding-gecko"): - The model for text embedding. “textembedding-gecko” returns model embeddings for text inputs. - "textembedding-gecko-multilingual" returns model embeddings for text inputs which support over 100 languages. - Default to "textembedding-gecko". - version (str or None): - Model version. Accepted values are "001", "002", "003", "latest" etc. Will use the default version if unset. - See https://cloud.google.com/vertex-ai/docs/generative-ai/learn/model-versioning for details. - session (bigframes.Session or None): - BQ session to create the model. If None, use the global default session. - connection_name (str or None): - Connection to connect with remote service. str of the format ... - If None, use default connection in session context. - """ - - def __init__( - self, - *, - model_name: Literal[ - "textembedding-gecko", "textembedding-gecko-multilingual" - ] = "textembedding-gecko", - version: Optional[str] = None, - session: Optional[bigframes.Session] = None, - connection_name: Optional[str] = None, - ): - self.model_name = model_name - self.version = version - self.session = session or global_session.get_global_session() - self._bq_connection_manager = self.session.bqconnectionmanager - - connection_name = connection_name or self.session._bq_connection - self.connection_name = clients.resolve_full_bq_connection_name( - connection_name, - default_project=self.session._project, - default_location=self.session._location, - ) - - self._bqml_model_factory = globals.bqml_model_factory() - self._bqml_model: core.BqmlModel = self._create_bqml_model() - - def _create_bqml_model(self): - # Parse and create connection if needed. - if not self.connection_name: - raise ValueError( - "Must provide connection_name, either in constructor or through session options." - ) - - if self._bq_connection_manager: - connection_name_parts = self.connection_name.split(".") - if len(connection_name_parts) != 3: - raise ValueError( - f"connection_name must be of the format .., got {self.connection_name}." 
- ) - self._bq_connection_manager.create_bq_connection( - project_id=connection_name_parts[0], - location=connection_name_parts[1], - connection_id=connection_name_parts[2], - iam_role="aiplatform.user", - ) - - if self.model_name not in _PALM2_EMBEDDING_GENERATOR_ENDPOINTS: - msg = exceptions.format_message( - _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_PALM2_EMBEDDING_GENERATOR_ENDPOINTS), - ) - ) - warnings.warn(msg) - - endpoint = ( - self.model_name + "@" + self.version if self.version else self.model_name - ) - options = { - "endpoint": endpoint, - } - return self._bqml_model_factory.create_remote_model( - session=self.session, connection_name=self.connection_name, options=options - ) - - @classmethod - def _from_bq( - cls, session: bigframes.Session, bq_model: bigquery.Model - ) -> PaLM2TextEmbeddingGenerator: - assert bq_model.model_type == "MODEL_TYPE_UNSPECIFIED" - assert "remoteModelInfo" in bq_model._properties - assert "endpoint" in bq_model._properties["remoteModelInfo"] - assert "connection" in bq_model._properties["remoteModelInfo"] - - # Parse the remote model endpoint - bqml_endpoint = bq_model._properties["remoteModelInfo"]["endpoint"] - model_connection = bq_model._properties["remoteModelInfo"]["connection"] - model_endpoint = bqml_endpoint.split("/")[-1] - - model_name, version = utils.parse_model_endpoint(model_endpoint) - - model = cls( - session=session, - # str to literals - model_name=model_name, # type: ignore - version=version, - connection_name=model_connection, - ) - - model._bqml_model = core.BqmlModel(session, bq_model) - return model - - def predict(self, X: utils.ArrayType) -> bigframes.dataframe.DataFrame: - """Predict the result from input DataFrame. - - Args: - X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): - Input DataFrame or Series, can contain one or more columns. If multiple columns are in the DataFrame, it must contain a "content" column for prediction. - - Returns: - bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. - """ - - # Params reference: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models - (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) - - if len(X.columns) == 1: - # BQML identified the column by name - col_label = cast(blocks.Label, X.columns[0]) - X = X.rename(columns={col_label: "content"}) - - options = { - "flatten_json_output": True, - } - - df = self._bqml_model.generate_embedding(X, options) - df = df.rename( - columns={ - "ml_generate_embedding_result": "text_embedding", - "ml_generate_embedding_statistics": "statistics", - "ml_generate_embedding_status": _ML_EMBED_TEXT_STATUS, - } - ) - - if (df[_ML_EMBED_TEXT_STATUS] != "").any(): - msg = exceptions.format_message( - f"Some predictions failed. Check column {_ML_EMBED_TEXT_STATUS} for " - "detailed status. You may want to filter the failed rows and retry." - ) - warnings.warn(msg, category=RuntimeWarning) - - return df - - def to_gbq( - self, model_name: str, replace: bool = False - ) -> PaLM2TextEmbeddingGenerator: - """Save the model to BigQuery. - - Args: - model_name (str): - The name of the model. - replace (bool, default False): - Determine whether to replace if the model already exists. Default to False. 
- - Returns: - PaLM2TextEmbeddingGenerator: Saved model.""" - - new_model = self._bqml_model.copy(model_name, replace) - return new_model.session.read_gbq_model(model_name) +_REMOVE_DEFAULT_MODEL_WARNING = "Since upgrading the default model can cause unintended breakages, the default model will be removed in BigFrames 3.0. Please supply an explicit model to avoid this message." @log_adapter.class_logger @@ -637,7 +115,8 @@ class TextEmbeddingGenerator(base.RetriableRemotePredictor): The model for text embedding. Possible values are "text-embedding-005", "text-embedding-004" or "text-multilingual-embedding-002". text-embedding models returns model embeddings for text inputs. text-multilingual-embedding models returns model embeddings for text inputs which support over 100 languages. - Default to "text-embedding-004". + If no setting is provided, "text-embedding-004" will be used by + default and a warning will be issued. session (bigframes.Session or None): BQ session to create the model. If None, use the global default session. connection_name (str or None): @@ -648,14 +127,20 @@ class TextEmbeddingGenerator(base.RetriableRemotePredictor): def __init__( self, *, - model_name: Literal[ - "text-embedding-005", - "text-embedding-004", - "text-multilingual-embedding-002", - ] = "text-embedding-004", + model_name: Optional[ + Literal[ + "text-embedding-005", + "text-embedding-004", + "text-multilingual-embedding-002", + ] + ] = None, session: Optional[bigframes.Session] = None, connection_name: Optional[str] = None, ): + if model_name is None: + model_name = "text-embedding-004" + msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING) + warnings.warn(msg, category=FutureWarning, stacklevel=2) self.model_name = model_name self.session = session or global_session.get_global_session() self.connection_name = connection_name @@ -780,7 +265,8 @@ class MultimodalEmbeddingGenerator(base.RetriableRemotePredictor): Args: model_name (str, Default to "multimodalembedding@001"): The model for multimodal embedding. Can set to "multimodalembedding@001". Multimodal-embedding models returns model embeddings for text, image and video inputs. - Default to "multimodalembedding@001". + If no setting is provided, "multimodalembedding@001" will be used by + default and a warning will be issued. session (bigframes.Session or None): BQ session to create the model. If None, use the global default session. connection_name (str or None): @@ -791,12 +277,16 @@ class MultimodalEmbeddingGenerator(base.RetriableRemotePredictor): def __init__( self, *, - model_name: Literal["multimodalembedding@001"] = "multimodalembedding@001", + model_name: Optional[Literal["multimodalembedding@001"]] = None, session: Optional[bigframes.Session] = None, connection_name: Optional[str] = None, ): if not bigframes.options.experiments.blob: raise NotImplementedError() + if model_name is None: + model_name = "multimodalembedding@001" + msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING) + warnings.warn(msg, category=FutureWarning, stacklevel=2) self.model_name = model_name self.session = session or global_session.get_global_session() self.connection_name = connection_name @@ -918,23 +408,24 @@ def to_gbq( return new_model.session.read_gbq_model(model_name) -@typing_extensions.deprecated( - "gemini-pro and gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. 
", - category=exceptions.ApiDeprecationWarning, -) @log_adapter.class_logger class GeminiTextGenerator(base.RetriableRemotePredictor): """Gemini text generator LLM model. .. note:: - gemini-pro and gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. + gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. Args: - model_name (str, Default to "gemini-pro"): - The model for natural language tasks. Accepted values are "gemini-pro", "gemini-1.5-pro-preview-0514", "gemini-1.5-flash-preview-0514", "gemini-1.5-pro-001", "gemini-1.5-pro-002", "gemini-1.5-flash-001", "gemini-1.5-flash-002" and "gemini-2.0-flash-exp". Default to "gemini-pro". + model_name (str, Default to "gemini-2.0-flash-001"): + The model for natural language tasks. Accepted values are + "gemini-1.5-pro-preview-0514", "gemini-1.5-flash-preview-0514", + "gemini-1.5-pro-001", "gemini-1.5-pro-002", "gemini-1.5-flash-001", + "gemini-1.5-flash-002", "gemini-2.0-flash-exp", + "gemini-2.0-flash-lite-001", and "gemini-2.0-flash-001". + If no setting is provided, "gemini-2.0-flash-001" will be used by + default and a warning will be issued. .. note:: - "gemini-pro" is going to be deprecated. Bigframes 2 will transition to using gemini-2.0-X. "gemini-2.0-flash-exp", "gemini-1.5-pro-preview-0514" and "gemini-1.5-flash-preview-0514" is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" and might have limited support. For more information, see the launch stage descriptions @@ -953,16 +444,19 @@ class GeminiTextGenerator(base.RetriableRemotePredictor): def __init__( self, *, - model_name: Literal[ - "gemini-pro", - "gemini-1.5-pro-preview-0514", - "gemini-1.5-flash-preview-0514", - "gemini-1.5-pro-001", - "gemini-1.5-pro-002", - "gemini-1.5-flash-001", - "gemini-1.5-flash-002", - "gemini-2.0-flash-exp", - ] = "gemini-pro", + model_name: Optional[ + Literal[ + "gemini-1.5-pro-preview-0514", + "gemini-1.5-flash-preview-0514", + "gemini-1.5-pro-001", + "gemini-1.5-pro-002", + "gemini-1.5-flash-001", + "gemini-1.5-flash-002", + "gemini-2.0-flash-exp", + "gemini-2.0-flash-001", + "gemini-2.0-flash-lite-001", + ] + ] = None, session: Optional[bigframes.Session] = None, connection_name: Optional[str] = None, max_iterations: int = 300, @@ -977,6 +471,10 @@ def __init__( "(https://cloud.google.com/products#product-launch-stages)." ) warnings.warn(msg, category=exceptions.PreviewWarning) + if model_name is None: + model_name = "gemini-2.0-flash-001" + msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING) + warnings.warn(msg, category=FutureWarning, stacklevel=2) self.model_name = model_name self.session = session or global_session.get_global_session() self.max_iterations = max_iterations @@ -1052,8 +550,8 @@ def fit( X: utils.ArrayType, y: utils.ArrayType, ) -> GeminiTextGenerator: - """Fine tune GeminiTextGenerator model. Only support "gemini-pro", "gemini-1.5-pro-002", - "gemini-1.5-flash-002" models for now. + """Fine tune GeminiTextGenerator model. Only support "gemini-1.5-pro-002", + and "gemini-1.5-flash-002" models for now. .. 
note:: @@ -1073,16 +571,13 @@ def fit( """ if self.model_name not in _GEMINI_FINE_TUNE_SCORE_ENDPOINTS: raise NotImplementedError( - "fit() only supports gemini-pro, \ - gemini-1.5-pro-002, or gemini-1.5-flash-002 model." + "fit() only supports gemini-1.5-pro-002, or gemini-1.5-flash-002 model." ) X, y = utils.batch_convert_to_dataframe(X, y) options = self._bqml_options - options["endpoint"] = ( - "gemini-1.0-pro-002" if self.model_name == "gemini-pro" else self.model_name - ) + options["endpoint"] = self.model_name options["prompt_col"] = X.columns.tolist()[0] self._bqml_model = self._bqml_model_factory.create_llm_remote_model( @@ -1231,7 +726,8 @@ def score( "text_generation", "classification", "summarization", "question_answering" ] = "text_generation", ) -> bigframes.dataframe.DataFrame: - """Calculate evaluation metrics of the model. Only support "gemini-pro" and "gemini-1.5-pro-002", and "gemini-1.5-flash-002". + """Calculate evaluation metrics of the model. Only support + "gemini-1.5-pro-002", and "gemini-1.5-flash-002". .. note:: @@ -1265,8 +761,7 @@ def score( if self.model_name not in _GEMINI_FINE_TUNE_SCORE_ENDPOINTS: raise NotImplementedError( - "score() only supports gemini-pro \ - , gemini-1.5-pro-002, and gemini-1.5-flash-2 model." + "score() only supports gemini-1.5-pro-002, and gemini-1.5-flash-2 model." ) X, y = utils.batch_convert_to_dataframe(X, y, session=self._bqml_model.session) @@ -1329,7 +824,8 @@ class Claude3TextGenerator(base.RetriableRemotePredictor): "claude-3-5-sonnet" is Anthropic's most powerful AI model and maintains the speed and cost of Claude 3 Sonnet, which is a mid-tier model. "claude-3-opus" is Anthropic's second-most powerful AI model, with strong performance on highly complex tasks. https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude#available-claude-models - Default to "claude-3-sonnet". + If no setting is provided, "claude-3-sonnet" will be used by default + and a warning will be issued. session (bigframes.Session or None): BQ session to create the model. If None, use the global default session. 
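All four generator constructors in this file now follow the same 2.0 pattern: `model_name` becomes `Optional[...]` defaulting to `None`, and a missing value is filled in with the old default while a `FutureWarning` built from `_REMOVE_DEFAULT_MODEL_WARNING` is raised. A minimal sketch of opting out of the warning by naming a model explicitly (model strings taken from the accepted literals above):

>>> import bigframes.ml.llm as llm
>>> # An explicit model_name skips the None branch, so no FutureWarning is raised.
>>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001")  # doctest: +SKIP
>>> embedder = llm.TextEmbeddingGenerator(model_name="text-embedding-005")  # doctest: +SKIP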
connection_name (str or None): @@ -1341,12 +837,21 @@ class Claude3TextGenerator(base.RetriableRemotePredictor): def __init__( self, *, - model_name: Literal[ - "claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus" - ] = "claude-3-sonnet", + model_name: Optional[ + Literal[ + "claude-3-sonnet", + "claude-3-haiku", + "claude-3-5-sonnet", + "claude-3-opus", + ] + ] = None, session: Optional[bigframes.Session] = None, connection_name: Optional[str] = None, ): + if model_name is None: + model_name = "claude-3-sonnet" + msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING) + warnings.warn(msg, category=FutureWarning, stacklevel=2) self.model_name = model_name self.session = session or global_session.get_global_session() self.connection_name = connection_name diff --git a/bigframes/ml/loader.py b/bigframes/ml/loader.py index eef72584bc..83c665a50b 100644 --- a/bigframes/ml/loader.py +++ b/bigframes/ml/loader.py @@ -42,6 +42,7 @@ "LINEAR_REGRESSION": linear_model.LinearRegression, "LOGISTIC_REGRESSION": linear_model.LogisticRegression, "KMEANS": cluster.KMeans, + "MATRIX_FACTORIZATION": decomposition.MatrixFactorization, "PCA": decomposition.PCA, "BOOSTED_TREE_REGRESSOR": ensemble.XGBRegressor, "BOOSTED_TREE_CLASSIFIER": ensemble.XGBClassifier, @@ -56,11 +57,6 @@ _BQML_ENDPOINT_TYPE_MAPPING = MappingProxyType( { - llm._TEXT_GENERATOR_BISON_ENDPOINT: llm.PaLM2TextGenerator, - llm._TEXT_GENERATOR_BISON_32K_ENDPOINT: llm.PaLM2TextGenerator, - llm._EMBEDDING_GENERATOR_GECKO_ENDPOINT: llm.PaLM2TextEmbeddingGenerator, - llm._EMBEDDING_GENERATOR_GECKO_MULTILINGUAL_ENDPOINT: llm.PaLM2TextEmbeddingGenerator, - llm._GEMINI_PRO_ENDPOINT: llm.GeminiTextGenerator, llm._GEMINI_1P5_PRO_PREVIEW_ENDPOINT: llm.GeminiTextGenerator, llm._GEMINI_1P5_PRO_FLASH_PREVIEW_ENDPOINT: llm.GeminiTextGenerator, llm._GEMINI_1P5_PRO_001_ENDPOINT: llm.GeminiTextGenerator, @@ -68,6 +64,8 @@ llm._GEMINI_1P5_FLASH_001_ENDPOINT: llm.GeminiTextGenerator, llm._GEMINI_1P5_FLASH_002_ENDPOINT: llm.GeminiTextGenerator, llm._GEMINI_2_FLASH_EXP_ENDPOINT: llm.GeminiTextGenerator, + llm._GEMINI_2_FLASH_001_ENDPOINT: llm.GeminiTextGenerator, + llm._GEMINI_2_FLASH_LITE_001_ENDPOINT: llm.GeminiTextGenerator, llm._CLAUDE_3_HAIKU_ENDPOINT: llm.Claude3TextGenerator, llm._CLAUDE_3_SONNET_ENDPOINT: llm.Claude3TextGenerator, llm._CLAUDE_3_5_SONNET_ENDPOINT: llm.Claude3TextGenerator, @@ -83,6 +81,7 @@ def from_bq( session: bigframes.session.Session, bq_model: bigquery.Model ) -> Union[ + decomposition.MatrixFactorization, decomposition.PCA, cluster.KMeans, linear_model.LinearRegression, @@ -95,8 +94,6 @@ def from_bq( imported.TensorFlowModel, imported.ONNXModel, imported.XGBoostModel, - llm.PaLM2TextGenerator, - llm.PaLM2TextEmbeddingGenerator, llm.Claude3TextGenerator, llm.TextEmbeddingGenerator, llm.MultimodalEmbeddingGenerator, diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index e89f17bcaa..a756fac3b9 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -299,6 +299,11 @@ def alter_model( return "\n".join(parts) # ML prediction TVFs + def ml_recommend(self, source_sql: str) -> str: + """Encode ML.RECOMMEND for BQML""" + return f"""SELECT * FROM ML.RECOMMEND(MODEL {self._model_ref_sql()}, + ({source_sql}))""" + def ml_predict(self, source_sql: str) -> str: """Encode ML.PREDICT for BQML""" return f"""SELECT * FROM ML.PREDICT(MODEL {self._model_ref_sql()}, diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index d25791d3e4..e3f15e67a1 100644 --- 
a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -340,15 +340,17 @@ class CutOp(UnaryWindowOp): # TODO: Unintuitive, refactor into multiple ops? bins: typing.Union[int, Iterable] right: Optional[bool] - labels: Optional[bool] + labels: typing.Union[bool, Iterable[str], None] @property def skips_nulls(self): return False def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - if isinstance(self.bins, int) and (self.labels is False): + if self.labels is False: return dtypes.INT_DTYPE + elif isinstance(self.labels, Iterable): + return dtypes.STRING_DTYPE else: # Assumption: buckets use same numeric type if isinstance(self.bins, int): diff --git a/bigframes/operations/ai.py b/bigframes/operations/ai.py index 0ff92187cf..9d73fd43c1 100644 --- a/bigframes/operations/ai.py +++ b/bigframes/operations/ai.py @@ -46,7 +46,7 @@ def filter(self, instruction: str, model, ground_with_google_search: bool = Fals >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm - >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") + >>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001") >>> df = bpd.DataFrame({"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]}) >>> df.ai.filter("{city} is the capital of {country}", model) @@ -160,7 +160,7 @@ def map( >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm - >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") + >>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001") >>> df = bpd.DataFrame({"ingredient_1": ["Burger Bun", "Soy Bean"], "ingredient_2": ["Beef Patty", "Bittern"]}) >>> df.ai.map("What is the food made from {ingredient_1} and {ingredient_2}? One word only.", output_column="food", model=model) @@ -283,7 +283,7 @@ def join( >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm - >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") + >>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001") >>> cities = bpd.DataFrame({'city': ['Seattle', 'Ottawa', 'Berlin', 'Shanghai', 'New Delhi']}) >>> continents = bpd.DataFrame({'continent': ['North America', 'Africa', 'Asia']}) @@ -525,7 +525,7 @@ def top_k( >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm - >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") + >>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001") >>> df = bpd.DataFrame( ... { diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index b4fae68a4f..54078557ed 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -297,16 +297,16 @@ def _resolve_connection(self, connection: Optional[str] = None) -> str: ValueError: If the connection cannot be resolved to a valid string. """ connection = connection or self._block.session._bq_connection - return clients.resolve_full_bq_connection_name( + return clients.get_canonical_bq_connection_id( connection, default_project=self._block.session._project, default_location=self._block.session._location, ) - def _get_runtime_json_str( - self, mode: str = "R", with_metadata: bool = False + def get_runtime_json_str( + self, mode: str = "R", *, with_metadata: bool = False ) -> bigframes.series.Series: - """Get the runtime and apply the ToJSONSTring transformation. 
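The `CutOp` change at the top of this file's hunks widens `labels` from `Optional[bool]` to `Union[bool, Iterable[str], None]` and returns `STRING_DTYPE` whenever `labels` is an iterable; this is the typing behind the "pandas.cut 'labels' accepts a list of string" feature in the changelog. A hedged sketch of the resulting user-facing call (output elided):

>>> import bigframes.pandas as bpd
>>> s = bpd.Series([2, 15, 38])
>>> bpd.cut(s, bins=3, labels=["low", "mid", "high"])  # doctest: +SKIP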
+ """Get the runtime (contains signed URL to access gcs data) and apply the ToJSONSTring transformation. .. note:: BigFrames Blob is still under experiments. It may not work and @@ -317,7 +317,7 @@ def _get_runtime_json_str( Default to "R". Possible values are "R" (read-only) and "RW" (read-write) with_metadata (bool, default False): whether to include metadata - in the JOSN string. Default to False. + in the JSON string. Default to False. Returns: str: the runtime object in the JSON string. @@ -325,13 +325,6 @@ def _get_runtime_json_str( runtime = self._get_runtime(mode=mode, with_metadata=with_metadata) return runtime._apply_unary_op(ops.ToJSONString()) - # TODO(b/404605969): remove cleanups when UDF fixes dataset deletion. - def _add_to_cleanup_set(self, udf): - """Add udf name to session cleanup set. Won't need this after UDF fixes dataset deletion.""" - self.session._function_session._update_temp_artifacts( - udf.bigframes_bigquery_function, "" - ) - def image_blur( self, ksize: tuple[int, int], @@ -365,7 +358,7 @@ def image_blur( import bigframes.blob._functions as blob_func connection = self._resolve_connection(connection) - df = self._get_runtime_json_str(mode="R").to_frame() + df = self.get_runtime_json_str(mode="R").to_frame() if dst is None: ext = self.uri().str.extract(FILE_EXT_REGEX) @@ -404,7 +397,7 @@ def image_blur( container_memory=container_memory, ).udf() - dst_rt = dst.blob._get_runtime_json_str(mode="RW") + dst_rt = dst.blob.get_runtime_json_str(mode="RW") df = df.join(dst_rt, how="outer") df["ksize_x"], df["ksize_y"] = ksize @@ -413,8 +406,6 @@ def image_blur( res = self._df_apply_udf(df, image_blur_udf) res.cache() # to execute the udf - self._add_to_cleanup_set(image_blur_udf) - return dst def image_resize( @@ -461,7 +452,7 @@ def image_resize( import bigframes.blob._functions as blob_func connection = self._resolve_connection(connection) - df = self._get_runtime_json_str(mode="R").to_frame() + df = self.get_runtime_json_str(mode="R").to_frame() if dst is None: ext = self.uri().str.extract(FILE_EXT_REGEX) @@ -501,7 +492,7 @@ def image_resize( container_memory=container_memory, ).udf() - dst_rt = dst.blob._get_runtime_json_str(mode="RW") + dst_rt = dst.blob.get_runtime_json_str(mode="RW") df = df.join(dst_rt, how="outer") df["dsize_x"], df["dsizye_y"] = dsize @@ -511,8 +502,6 @@ def image_resize( res = self._df_apply_udf(df, image_resize_udf) res.cache() # to execute the udf - self._add_to_cleanup_set(image_resize_udf) - return dst def image_normalize( @@ -552,7 +541,7 @@ def image_normalize( import bigframes.blob._functions as blob_func connection = self._resolve_connection(connection) - df = self._get_runtime_json_str(mode="R").to_frame() + df = self.get_runtime_json_str(mode="R").to_frame() if dst is None: ext = self.uri().str.extract(FILE_EXT_REGEX) @@ -593,7 +582,7 @@ def image_normalize( container_memory=container_memory, ).udf() - dst_rt = dst.blob._get_runtime_json_str(mode="RW") + dst_rt = dst.blob.get_runtime_json_str(mode="RW") df = df.join(dst_rt, how="outer") df["alpha"] = alpha @@ -604,8 +593,6 @@ def image_normalize( res = self._df_apply_udf(df, image_normalize_udf) res.cache() # to execute the udf - self._add_to_cleanup_set(image_normalize_udf) - return dst def pdf_extract( @@ -657,13 +644,12 @@ def pdf_extract( container_memory=container_memory, ).udf() - src_rt = self._get_runtime_json_str(mode="R") + src_rt = self.get_runtime_json_str(mode="R") res = src_rt.apply(pdf_extract_udf) content_series = 
res._apply_unary_op(ops.JSONValue(json_path="$.content")) - self._add_to_cleanup_set(pdf_extract_udf) if verbose: status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status")) res_df = bpd.DataFrame({"status": status_series, "content": content_series}) @@ -736,7 +722,7 @@ def pdf_chunk( container_memory=container_memory, ).udf() - src_rt = self._get_runtime_json_str(mode="R") + src_rt = self.get_runtime_json_str(mode="R") df = src_rt.to_frame() df["chunk_size"] = chunk_size df["overlap_size"] = overlap_size @@ -744,7 +730,6 @@ def pdf_chunk( res = self._df_apply_udf(df, pdf_chunk_udf) content_series = bbq.json_extract_string_array(res, "$.content") - self._add_to_cleanup_set(pdf_chunk_udf) if verbose: status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status")) res_df = bpd.DataFrame({"status": status_series, "content": content_series}) diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py index d1089f993e..8c5c54e8ca 100644 --- a/bigframes/operations/semantics.py +++ b/bigframes/operations/semantics.py @@ -57,7 +57,7 @@ def agg( >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm - >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") + >>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001") >>> df = bpd.DataFrame( ... { @@ -326,7 +326,7 @@ def filter(self, instruction: str, model, ground_with_google_search: bool = Fals >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm - >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") + >>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001") >>> df = bpd.DataFrame({"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]}) >>> df.semantics.filter("{city} is the capital of {country}", model) @@ -440,7 +440,7 @@ def map( >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm - >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") + >>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001") >>> df = bpd.DataFrame({"ingredient_1": ["Burger Bun", "Soy Bean"], "ingredient_2": ["Beef Patty", "Bittern"]}) >>> df.semantics.map("What is the food made from {ingredient_1} and {ingredient_2}? One word only.", output_column="food", model=model) @@ -563,7 +563,7 @@ def join( >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm - >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") + >>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001") >>> cities = bpd.DataFrame({'city': ['Seattle', 'Ottawa', 'Berlin', 'Shanghai', 'New Delhi']}) >>> continents = bpd.DataFrame({'continent': ['North America', 'Africa', 'Asia']}) @@ -805,7 +805,7 @@ def top_k( >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm - >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") + >>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001") >>> df = bpd.DataFrame( ... 
{ diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index 46d4344499..784af8418d 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -305,9 +305,7 @@ def to_blob(self, connection: Optional[str] = None) -> series.Series: raise NotImplementedError() session = self._block.session - connection = session._create_bq_connection( - connection=connection, iam_role="storage.objectUser" - ) + connection = session._create_bq_connection(connection=connection) return self._apply_binary_op(connection, ops.obj_make_ref_op) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 730c287e1f..8e1e03e024 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -65,14 +65,19 @@ def remote_function( + # Make sure that the input/output types, and dataset can be used + # positionally. This avoids the worst of the breaking change from 1.x to + # 2.x while still preventing possible mixups between consecutive str + # parameters. input_types: Union[None, type, Sequence[type]] = None, output_type: Optional[type] = None, dataset: Optional[str] = None, + *, bigquery_connection: Optional[str] = None, reuse: bool = True, name: Optional[str] = None, packages: Optional[Sequence[str]] = None, - cloud_function_service_account: Optional[str] = None, + cloud_function_service_account: str, cloud_function_kms_key_name: Optional[str] = None, cloud_function_docker_repository: Optional[str] = None, max_batching_rows: Optional[int] = 1000, @@ -80,9 +85,9 @@ def remote_function( cloud_function_max_instances: Optional[int] = None, cloud_function_vpc_connector: Optional[str] = None, cloud_function_memory_mib: Optional[int] = 1024, - cloud_function_ingress_settings: Optional[ - Literal["all", "internal-only", "internal-and-gclb"] - ] = None, + cloud_function_ingress_settings: Literal[ + "all", "internal-only", "internal-and-gclb" + ] = "internal-only", ): return global_session.with_default_session( bigframes.session.Session.remote_function, @@ -112,9 +117,9 @@ def udf( *, input_types: Union[None, type, Sequence[type]] = None, output_type: Optional[type] = None, - dataset: Optional[str] = None, + dataset: str, bigquery_connection: Optional[str] = None, - name: Optional[str] = None, + name: str, packages: Optional[Sequence[str]] = None, ): return global_session.with_default_session( diff --git a/bigframes/series.py b/bigframes/series.py index be87129929..87f1f1d141 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -23,7 +23,19 @@ import numbers import textwrap import typing -from typing import Any, cast, List, Literal, Mapping, Optional, Sequence, Tuple, Union +from typing import ( + Any, + cast, + Iterable, + List, + Literal, + Mapping, + Optional, + Sequence, + Tuple, + Union, +) +import warnings import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.series as vendored_pandas_series @@ -46,9 +58,11 @@ import bigframes.core.utils as utils import bigframes.core.validations as validations import bigframes.core.window +from bigframes.core.window import rolling import bigframes.core.window_spec as windows import bigframes.dataframe import bigframes.dtypes +import bigframes.exceptions as bfe import bigframes.formatting_helpers as formatter import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops @@ -385,19 +399,62 @@ def to_pandas( ) -> pandas.Series: """Writes Series to pandas Series. 
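The `remote_function` signature change above makes everything after `dataset` keyword-only and makes `cloud_function_service_account` required. A sketch of the new calling convention, mirroring the `read_gbq_function` doctests later in this diff, where "default" explicitly requests the default compute service account:

>>> import bigframes.pandas as bpd
>>> @bpd.remote_function(cloud_function_service_account="default")  # doctest: +SKIP
... def tenfold(num: int) -> float:
...     return num * 10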
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([4, 3, 2]) + + Download the data from BigQuery and convert it into an in-memory pandas Series. + + >>> s.to_pandas() + 0 4 + 1 3 + 2 2 + dtype: Int64 + + Estimate job statistics without processing or downloading data by using `dry_run=True`. + + >>> s.to_pandas(dry_run=True) # doctest: +SKIP + columnCount 1 + columnDtypes {None: Int64} + indexLevel 1 + indexDtypes [Int64] + projectId bigframes-dev + location US + jobType QUERY + destinationTable {'projectId': 'bigframes-dev', 'datasetId': '_... + useLegacySql False + referencedTables None + totalBytesProcessed 0 + cacheHit False + statementType SELECT + creationTime 2025-04-03 18:54:59.219000+00:00 + dtype: object + Args: max_download_size (int, default None): - Download size threshold in MB. If max_download_size is exceeded when downloading data - (e.g., to_pandas()), the data will be downsampled if - bigframes.options.sampling.enable_downsampling is True, otherwise, an error will be - raised. If set to a value other than None, this will supersede the global config. + .. deprecated:: 2.0.0 + ``max_download_size`` parameter is deprecated. Please use ``to_pandas_batches()`` + method instead. + + Download size threshold in MB. If ``max_download_size`` is exceeded when downloading data, + the data will be downsampled if ``bigframes.options.sampling.enable_downsampling`` is + ``True``, otherwise, an error will be raised. If set to a value other than ``None``, + this will supersede the global config. sampling_method (str, default None): + .. deprecated:: 2.0.0 + ``sampling_method`` parameter is deprecated. Please use ``sample()`` method instead. + Downsampling algorithms to be chosen from, the choices are: "head": This algorithm returns a portion of the data from the beginning. It is fast and requires minimal computations to perform the downsampling; "uniform": This algorithm returns uniform random samples of the data. If set to a value other than None, this will supersede the global config. random_state (int, default None): + .. deprecated:: 2.0.0 + ``random_state`` parameter is deprecated. Please use ``sample()`` method instead. + The seed for the uniform downsampling algorithm. If provided, the uniform method may take longer to execute and require more computation. If set to a value other than None, this will supersede the global config. @@ -416,6 +473,19 @@ def to_pandas( is not exceeded; otherwise, a pandas Series with downsampled rows of the DataFrame. If dry_run is set to True, a pandas Series containing dry run statistics will be returned. """ + if max_download_size is not None: + msg = bfe.format_message( + "DEPRECATED: The `max_download_size` parameter for `Series.to_pandas()` " + "is deprecated and will be removed soon. Please use `Series.to_pandas_batches()`." + ) + warnings.warn(msg, category=FutureWarning) + if sampling_method is not None or random_state is not None: + msg = bfe.format_message( + "DEPRECATED: The `sampling_method` and `random_state` parameters for " + "`Series.to_pandas()` are deprecated and will be removed soon. " + "Please use `Series.sample().to_pandas()` instead for sampling."
+ ) + warnings.warn(msg, category=FutureWarning) if dry_run: dry_run_stats, dry_run_job = self._block._compute_dry_run( @@ -445,6 +515,70 @@ def to_pandas( series.name = self._name return series + def to_pandas_batches( + self, + page_size: Optional[int] = None, + max_results: Optional[int] = None, + *, + allow_large_results: Optional[bool] = None, + ) -> Iterable[pandas.Series]: + """Stream Series results to an iterable of pandas Series. + + page_size and max_results determine the size and number of batches, + see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([4, 3, 2, 2, 3]) + + Iterate through the results in batches, limiting the total rows yielded + across all batches via `max_results`: + + >>> for s_batch in s.to_pandas_batches(max_results=3): + ... print(s_batch) + 0 4 + 1 3 + 2 2 + dtype: Int64 + + Alternatively, control the approximate size of each batch using `page_size` + and fetch batches manually using `next()`: + + >>> it = s.to_pandas_batches(page_size=2) + >>> next(it) + 0 4 + 1 3 + dtype: Int64 + >>> next(it) + 2 2 + 3 2 + dtype: Int64 + + Args: + page_size (int, default None): + The maximum number of rows of each batch. Non-positive values are ignored. + max_results (int, default None): + The maximum total number of rows of all batches. + allow_large_results (bool, default None): + If not None, overrides the global setting to allow or disallow large query results + over the default size limit of 10 GB. + + Returns: + Iterable[pandas.Series]: + An iterable of smaller Series which combine to + form the original Series. Results stream from bigquery, + see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.table.RowIterator#google_cloud_bigquery_table_RowIterator_to_arrow_iterable + """ + df = self._block.to_pandas_batches( + page_size=page_size, + max_results=max_results, + allow_large_results=allow_large_results, + squeeze=True, + ) + return df + def _compute_dry_run(self) -> bigquery.QueryJob: _, query_job = self._block._compute_dry_run((self._value_column,)) return query_job @@ -1378,7 +1512,9 @@ def _apply_aggregation( ) -> Any: return self._block.get_stat(self._value_column, op) - def _apply_window_op(self, op: agg_ops.WindowOp, window_spec: windows.WindowSpec): + def _apply_window_op( + self, op: agg_ops.UnaryWindowOp, window_spec: windows.WindowSpec + ): block = self._block block, result_id = block.apply_window_op( self._value_column, op, window_spec=window_spec, result_label=self.name @@ -1439,16 +1575,26 @@ def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series: @validations.requires_ordering() def rolling( self, - window: int, - min_periods=None, + window: int | pandas.Timedelta | numpy.timedelta64 | datetime.timedelta | str, + min_periods: int | None = None, closed: Literal["right", "left", "both", "neither"] = "right", ) -> bigframes.core.window.Window: - window_spec = windows.WindowSpec( - bounds=windows.RowsWindowBounds.from_window_size(window, closed), - min_periods=min_periods if min_periods is not None else window, - ) - return bigframes.core.window.Window( - self._block, window_spec, self._block.value_columns, is_series=True + if isinstance(window, int): + # Rows rolling + window_spec = windows.WindowSpec( + bounds=windows.RowsWindowBounds.from_window_size(window, closed), + 
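The `rolling` rework above accepts an `int` for row-based windows and now also `pandas.Timedelta`/`numpy.timedelta64`/`datetime.timedelta`/`str`, which the branch completing just below routes to `rolling.create_range_window` for range-based windows. A sketch, assuming a pandas-style offset string over a timestamp index (exact accepted window values follow `create_range_window`):

>>> import pandas as pd
>>> import bigframes.pandas as bpd
>>> pdf = pd.Series([1, 2, 3], index=pd.to_datetime(["2025-01-01", "2025-01-02", "2025-01-04"]))
>>> s = bpd.read_pandas(pdf)
>>> s.rolling(window="2D").sum()  # doctest: +SKIP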
min_periods=window if min_periods is None else min_periods, + ) + return bigframes.core.window.Window( + self._block, window_spec, self._block.value_columns, is_series=True + ) + + return rolling.create_range_window( + block=self._block, + window=window, + min_periods=min_periods, + closed=closed, + is_series=True, ) @validations.requires_ordering() diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 3ac9b75039..9d45019fc5 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -53,30 +53,23 @@ ReadPickleBuffer, StorageOptions, ) -import pyarrow as pa from bigframes import exceptions as bfe from bigframes import version import bigframes._config.bigquery_options as bigquery_options import bigframes.clients -import bigframes.core.blocks as blocks -import bigframes.core.compile -import bigframes.core.guid -import bigframes.core.pruning +from bigframes.core import blocks # Even though the ibis.backends.bigquery import is unused, it's needed # to register new and replacement ops with the Ibis BigQuery backend. -import bigframes.dataframe -import bigframes.dtypes import bigframes.functions._function_session as bff_session import bigframes.functions.function as bff +from bigframes.session import bigquery_session, bq_caching_executor, executor import bigframes.session._io.bigquery as bf_io_bigquery +import bigframes.session.anonymous_dataset import bigframes.session.clients -import bigframes.session.executor import bigframes.session.loader import bigframes.session.metrics -import bigframes.session.planner -import bigframes.session.temp_storage import bigframes.session.validation # Avoid circular imports. @@ -107,22 +100,6 @@ logger = logging.getLogger(__name__) -# Excludes geography and nested (array, struct) datatypes -INLINABLE_DTYPES: Sequence[bigframes.dtypes.Dtype] = ( - pandas.BooleanDtype(), - pandas.Float64Dtype(), - pandas.Int64Dtype(), - pandas.StringDtype(storage="pyarrow"), - pandas.ArrowDtype(pa.binary()), - pandas.ArrowDtype(pa.date32()), - pandas.ArrowDtype(pa.time64("us")), - pandas.ArrowDtype(pa.timestamp("us")), - pandas.ArrowDtype(pa.timestamp("us", tz="UTC")), - pandas.ArrowDtype(pa.decimal128(38, 9)), - pandas.ArrowDtype(pa.decimal256(76, 38)), - pandas.ArrowDtype(pa.duration("us")), -) - class Session( third_party_pandas_gbq.GBQIOMixin, @@ -247,22 +224,32 @@ def __init__( self._metrics = bigframes.session.metrics.ExecutionMetrics() self._function_session = bff_session.FunctionSession() - self._temp_storage_manager = ( - bigframes.session.temp_storage.AnonymousDatasetManager( + self._anon_dataset_manager = ( + bigframes.session.anonymous_dataset.AnonymousDatasetManager( self._clients_provider.bqclient, location=self._location, session_id=self._session_id, kms_key=self._bq_kms_key_name, ) ) - self._executor: bigframes.session.executor.Executor = ( - bigframes.session.executor.BigQueryCachingExecutor( - bqclient=self._clients_provider.bqclient, - bqstoragereadclient=self._clients_provider.bqstoragereadclient, - storage_manager=self._temp_storage_manager, - strictly_ordered=self._strictly_ordered, - metrics=self._metrics, + # Session temp tables don't support specifying kms key, so use anon dataset if kms key specified + self._session_resource_manager = ( + bigquery_session.SessionResourceManager( + self.bqclient, + self._location, ) + if (self._bq_kms_key_name is None) + else None + ) + self._temp_storage_manager = ( + self._session_resource_manager or self._anon_dataset_manager + ) + self._executor: executor.Executor = 
bq_caching_executor.BigQueryCachingExecutor( + bqclient=self._clients_provider.bqclient, + bqstoragereadclient=self._clients_provider.bqstoragereadclient, + storage_manager=self._temp_storage_manager, + strictly_ordered=self._strictly_ordered, + metrics=self._metrics, ) self._loader = bigframes.session.loader.GbqDataLoader( session=self, @@ -375,7 +362,7 @@ def _allows_ambiguity(self) -> bool: @property def _anonymous_dataset(self): - return self._temp_storage_manager.dataset + return self._anon_dataset_manager.dataset def __hash__(self): # Stable hash needed to use in expression tree @@ -388,9 +375,11 @@ def close(self): # Protect against failure when the Session is a fake for testing or # failed to initialize. - temp_storage_manager = getattr(self, "_temp_storage_manager", None) - if temp_storage_manager: - self._temp_storage_manager.clean_up_tables() + if anon_dataset_manager := getattr(self, "_anon_dataset_manager", None): + anon_dataset_manager.close() + + if session_resource_manager := getattr(self, "_session_resource_manager", None): + session_resource_manager.close() remote_function_session = getattr(self, "_function_session", None) if remote_function_session: @@ -793,19 +782,29 @@ def _read_pandas( "bigframes.pandas.DataFrame." ) + mem_usage = pandas_dataframe.memory_usage(deep=True).sum() if write_engine == "default": - try: - inline_df = self._read_pandas_inline(pandas_dataframe) - return inline_df - except ValueError: - pass - return self._read_pandas_load_job(pandas_dataframe, api_name) - elif write_engine == "bigquery_inline": + write_engine = ( + "bigquery_load" + if mem_usage > MAX_INLINE_DF_BYTES + else "bigquery_inline" + ) + + if write_engine == "bigquery_inline": + if mem_usage > MAX_INLINE_DF_BYTES: + raise ValueError( + f"DataFrame size ({mem_usage} bytes) exceeds the maximum allowed " + f"for inline data ({MAX_INLINE_DF_BYTES} bytes)." + ) return self._read_pandas_inline(pandas_dataframe) elif write_engine == "bigquery_load": - return self._read_pandas_load_job(pandas_dataframe, api_name) + return self._loader.read_pandas( + pandas_dataframe, method="load", api_name=api_name + ) elif write_engine == "bigquery_streaming": - return self._read_pandas_streaming(pandas_dataframe) + return self._loader.read_pandas( + pandas_dataframe, method="stream", api_name=api_name + ) else: raise ValueError(f"Got unexpected write_engine '{write_engine}'") @@ -814,56 +813,8 @@ def _read_pandas_inline( ) -> dataframe.DataFrame: import bigframes.dataframe as dataframe - memory_usage = pandas_dataframe.memory_usage(deep=True).sum() - if memory_usage > MAX_INLINE_DF_BYTES: - raise ValueError( - f"DataFrame size ({memory_usage} bytes) exceeds the maximum allowed " - f"for inline data ({MAX_INLINE_DF_BYTES} bytes)." - ) - - try: - local_block = blocks.Block.from_local(pandas_dataframe, self) - inline_df = dataframe.DataFrame(local_block) - except ( - pa.ArrowInvalid, # Thrown by arrow for unsupported types, such as geo. - pa.ArrowTypeError, # Thrown by arrow for types without mapping (geo). - ValueError, # Thrown by ibis for some unhandled types - TypeError, # Not all types handleable by local code path - ) as exc: - raise ValueError( - f"Could not convert with a BigQuery type: `{exc}`. " - ) from exc - - # Make sure all types are inlinable to avoid escaping errors. 
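With the `_read_pandas` dispatch above, `write_engine="default"` now resolves to `bigquery_inline` or `bigquery_load` purely by comparing `memory_usage(deep=True)` against `MAX_INLINE_DF_BYTES`, replacing the try/except type-check fallback whose deletion continues below. A sketch of how this surfaces through the public API (assuming the public `read_pandas` forwards `write_engine` as in 1.x):

>>> import pandas as pd
>>> import bigframes.pandas as bpd
>>> pdf = pd.DataFrame({"x": [1, 2, 3]})
>>> bpd.read_pandas(pdf)  # small frame: inlined by default  # doctest: +SKIP
>>> bpd.read_pandas(pdf, write_engine="bigquery_load")  # force a load job  # doctest: +SKIP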
- inline_types = inline_df._block.expr.schema.dtypes - noninlinable_types = [ - dtype for dtype in inline_types if dtype not in INLINABLE_DTYPES - ] - if len(noninlinable_types) != 0: - raise ValueError( - f"Could not inline with a BigQuery type: `{noninlinable_types}`. " - f"{constants.FEEDBACK_LINK}" - ) - - return inline_df - - def _read_pandas_load_job( - self, - pandas_dataframe: pandas.DataFrame, - api_name: str, - ) -> dataframe.DataFrame: - try: - return self._loader.read_pandas_load_job(pandas_dataframe, api_name) - except (pa.ArrowInvalid, pa.ArrowTypeError) as exc: - raise ValueError( - f"Could not convert with a BigQuery type: `{exc}`." - ) from exc - - def _read_pandas_streaming( - self, - pandas_dataframe: pandas.DataFrame, - ) -> dataframe.DataFrame: - return self._loader.read_pandas_streaming(pandas_dataframe) + local_block = blocks.Block.from_local(pandas_dataframe, self) + return dataframe.DataFrame(local_block) def read_csv( self, @@ -906,117 +857,183 @@ def read_csv( engine=engine, write_engine=write_engine, ) - table = self._temp_storage_manager.allocate_temp_table() - - if engine is not None and engine == "bigquery": - if any(param is not None for param in (dtype, names)): - not_supported = ("dtype", "names") - raise NotImplementedError( - f"BigQuery engine does not support these arguments: {not_supported}. " - f"{constants.FEEDBACK_LINK}" - ) - # TODO(b/338089659): Looks like we can relax this 1 column - # restriction if we check the contents of an iterable are strings - # not integers. - if ( - # Empty tuples, None, and False are allowed and falsey. - index_col - and not isinstance(index_col, bigframes.enums.DefaultIndexKind) - and not isinstance(index_col, str) - ): - raise NotImplementedError( - "BigQuery engine only supports a single column name for `index_col`, " - f"got: {repr(index_col)}. {constants.FEEDBACK_LINK}" - ) + if engine != "bigquery": + # Using pandas.read_csv by default and warning about potential issues with + # large files. + return self._read_csv_w_pandas_engines( + filepath_or_buffer, + sep=sep, + header=header, + names=names, + index_col=index_col, + usecols=usecols, # type: ignore + dtype=dtype, + engine=engine, + encoding=encoding, + write_engine=write_engine, + **kwargs, + ) + else: + return self._read_csv_w_bigquery_engine( + filepath_or_buffer, + sep=sep, + header=header, + names=names, + index_col=index_col, + usecols=usecols, # type: ignore + dtype=dtype, + encoding=encoding, + ) - # None and False cannot be passed to read_gbq. - # TODO(b/338400133): When index_col is None, we should be using the - # first column of the CSV as the index to be compatible with the - # pandas engine. According to the pandas docs, only "False" - # indicates a default sequential index. - if not index_col: - index_col = () + def _read_csv_w_pandas_engines( + self, + filepath_or_buffer, + *, + sep, + header, + names, + index_col, + usecols, + dtype, + engine, + encoding, + write_engine, + **kwargs, + ) -> dataframe.DataFrame: + """Reads a CSV file using pandas engines into a BigQuery DataFrames. - index_col = typing.cast( - Union[ - Sequence[str], # Falsey values - bigframes.enums.DefaultIndexKind, - str, - ], - index_col, + This method serves as the implementation backend for read_csv when the + specified engine is one supported directly by pandas ('c', 'python', + 'pyarrow'). + """ + if isinstance(index_col, bigframes.enums.DefaultIndexKind): + raise NotImplementedError( + f"With index_col={repr(index_col)}, only engine='bigquery' is supported. 
" + f"{constants.FEEDBACK_LINK}" + ) + if any(arg in kwargs for arg in ("chunksize", "iterator")): + raise NotImplementedError( + "'chunksize' and 'iterator' arguments are not supported. " + f"{constants.FEEDBACK_LINK}" ) + if isinstance(filepath_or_buffer, str): + self._check_file_size(filepath_or_buffer) - # usecols should only be an iterable of strings (column names) for use as columns in read_gbq. - columns: Tuple[Any, ...] = tuple() - if usecols is not None: - if isinstance(usecols, Iterable) and all( - isinstance(col, str) for col in usecols - ): - columns = tuple(col for col in usecols) - else: - raise NotImplementedError( - "BigQuery engine only supports an iterable of strings for `usecols`. " - f"{constants.FEEDBACK_LINK}" - ) + pandas_df = pandas.read_csv( + filepath_or_buffer, + sep=sep, + header=header, + names=names, + index_col=index_col, + usecols=usecols, # type: ignore + dtype=dtype, + engine=engine, + encoding=encoding, + **kwargs, + ) + return self._read_pandas(pandas_df, api_name="read_csv", write_engine=write_engine) # type: ignore - if encoding is not None and encoding not in _VALID_ENCODINGS: - raise NotImplementedError( - f"BigQuery engine only supports the following encodings: {_VALID_ENCODINGS}. " - f"{constants.FEEDBACK_LINK}" - ) + def _read_csv_w_bigquery_engine( + self, + filepath_or_buffer, + *, + sep, + header, + names, + index_col, + usecols, + dtype, + encoding, + ) -> dataframe.DataFrame: + """Reads a CSV file using the BigQuery engine into a BigQuery DataFrames. - job_config = bigquery.LoadJobConfig() - job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED - job_config.source_format = bigquery.SourceFormat.CSV - job_config.write_disposition = bigquery.WriteDisposition.WRITE_EMPTY - job_config.autodetect = True - job_config.field_delimiter = sep - job_config.encoding = encoding - job_config.labels = {"bigframes-api": "read_csv"} + This method serves as the implementation backend for read_csv when the + 'bigquery' engine is specified or inferred. It leverages BigQuery's + native CSV loading capabilities, making it suitable for large datasets + that may not fit into local memory. + """ - # We want to match pandas behavior. If header is 0, no rows should be skipped, so we - # do not need to set `skip_leading_rows`. If header is None, then there is no header. - # Setting skip_leading_rows to 0 does that. If header=N and N>0, we want to skip N rows. - if header is None: - job_config.skip_leading_rows = 0 - elif header > 0: - job_config.skip_leading_rows = header + if any(param is not None for param in (dtype, names)): + not_supported = ("dtype", "names") + raise NotImplementedError( + f"BigQuery engine does not support these arguments: {not_supported}. " + f"{constants.FEEDBACK_LINK}" + ) - return self._loader._read_bigquery_load_job( - filepath_or_buffer, - table, - job_config=job_config, - index_col=index_col, - columns=columns, + # TODO(b/338089659): Looks like we can relax this 1 column + # restriction if we check the contents of an iterable are strings + # not integers. + if ( + # Empty tuples, None, and False are allowed and falsey. + index_col + and not isinstance(index_col, bigframes.enums.DefaultIndexKind) + and not isinstance(index_col, str) + ): + raise NotImplementedError( + "BigQuery engine only supports a single column name for `index_col`, " + f"got: {repr(index_col)}. 
{constants.FEEDBACK_LINK}" ) - else: - if isinstance(index_col, bigframes.enums.DefaultIndexKind): - raise NotImplementedError( - f"With index_col={repr(index_col)}, only engine='bigquery' is supported. " - f"{constants.FEEDBACK_LINK}" - ) - if any(arg in kwargs for arg in ("chunksize", "iterator")): + + # None and False cannot be passed to read_gbq. + # TODO(b/338400133): When index_col is None, we should be using the + # first column of the CSV as the index to be compatible with the + # pandas engine. According to the pandas docs, only "False" + # indicates a default sequential index. + if not index_col: + index_col = () + + index_col = typing.cast( + Union[ + Sequence[str], # Falsey values + bigframes.enums.DefaultIndexKind, + str, + ], + index_col, + ) + + # usecols should only be an iterable of strings (column names) for use as columns in read_gbq. + columns: Tuple[Any, ...] = tuple() + if usecols is not None: + if isinstance(usecols, Iterable) and all( + isinstance(col, str) for col in usecols + ): + columns = tuple(col for col in usecols) + else: raise NotImplementedError( - "'chunksize' and 'iterator' arguments are not supported. " + "BigQuery engine only supports an iterable of strings for `usecols`. " f"{constants.FEEDBACK_LINK}" ) - if isinstance(filepath_or_buffer, str): - self._check_file_size(filepath_or_buffer) - pandas_df = pandas.read_csv( - filepath_or_buffer, - sep=sep, - header=header, - names=names, - index_col=index_col, - usecols=usecols, # type: ignore - dtype=dtype, - engine=engine, - encoding=encoding, - **kwargs, + if encoding is not None and encoding not in _VALID_ENCODINGS: + raise NotImplementedError( + f"BigQuery engine only supports the following encodings: {_VALID_ENCODINGS}. " + f"{constants.FEEDBACK_LINK}" ) - return self._read_pandas(pandas_df, api_name="read_csv", write_engine=write_engine) # type: ignore + + job_config = bigquery.LoadJobConfig() + job_config.source_format = bigquery.SourceFormat.CSV + job_config.autodetect = True + job_config.field_delimiter = sep + job_config.encoding = encoding + job_config.labels = {"bigframes-api": "read_csv"} + + # b/409070192: When header > 0, pandas and BigFrames returns different column naming. + + # We want to match pandas behavior. If header is 0, no rows should be skipped, so we + # do not need to set `skip_leading_rows`. If header is None, then there is no header. + # Setting skip_leading_rows to 0 does that. If header=N and N>0, we want to skip N rows. 
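Given these engine-specific restrictions, and the header-to-`skip_leading_rows` mapping implemented just after this note (`header + 1` when `header > 0`, per b/409070192), a sketch of the BigQuery-engine path; the GCS URI is a placeholder:

>>> import bigframes.pandas as bpd
>>> df = bpd.read_csv(
...     "gs://my-bucket/data.csv",  # hypothetical path
...     engine="bigquery",
...     header=0,
... )  # doctest: +SKIP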
+ if header is None: + job_config.skip_leading_rows = 0 + elif header > 0: + job_config.skip_leading_rows = header + 1 + + return self._loader.read_bigquery_load_job( + filepath_or_buffer, + job_config=job_config, + index_col=index_col, + columns=columns, + ) def read_pickle( self, @@ -1052,18 +1069,12 @@ def read_parquet( engine=engine, write_engine=write_engine, ) - table = self._temp_storage_manager.allocate_temp_table() - if engine == "bigquery": job_config = bigquery.LoadJobConfig() - job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED job_config.source_format = bigquery.SourceFormat.PARQUET - job_config.write_disposition = bigquery.WriteDisposition.WRITE_EMPTY job_config.labels = {"bigframes-api": "read_parquet"} - return self._loader._read_bigquery_load_job( - path, table, job_config=job_config - ) + return self._loader.read_bigquery_load_job(path, job_config=job_config) else: if "*" in path: raise ValueError( @@ -1106,8 +1117,6 @@ def read_json( engine=engine, write_engine=write_engine, ) - table = self._temp_storage_manager.allocate_temp_table() - if engine == "bigquery": if dtype is not None: @@ -1131,16 +1140,13 @@ def read_json( ) job_config = bigquery.LoadJobConfig() - job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - job_config.write_disposition = bigquery.WriteDisposition.WRITE_EMPTY job_config.autodetect = True job_config.encoding = encoding job_config.labels = {"bigframes-api": "read_json"} - return self._loader._read_bigquery_load_job( + return self._loader.read_bigquery_load_job( path_or_buf, - table, job_config=job_config, ) else: @@ -1202,14 +1208,19 @@ def _check_file_size(self, filepath: str): def remote_function( self, + # Make sure that the input/output types, and dataset can be used + # positionally. This avoids the worst of the breaking change from 1.x to + # 2.x while still preventing possible mixups between consecutive str + # parameters. input_types: Union[None, type, Sequence[type]] = None, output_type: Optional[type] = None, dataset: Optional[str] = None, + *, bigquery_connection: Optional[str] = None, reuse: bool = True, name: Optional[str] = None, packages: Optional[Sequence[str]] = None, - cloud_function_service_account: Optional[str] = None, + cloud_function_service_account: str, cloud_function_kms_key_name: Optional[str] = None, cloud_function_docker_repository: Optional[str] = None, max_batching_rows: Optional[int] = 1000, @@ -1217,9 +1228,9 @@ def remote_function( cloud_function_max_instances: Optional[int] = None, cloud_function_vpc_connector: Optional[str] = None, cloud_function_memory_mib: Optional[int] = 1024, - cloud_function_ingress_settings: Optional[ - Literal["all", "internal-only", "internal-and-gclb"] - ] = None, + cloud_function_ingress_settings: Literal[ + "all", "internal-only", "internal-and-gclb" + ] = "internal-only", ): """Decorator to turn a user defined function into a BigQuery remote function. Check out the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes. @@ -1327,8 +1338,8 @@ def remote_function( Explicit name of the external package dependencies. Each dependency is added to the `requirements.txt` as is, and can be of the form supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/. - cloud_function_service_account (str, Optional): - Service account to use for the cloud functions. 
If not provided + cloud_function_service_account (str): + Service account to use for the cloud functions. If "default" provided then the default service account would be used. See https://cloud.google.com/functions/docs/securing/function-identity for more details. Please make sure the service account has the @@ -1392,8 +1403,8 @@ def remote_function( cloud_function_ingress_settings (str, Optional): Ingress settings controls dictating what traffic can reach the function. Options are: `all`, `internal-only`, or `internal-and-gclb`. - If no setting is provided, `all` will be used by default and a warning - will be issued. See for more details + If no setting is provided, `internal-only` will be used by default. + See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. Returns: collections.abc.Callable: @@ -1406,8 +1417,8 @@ def remote_function( `bigframes_remote_function` - The bigquery remote function capable of calling into `bigframes_cloud_function`. """ return self._function_session.remote_function( - input_types, - output_type, + input_types=input_types, + output_type=output_type, session=self, dataset=dataset, bigquery_connection=bigquery_connection, @@ -1430,9 +1441,9 @@ def udf( *, input_types: Union[None, type, Sequence[type]] = None, output_type: Optional[type] = None, - dataset: Optional[str] = None, + dataset: str, bigquery_connection: Optional[str] = None, - name: Optional[str] = None, + name: str, packages: Optional[Sequence[str]] = None, ): """Decorator to turn a Python user defined function (udf) into a @@ -1459,30 +1470,33 @@ def udf( be specified. The supported output types are `bool`, `bytes`, `float`, `int`, `str`, `list[bool]`, `list[float]`, `list[int]` and `list[str]`. - dataset (str, Optional): + dataset (str): Dataset in which to create a BigQuery managed function. It should be in `.` or `` - format. If this parameter is not provided then session dataset - id is used. + format. bigquery_connection (str, Optional): - Name of the BigQuery connection. You should either have the - connection already created in the `location` you have chosen, or - you should have the Project IAM Admin role to enable the service - to create the connection for you if you need it. If this - parameter is not provided then the BigQuery connection from the - session is used. - name (str, Optional): + Name of the BigQuery connection. It is used to provide an + identity to the serverless instances running the user code. It + helps BigQuery manage and track the resources used by the udf. + This connection is required for internet access and for + interacting with other GCP services. To access GCP services, the + appropriate IAM permissions must also be granted to the + connection's Service Account. When it defaults to None, the udf + will be created without any connection. A udf without a + connection has no internet access and no access to other GCP + services. + name (str): Explicit name of the persisted BigQuery managed function. Use it with caution, because more than one users working in the same project and dataset could overwrite each other's managed - functions if they use the same persistent name. When an explicit - name is provided, any session specific clean up ( + functions if they use the same persistent name. 
Please note that + any session specific clean up ( ``bigframes.session.Session.close``/ ``bigframes.pandas.close_session``/ ``bigframes.pandas.reset_session``/ ``bigframes.pandas.clean_up_by_session_id``) does not clean up - the function, and leaves it for the user to manage the function - and the associated cloud function directly. + this function, and leaves it for the user to manage the function + directly. packages (str[], Optional): Explicit name of the external package dependencies. Each dependency is added to the `requirements.txt` as is, and can be @@ -1499,8 +1513,8 @@ def udf( deployed for the user defined code. """ return self._function_session.udf( - input_types, - output_type, + input_types=input_types, + output_type=output_type, session=self, dataset=dataset, bigquery_connection=bigquery_connection, @@ -1593,7 +1607,7 @@ def read_gbq_function( Another use case is to define your own remote function and use it later. For example, define the remote function: - >>> @bpd.remote_function() + >>> @bpd.remote_function(cloud_function_service_account="default") ... def tenfold(num: int) -> float: ... return num * 10 @@ -1620,7 +1634,7 @@ def read_gbq_function( note, row processor implies that the function has only one input parameter. - >>> @bpd.remote_function() + >>> @bpd.remote_function(cloud_function_service_account="default") ... def row_sum(s: bpd.Series) -> float: ... return s['a'] + s['b'] + s['c'] @@ -1708,7 +1722,7 @@ def _start_query_ml_ddl( def _create_object_table(self, path: str, connection: str) -> str: """Create a random id Object Table from the input path and connection.""" - table = str(self._loader._storage_manager.generate_unique_resource_id()) + table = str(self._anon_dataset_manager.generate_unique_resource_id()) import textwrap @@ -1757,9 +1771,7 @@ def from_glob_path( raise NotImplementedError() # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. - connection = self._create_bq_connection( - connection=connection, iam_role="storage.objectUser" - ) + connection = self._create_bq_connection(connection=connection) table = self._create_object_table(path, connection) @@ -1769,13 +1781,16 @@ def from_glob_path( return s.rename(name).to_frame() def _create_bq_connection( - self, iam_role: str, *, connection: Optional[str] = None + self, + *, + connection: Optional[str] = None, + iam_role: Optional[str] = None, ) -> str: """Create the connection with the session settings and try to attach iam role to the connection SA. If any of project, location or connection isn't specified, use the session defaults. 
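Putting the new `udf` contract together: `dataset` and `name` are now mandatory, `bigquery_connection` stays optional (omitting it yields a UDF with no internet or GCP-service access), and input/output types come from the annotations. A sketch with placeholder dataset and function names:

>>> import bigframes.pandas as bpd
>>> @bpd.udf(dataset="my_dataset", name="string_len")  # doctest: +SKIP
... def string_len(s: str) -> int:
...     return len(s)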
Returns fully-qualified connection name.""" connection = self._bq_connection if not connection else connection - connection = bigframes.clients.resolve_full_bq_connection_name( - connection_name=connection, + connection = bigframes.clients.get_canonical_bq_connection_id( + connection_id=connection, default_project=self._project, default_location=self._location, ) diff --git a/bigframes/session/_io/bigquery/__init__.py b/bigframes/session/_io/bigquery/__init__.py index d9f1c0f295..4fdd836777 100644 --- a/bigframes/session/_io/bigquery/__init__.py +++ b/bigframes/session/_io/bigquery/__init__.py @@ -245,6 +245,8 @@ def start_query_with_client( location=location, project=project, api_timeout=timeout, + page_size=page_size, + max_results=max_results, ) if metrics is not None: metrics.count_job_stats(query=sql) diff --git a/bigframes/session/_io/pandas.py b/bigframes/session/_io/pandas.py index ca70ee774c..9340e060ac 100644 --- a/bigframes/session/_io/pandas.py +++ b/bigframes/session/_io/pandas.py @@ -19,7 +19,6 @@ import bigframes_vendored.constants as constants import geopandas # type: ignore -import numpy as np import pandas import pandas.arrays import pyarrow # type: ignore @@ -27,7 +26,6 @@ import pyarrow.types # type: ignore import bigframes.core.schema -import bigframes.core.utils as utils import bigframes.dtypes import bigframes.features @@ -80,7 +78,10 @@ def arrow_to_pandas( if dtype == geopandas.array.GeometryDtype(): series = geopandas.GeoSeries.from_wkt( - column, + # Use `to_pylist()` is a workaround for TypeError: object of type + # 'pyarrow.lib.StringScalar' has no len() on older pyarrow, + # geopandas, shapely combinations. + column.to_pylist(), # BigQuery geography type is based on the WGS84 reference ellipsoid. crs="EPSG:4326", ) @@ -130,49 +131,3 @@ def arrow_to_pandas( serieses[field.name] = series return pandas.DataFrame(serieses) - - -def pandas_to_bq_compatible(pandas_dataframe: pandas.DataFrame) -> DataFrameAndLabels: - """Convert a pandas DataFrame into something compatible with uploading to a - BigQuery table (without flexible column names enabled). - """ - col_index = pandas_dataframe.columns.copy() - col_labels, idx_labels = ( - col_index.to_list(), - pandas_dataframe.index.names, - ) - new_col_ids, new_idx_ids = utils.get_standardized_ids( - col_labels, - idx_labels, - # Loading parquet files into BigQuery with special column names - # is only supported under an allowlist. 
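The `to_pylist()` change in `arrow_to_pandas` above hands `GeoSeries.from_wkt` a plain Python list rather than a pyarrow array, sidestepping the `TypeError` seen on older pyarrow/geopandas/shapely combinations. The working call in isolation:

>>> import geopandas
>>> import pyarrow as pa
>>> column = pa.array(["POINT (1 2)", "POINT (3 4)"])
>>> geopandas.GeoSeries.from_wkt(column.to_pylist(), crs="EPSG:4326")  # doctest: +SKIP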
- strict=True, - ) - - # Add order column to pandas DataFrame to preserve order in BigQuery - ordering_col = "rowid" - columns = frozenset(col_labels + idx_labels) - suffix = 2 - while ordering_col in columns: - ordering_col = f"rowid_{suffix}" - suffix += 1 - - pandas_dataframe_copy = pandas_dataframe.copy() - pandas_dataframe_copy.index.names = new_idx_ids - pandas_dataframe_copy.columns = pandas.Index(new_col_ids) - pandas_dataframe_copy[ordering_col] = np.arange(pandas_dataframe_copy.shape[0]) - - timedelta_cols = utils.replace_timedeltas_with_micros(pandas_dataframe_copy) - json_cols = utils.replace_json_with_string(pandas_dataframe_copy) - col_type_overrides: typing.Dict[str, bigframes.dtypes.Dtype] = { - **{col: bigframes.dtypes.TIMEDELTA_DTYPE for col in timedelta_cols}, - **{col: bigframes.dtypes.JSON_DTYPE for col in json_cols}, - } - - return DataFrameAndLabels( - df=pandas_dataframe_copy, - column_labels=col_labels, - index_labels=idx_labels, - ordering_col=ordering_col, - col_type_overrides=col_type_overrides, - ) diff --git a/bigframes/session/temp_storage.py b/bigframes/session/anonymous_dataset.py similarity index 89% rename from bigframes/session/temp_storage.py rename to bigframes/session/anonymous_dataset.py index 3b2965efef..c5808aa63c 100644 --- a/bigframes/session/temp_storage.py +++ b/bigframes/session/anonymous_dataset.py @@ -18,13 +18,14 @@ import google.cloud.bigquery as bigquery -import bigframes.constants as constants +from bigframes import constants +from bigframes.session import temporary_storage import bigframes.session._io.bigquery as bf_io_bigquery _TEMP_TABLE_ID_FORMAT = "bqdf{date}_{session_id}_{random_id}" -class AnonymousDatasetManager: +class AnonymousDatasetManager(temporary_storage.TemporaryStorageManager): """ Responsible for allocating and cleaning up temporary gbq tables used by a BigFrames session. """ @@ -38,10 +39,10 @@ def __init__( kms_key: Optional[str] = None ): self.bqclient = bqclient - self.location = location + self._location = location self.dataset = bf_io_bigquery.create_bq_dataset_reference( self.bqclient, - location=self.location, + location=self._location, api_name="session-__init__", ) @@ -49,8 +50,12 @@ def __init__( self._table_ids: List[bigquery.TableReference] = [] self._kms_key = kms_key - def allocate_and_create_temp_table( - self, schema: Sequence[bigquery.SchemaField], cluster_cols: Sequence[str] + @property + def location(self): + return self._location + + def create_temp_table( + self, schema: Sequence[bigquery.SchemaField], cluster_cols: Sequence[str] = [] ) -> bigquery.TableReference: """ Allocates and and creates a table in the anonymous dataset. @@ -99,7 +104,8 @@ def generate_unique_resource_id(self) -> bigquery.TableReference: ) return self.dataset.table(table_id) - def clean_up_tables(self): + def close(self): """Delete tables that were created with this session's session_id.""" for table_ref in self._table_ids: self.bqclient.delete_table(table_ref, not_found_ok=True) + self._table_ids.clear() diff --git a/bigframes/session/bigquery_session.py b/bigframes/session/bigquery_session.py new file mode 100644 index 0000000000..ae8dc88d43 --- /dev/null +++ b/bigframes/session/bigquery_session.py @@ -0,0 +1,168 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import logging +import threading +from typing import Callable, Optional, Sequence +import uuid + +# TODO: Non-ibis implementation +import bigframes_vendored.ibis.backends.bigquery.datatypes as ibis_bq +import google.cloud.bigquery as bigquery + +from bigframes.core.compile import googlesql +from bigframes.session import temporary_storage + +KEEPALIVE_QUERY_TIMEOUT_SECONDS = 5.0 + +KEEPALIVE_FREQUENCY = datetime.timedelta(hours=6) + + +logger = logging.getLogger(__name__) + + +class SessionResourceManager(temporary_storage.TemporaryStorageManager): + """ + Responsible for allocating and cleaning up temporary gbq tables used by a BigFrames session. + """ + + def __init__(self, bqclient: bigquery.Client, location: str): + self.bqclient = bqclient + self._location = location + self._session_id: Optional[str] = None + self._sessiondaemon: Optional[RecurringTaskDaemon] = None + self._session_lock = threading.RLock() + + @property + def location(self): + return self._location + + def create_temp_table( + self, schema: Sequence[bigquery.SchemaField], cluster_cols: Sequence[str] = [] + ) -> bigquery.TableReference: + """Create a temporary session table. Session is an exclusive resource, so throughput is limited""" + # Can't set a table in _SESSION as destination via query job API, so we + # run DDL, instead. + with self._session_lock: + table_ref = bigquery.TableReference( + bigquery.DatasetReference(self.bqclient.project, "_SESSION"), + f"bqdf_{uuid.uuid4()}", + ) + job_config = bigquery.QueryJobConfig( + connection_properties=[ + bigquery.ConnectionProperty("session_id", self._get_session_id()) + ] + ) + + ibis_schema = ibis_bq.BigQuerySchema.to_ibis(list(schema)) + + fields = [ + f"{googlesql.identifier(name)} {ibis_bq.BigQueryType.from_ibis(ibis_type)}" + for name, ibis_type in ibis_schema.fields.items() + ] + fields_string = ",".join(fields) + + cluster_string = "" + if cluster_cols: + cluster_cols_sql = ", ".join( + f"{googlesql.identifier(cluster_col)}" + for cluster_col in cluster_cols + ) + cluster_string = f"\nCLUSTER BY {cluster_cols_sql}" + + ddl = f"CREATE TEMP TABLE `_SESSION`.{googlesql.identifier(table_ref.table_id)} ({fields_string}){cluster_string}" + + job = self.bqclient.query(ddl, job_config=job_config) + job.result() + # return the fully qualified table, so it can be used outside of the session + return job.destination + + def close(self): + if self._sessiondaemon is not None: + self._sessiondaemon.stop() + + if self._session_id is not None and self.bqclient is not None: + self.bqclient.query_and_wait(f"CALL BQ.ABORT_SESSION('{self._session_id}')") + + def _get_session_id(self) -> str: + if self._session_id: + return self._session_id + with self._session_lock: + if self._session_id is None: + job_config = bigquery.QueryJobConfig(create_session=True) + # Make sure the session is a new one, not one associated with another query. 
+                job_config.use_query_cache = False
+                query_job = self.bqclient.query(
+                    "SELECT 1", job_config=job_config, location=self.location
+                )
+                query_job.result()  # blocks until finished
+                assert query_job.session_info is not None
+                assert query_job.session_info.session_id is not None
+                self._session_id = query_job.session_info.session_id
+                self._sessiondaemon = RecurringTaskDaemon(
+                    task=self._keep_session_alive, frequency=KEEPALIVE_FREQUENCY
+                )
+                self._sessiondaemon.start()
+                return query_job.session_info.session_id
+            else:
+                return self._session_id
+
+    def _keep_session_alive(self):
+        # BigQuery sessions expire after 24 hours of disuse by default, but each
+        # query renews the session, up to a maximum lifetime of 7 days.
+        with self._session_lock:
+            job_config = bigquery.QueryJobConfig(
+                connection_properties=[
+                    bigquery.ConnectionProperty("session_id", self._get_session_id())
+                ]
+            )
+            try:
+                self.bqclient.query_and_wait(
+                    "SELECT 1",
+                    location=self.location,
+                    job_config=job_config,
+                    wait_timeout=KEEPALIVE_QUERY_TIMEOUT_SECONDS,
+                )
+            except Exception as e:
+                logger.warning("BigQuery session keep-alive query errored: %s", e)
+
+
+class RecurringTaskDaemon:
+    def __init__(self, task: Callable[[], None], frequency: datetime.timedelta):
+        self._stop_event = threading.Event()
+        self._frequency = frequency
+        self._thread = threading.Thread(target=self._run_loop, daemon=True)
+        self._task = task
+
+    def start(self):
+        """Start the daemon. Cannot be restarted once stopped."""
+        if self._stop_event.is_set():
+            raise RuntimeError("Cannot restart daemon thread.")
+        self._thread.start()
+
+    def _run_loop(self):
+        while True:
+            self._stop_event.wait(self._frequency.total_seconds())
+            if self._stop_event.is_set():
+                return
+            try:
+                self._task()
+            except Exception as e:
+                logger.warning("RecurringTaskDaemon task errored: %s", e)
+
+    def stop(self, timeout_seconds: Optional[float] = None):
+        """Stop and clean up the daemon."""
+        if self._thread.is_alive():
+            self._stop_event.set()
+            self._thread.join(timeout=timeout_seconds)
diff --git a/bigframes/session/bq_caching_executor.py b/bigframes/session/bq_caching_executor.py
new file mode 100644
index 0000000000..983b1918f5
--- /dev/null
+++ b/bigframes/session/bq_caching_executor.py
@@ -0,0 +1,598 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
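+
+"""Executor that computes BigFrames values with BigQuery, caching executed
+expression subtrees in temporary tables so later executions can reuse them."""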
+ +from __future__ import annotations + +import math +import os +from typing import cast, Literal, Mapping, Optional, Sequence, Tuple, Union +import warnings +import weakref + +import google.api_core.exceptions +from google.cloud import bigquery +import google.cloud.bigquery.job as bq_job +import google.cloud.bigquery.table as bq_table +import google.cloud.bigquery_storage_v1 + +import bigframes.core +import bigframes.core.compile +import bigframes.core.guid +import bigframes.core.nodes as nodes +import bigframes.core.ordering as order +import bigframes.core.tree_properties as tree_properties +import bigframes.dtypes +import bigframes.exceptions as bfe +import bigframes.features +from bigframes.session import executor, read_api_execution +import bigframes.session._io.bigquery as bq_io +import bigframes.session.metrics +import bigframes.session.planner +import bigframes.session.temporary_storage + +# Max complexity that should be executed as a single query +QUERY_COMPLEXITY_LIMIT = 1e7 +# Number of times to factor out subqueries before giving up. +MAX_SUBTREE_FACTORINGS = 5 +_MAX_CLUSTER_COLUMNS = 4 +MAX_SMALL_RESULT_BYTES = 10 * 1024 * 1024 * 1024 # 10G + + +class BigQueryCachingExecutor(executor.Executor): + """Computes BigFrames values using BigQuery Engine. + + This executor can cache expressions. If those expressions are executed later, this session + will re-use the pre-existing results from previous executions. + + This class is not thread-safe. + """ + + def __init__( + self, + bqclient: bigquery.Client, + storage_manager: bigframes.session.temporary_storage.TemporaryStorageManager, + bqstoragereadclient: google.cloud.bigquery_storage_v1.BigQueryReadClient, + *, + strictly_ordered: bool = True, + metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None, + ): + self.bqclient = bqclient + self.storage_manager = storage_manager + self.compiler: bigframes.core.compile.SQLCompiler = ( + bigframes.core.compile.SQLCompiler() + ) + self.strictly_ordered: bool = strictly_ordered + self._cached_executions: weakref.WeakKeyDictionary[ + nodes.BigFrameNode, nodes.BigFrameNode + ] = weakref.WeakKeyDictionary() + self.metrics = metrics + self.bqstoragereadclient = bqstoragereadclient + # Simple left-to-right precedence for now + self._semi_executors = ( + read_api_execution.ReadApiSemiExecutor( + bqstoragereadclient=bqstoragereadclient, + project=self.bqclient.project, + ), + ) + + def to_sql( + self, + array_value: bigframes.core.ArrayValue, + offset_column: Optional[str] = None, + ordered: bool = False, + enable_cache: bool = True, + ) -> str: + if offset_column: + array_value, _ = array_value.promote_offsets() + node = ( + self.replace_cached_subtrees(array_value.node) + if enable_cache + else array_value.node + ) + return self.compiler.compile(node, ordered=ordered) + + def execute( + self, + array_value: bigframes.core.ArrayValue, + *, + ordered: bool = True, + use_explicit_destination: Optional[bool] = None, + page_size: Optional[int] = None, + max_results: Optional[int] = None, + ) -> executor.ExecuteResult: + if use_explicit_destination is None: + use_explicit_destination = bigframes.options.bigquery.allow_large_results + + if bigframes.options.compute.enable_multi_query_execution: + self._simplify_with_caching(array_value) + + plan = self.replace_cached_subtrees(array_value.node) + # Use explicit destination to avoid 10GB limit of temporary table + destination_table = ( + self.storage_manager.create_temp_table( + array_value.schema.to_bigquery(), cluster_cols=[] + ) + if 
use_explicit_destination + else None + ) + return self._execute_plan( + plan, + ordered=ordered, + page_size=page_size, + max_results=max_results, + destination=destination_table, + ) + + def export_gbq( + self, + array_value: bigframes.core.ArrayValue, + destination: bigquery.TableReference, + if_exists: Literal["fail", "replace", "append"] = "fail", + cluster_cols: Sequence[str] = [], + ): + """ + Export the ArrayValue to an existing BigQuery table. + """ + if bigframes.options.compute.enable_multi_query_execution: + self._simplify_with_caching(array_value) + + dispositions = { + "fail": bigquery.WriteDisposition.WRITE_EMPTY, + "replace": bigquery.WriteDisposition.WRITE_TRUNCATE, + "append": bigquery.WriteDisposition.WRITE_APPEND, + } + sql = self.to_sql(array_value, ordered=False) + job_config = bigquery.QueryJobConfig( + write_disposition=dispositions[if_exists], + destination=destination, + clustering_fields=cluster_cols if cluster_cols else None, + ) + # TODO(swast): plumb through the api_name of the user-facing api that + # caused this query. + _, query_job = self._run_execute_query( + sql=sql, + job_config=job_config, + ) + + has_timedelta_col = any( + t == bigframes.dtypes.TIMEDELTA_DTYPE for t in array_value.schema.dtypes + ) + + if if_exists != "append" and has_timedelta_col: + # Only update schema if this is not modifying an existing table, and the + # new table contains timedelta columns. + table = self.bqclient.get_table(destination) + table.schema = array_value.schema.to_bigquery() + self.bqclient.update_table(table, ["schema"]) + + return query_job + + def export_gcs( + self, + array_value: bigframes.core.ArrayValue, + uri: str, + format: Literal["json", "csv", "parquet"], + export_options: Mapping[str, Union[bool, str]], + ): + query_job = self.execute( + array_value, + ordered=False, + use_explicit_destination=True, + ).query_job + assert query_job is not None + result_table = query_job.destination + assert result_table is not None + export_data_statement = bq_io.create_export_data_statement( + f"{result_table.project}.{result_table.dataset_id}.{result_table.table_id}", + uri=uri, + format=format, + export_options=dict(export_options), + ) + + bq_io.start_query_with_client( + self.bqclient, + export_data_statement, + job_config=bigquery.QueryJobConfig(), + api_name=f"dataframe-to_{format.lower()}", + metrics=self.metrics, + ) + return query_job + + def dry_run( + self, array_value: bigframes.core.ArrayValue, ordered: bool = True + ) -> bigquery.QueryJob: + sql = self.to_sql(array_value, ordered=ordered) + job_config = bigquery.QueryJobConfig(dry_run=True) + query_job = self.bqclient.query(sql, job_config=job_config) + return query_job + + def peek( + self, + array_value: bigframes.core.ArrayValue, + n_rows: int, + use_explicit_destination: Optional[bool] = None, + ) -> executor.ExecuteResult: + """ + A 'peek' efficiently accesses a small number of rows in the dataframe. 
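+
+        A minimal usage sketch (illustrative only; ``executor`` stands for an
+        instance of this class and ``array_value`` for a
+        ``bigframes.core.ArrayValue``):
+
+            result = executor.peek(array_value, n_rows=5)
+            for batch in result.arrow_batches():
+                ...  # consume pyarrow record batches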
+        """
+        plan = self.replace_cached_subtrees(array_value.node)
+        if not tree_properties.can_fast_peek(plan):
+            msg = bfe.format_message("Peeking this value cannot be done efficiently.")
+            warnings.warn(msg)
+        if use_explicit_destination is None:
+            use_explicit_destination = bigframes.options.bigquery.allow_large_results
+
+        destination_table = (
+            self.storage_manager.create_temp_table(
+                array_value.schema.to_bigquery(), cluster_cols=[]
+            )
+            if use_explicit_destination
+            else None
+        )
+
+        return self._execute_plan(
+            plan, ordered=False, destination=destination_table, peek=n_rows
+        )
+
+    def head(
+        self, array_value: bigframes.core.ArrayValue, n_rows: int
+    ) -> executor.ExecuteResult:
+
+        maybe_row_count = self._local_get_row_count(array_value)
+        if (maybe_row_count is not None) and (maybe_row_count <= n_rows):
+            return self.execute(array_value, ordered=True)
+
+        if not self.strictly_ordered and not array_value.node.explicitly_ordered:
+            # No user-provided ordering, so just get any N rows; it's faster!
+            return self.peek(array_value, n_rows)
+
+        plan = self.replace_cached_subtrees(array_value.node)
+        if not tree_properties.can_fast_head(plan):
+            # If we can't get the head fast, we will need to execute the whole query.
+            # Will want to do this in a way such that the result is reusable, but the first
+            # N values can be easily extracted.
+            # This currently requires clustering on offsets.
+            self._cache_with_offsets(array_value)
+            # Get a new optimized plan after caching
+            plan = self.replace_cached_subtrees(array_value.node)
+            assert tree_properties.can_fast_head(plan)
+
+        head_plan = generate_head_plan(plan, n_rows)
+        return self._execute_plan(head_plan, ordered=True)
+
+    def get_row_count(self, array_value: bigframes.core.ArrayValue) -> int:
+        # TODO: Fold row count node in and use local execution
+        count = self._local_get_row_count(array_value)
+        if count is not None:
+            return count
+        else:
+            row_count_plan = self.replace_cached_subtrees(
+                generate_row_count_plan(array_value.node)
+            )
+            results = self._execute_plan(row_count_plan, ordered=True)
+            pa_table = next(results.arrow_batches())
+            pa_array = pa_table.column(0)
+            return pa_array.tolist()[0]
+
+    def cached(
+        self,
+        array_value: bigframes.core.ArrayValue,
+        *,
+        force: bool = False,
+        use_session: bool = False,
+        cluster_cols: Sequence[str] = (),
+    ) -> None:
+        """Write the block to a session table."""
+        # use a heuristic for whether something needs to be cached
+        if (not force) and self._is_trivially_executable(array_value):
+            return
+        if use_session:
+            self._cache_with_session_awareness(array_value)
+        else:
+            self._cache_with_cluster_cols(array_value, cluster_cols=cluster_cols)
+
+    def _local_get_row_count(
+        self, array_value: bigframes.core.ArrayValue
+    ) -> Optional[int]:
+        # optimized plan has cache materializations which will have row count metadata
+        # that is more likely to be usable than original leaf nodes.
+        plan = self.replace_cached_subtrees(array_value.node)
+        return tree_properties.row_count(plan)
+
+    # Helpers
+    def _run_execute_query(
+        self,
+        sql: str,
+        job_config: Optional[bq_job.QueryJobConfig] = None,
+        api_name: Optional[str] = None,
+        page_size: Optional[int] = None,
+        max_results: Optional[int] = None,
+        query_with_job: bool = True,
+    ) -> Tuple[bq_table.RowIterator, Optional[bigquery.QueryJob]]:
+        """
+        Starts BigQuery query job and waits for results.
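+
+        For example (a sketch; the pagination arguments are simply forwarded
+        to ``bq_io.start_query_with_client``):
+
+            iterator, query_job = self._run_execute_query(
+                sql="SELECT 1",
+                page_size=1000,
+                query_with_job=False,  # query_job may be None in this mode
+            )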
+ """ + job_config = bq_job.QueryJobConfig() if job_config is None else job_config + if bigframes.options.compute.maximum_bytes_billed is not None: + job_config.maximum_bytes_billed = ( + bigframes.options.compute.maximum_bytes_billed + ) + + if not self.strictly_ordered: + job_config.labels["bigframes-mode"] = "unordered" + + try: + iterator, query_job = bq_io.start_query_with_client( + self.bqclient, + sql, + job_config=job_config, + api_name=api_name, + max_results=max_results, + page_size=page_size, + metrics=self.metrics, + query_with_job=query_with_job, + ) + return iterator, query_job + + except google.api_core.exceptions.BadRequest as e: + # Unfortunately, this error type does not have a separate error code or exception type + if "Resources exceeded during query execution" in e.message: + new_message = "Computation is too complex to execute as a single query. Try using DataFrame.cache() on intermediate results, or setting bigframes.options.compute.enable_multi_query_execution." + raise bigframes.exceptions.QueryComplexityError(new_message) from e + else: + raise + + def replace_cached_subtrees(self, node: nodes.BigFrameNode) -> nodes.BigFrameNode: + return nodes.top_down(node, lambda x: self._cached_executions.get(x, x)) + + def _is_trivially_executable(self, array_value: bigframes.core.ArrayValue): + """ + Can the block be evaluated very cheaply? + If True, the array_value probably is not worth caching. + """ + # Once rewriting is available, will want to rewrite before + # evaluating execution cost. + return tree_properties.is_trivially_executable( + self.replace_cached_subtrees(array_value.node) + ) + + def _cache_with_cluster_cols( + self, array_value: bigframes.core.ArrayValue, cluster_cols: Sequence[str] + ): + """Executes the query and uses the resulting table to rewrite future executions.""" + + sql, schema, ordering_info = self.compiler.compile_raw( + self.replace_cached_subtrees(array_value.node) + ) + tmp_table = self._sql_as_cached_temp_table( + sql, + schema, + cluster_cols=bq_io.select_cluster_cols(schema, cluster_cols), + ) + cached_replacement = array_value.as_cached( + cache_table=self.bqclient.get_table(tmp_table), + ordering=ordering_info, + ).node + self._cached_executions[array_value.node] = cached_replacement + + def _cache_with_offsets(self, array_value: bigframes.core.ArrayValue): + """Executes the query and uses the resulting table to rewrite future executions.""" + offset_column = bigframes.core.guid.generate_guid("bigframes_offsets") + w_offsets, offset_column = array_value.promote_offsets() + sql = self.compiler.compile( + self.replace_cached_subtrees(w_offsets.node), ordered=False + ) + + tmp_table = self._sql_as_cached_temp_table( + sql, + w_offsets.schema.to_bigquery(), + cluster_cols=[offset_column], + ) + cached_replacement = array_value.as_cached( + cache_table=self.bqclient.get_table(tmp_table), + ordering=order.TotalOrdering.from_offset_col(offset_column), + ).node + self._cached_executions[array_value.node] = cached_replacement + + def _cache_with_session_awareness( + self, + array_value: bigframes.core.ArrayValue, + ) -> None: + session_forest = [obj._block._expr.node for obj in array_value.session.objects] + # These node types are cheap to re-compute + target, cluster_cols = bigframes.session.planner.session_aware_cache_plan( + array_value.node, list(session_forest) + ) + cluster_cols_sql_names = [id.sql for id in cluster_cols] + if len(cluster_cols) > 0: + self._cache_with_cluster_cols( + bigframes.core.ArrayValue(target), 
cluster_cols_sql_names
+            )
+        elif self.strictly_ordered:
+            self._cache_with_offsets(bigframes.core.ArrayValue(target))
+        else:
+            self._cache_with_cluster_cols(bigframes.core.ArrayValue(target), [])
+
+    def _simplify_with_caching(self, array_value: bigframes.core.ArrayValue):
+        """Attempts to handle the complexity by caching duplicated subtrees and breaking the query into pieces."""
+        # Apply existing caching first
+        for _ in range(MAX_SUBTREE_FACTORINGS):
+            node_with_cache = self.replace_cached_subtrees(array_value.node)
+            if node_with_cache.planning_complexity < QUERY_COMPLEXITY_LIMIT:
+                return
+
+            did_cache = self._cache_most_complex_subtree(array_value.node)
+            if not did_cache:
+                return
+
+    def _cache_most_complex_subtree(self, node: nodes.BigFrameNode) -> bool:
+        # TODO: If query fails, retry with lower complexity limit
+        selection = tree_properties.select_cache_target(
+            node,
+            min_complexity=(QUERY_COMPLEXITY_LIMIT / 500),
+            max_complexity=QUERY_COMPLEXITY_LIMIT,
+            cache=dict(self._cached_executions),
+            # Heuristic: subtree_complexity * (copies of subtree)^2
+            heuristic=lambda complexity, count: math.log(complexity)
+            + 2 * math.log(count),
+        )
+        if selection is None:
+            # No good subtrees to cache, just return original tree
+            return False
+
+        self._cache_with_cluster_cols(bigframes.core.ArrayValue(selection), [])
+        return True
+
+    def _sql_as_cached_temp_table(
+        self,
+        sql: str,
+        schema: Sequence[bigquery.SchemaField],
+        cluster_cols: Sequence[str],
+    ) -> bigquery.TableReference:
+        assert len(cluster_cols) <= _MAX_CLUSTER_COLUMNS
+        temp_table = self.storage_manager.create_temp_table(schema, cluster_cols)
+
+        # TODO: Get default job config settings
+        job_config = cast(
+            bigquery.QueryJobConfig,
+            bigquery.QueryJobConfig.from_api_repr({}),
+        )
+        job_config.destination = temp_table
+        _, query_job = self._run_execute_query(
+            sql,
+            job_config=job_config,
+            api_name="cached",
+        )
+        assert query_job is not None
+        query_job.result()
+        return query_job.destination
+
+    def _validate_result_schema(
+        self,
+        array_value: bigframes.core.ArrayValue,
+        bq_schema: list[bigquery.SchemaField],
+    ):
+        actual_schema = _sanitize(tuple(bq_schema))
+        ibis_schema = bigframes.core.compile.test_only_ibis_inferred_schema(
+            self.replace_cached_subtrees(array_value.node)
+        ).to_bigquery()
+        internal_schema = _sanitize(array_value.schema.to_bigquery())
+        if not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable:
+            return
+
+        if internal_schema != actual_schema:
+            raise ValueError(
+                f"This error should only occur while testing. BigFrames internal schema: {internal_schema} does not match actual schema: {actual_schema}"
+            )
+
+        if ibis_schema != actual_schema:
+            raise ValueError(
+                f"This error should only occur while testing. Ibis schema: {ibis_schema} does not match actual schema: {actual_schema}"
+            )
+
+    def _execute_plan(
+        self,
+        plan: nodes.BigFrameNode,
+        ordered: bool,
+        page_size: Optional[int] = None,
+        max_results: Optional[int] = None,
+        destination: Optional[bq_table.TableReference] = None,
+        peek: Optional[int] = None,
+    ):
+        """Just execute whatever plan as is, without further caching or decomposition."""
+
+        # First try to execute fast-paths
+        # TODO: Allow page_size and max_results by rechunking/truncating results
+        if (not page_size) and (not max_results) and (not destination) and (not peek):
+            for semi_executor in self._semi_executors:
+                maybe_result = semi_executor.execute(plan, ordered=ordered)
+                if maybe_result:
+                    return maybe_result
+
+        # TODO(swast): plumb through the api_name of the user-facing api that
+        # caused this query.
+        job_config = bigquery.QueryJobConfig()
+        # Use explicit destination to avoid 10GB limit of temporary table
+        if destination is not None:
+            job_config.destination = destination
+        sql = self.compiler.compile(plan, ordered=ordered, limit=peek)
+        iterator, query_job = self._run_execute_query(
+            sql=sql,
+            job_config=job_config,
+            page_size=page_size,
+            max_results=max_results,
+            query_with_job=(destination is not None),
+        )
+
+        # Though we provide the read client, iterator may or may not use it based on what is efficient for the result
+        def iterator_supplier():
+            # Workaround issue fixed by: https://github.com/googleapis/python-bigquery/pull/2154
+            if iterator._page_size is not None or iterator.max_results is not None:
+                return iterator.to_arrow_iterable(bqstorage_client=None)
+            else:
+                return iterator.to_arrow_iterable(
+                    bqstorage_client=self.bqstoragereadclient
+                )
+
+        if query_job:
+            size_bytes = self.bqclient.get_table(query_job.destination).num_bytes
+        else:
+            size_bytes = None
+
+        if size_bytes is not None and size_bytes >= MAX_SMALL_RESULT_BYTES:
+            msg = bfe.format_message(
+                "The query result size has exceeded 10 GB. In BigFrames 2.0 and "
+                "later, you might need to manually set `allow_large_results=True` in "
+                "the IO method or adjust the BigFrames option: "
+                "`bigframes.options.bigquery.allow_large_results=True`."
+            )
+            warnings.warn(msg, FutureWarning)
+        # Runs strict validations to ensure internal type predictions and ibis are completely in sync
+        # Do not execute these validations outside of testing suite.
+        if "PYTEST_CURRENT_TEST" in os.environ:
+            self._validate_result_schema(
+                bigframes.core.ArrayValue(plan), iterator.schema
+            )
+
+        return executor.ExecuteResult(
+            arrow_batches=iterator_supplier,
+            schema=plan.schema,
+            query_job=query_job,
+            total_bytes=size_bytes,
+            total_rows=iterator.total_rows,
+        )
+
+
+def _sanitize(
+    schema: Tuple[bigquery.SchemaField, ...]
+) -> Tuple[bigquery.SchemaField, ...]:
+    # Schemas inferred from SQL strings and Ibis expressions contain only names,
+    # types, and modes, so we disregard other fields (e.g. the timedelta
+    # description for timedelta columns) for validations.
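+    # Rebuilding each SchemaField from (name, field_type, mode) below drops any
+    # extra metadata, and nested `fields` are sanitized recursively so RECORD
+    # columns are compared structurally as well.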
+ return tuple( + bigquery.SchemaField( + f.name, + f.field_type, + f.mode, # type:ignore + fields=_sanitize(f.fields), + ) + for f in schema + ) + + +def generate_head_plan(node: nodes.BigFrameNode, n: int): + return nodes.SliceNode(node, start=None, stop=n) + + +def generate_row_count_plan(node: nodes.BigFrameNode): + return nodes.RowCountNode(node) diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index 2b24b6cb8b..86be8bd897 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -17,7 +17,6 @@ import os import typing from typing import Optional -import warnings import google.api_core.client_info import google.api_core.client_options @@ -32,7 +31,6 @@ import pydata_google_auth import bigframes.constants -import bigframes.exceptions as bfe import bigframes.version from . import environment @@ -43,16 +41,11 @@ # BigQuery is a REST API, which requires the protocol as part of the URL. -_BIGQUERY_LOCATIONAL_ENDPOINT = "https://{location}-bigquery.googleapis.com" _BIGQUERY_REGIONAL_ENDPOINT = "https://bigquery.{location}.rep.googleapis.com" # BigQuery Connection and Storage are gRPC APIs, which don't support the # https:// protocol in the API endpoint URL. -_BIGQUERYCONNECTION_LOCATIONAL_ENDPOINT = "{location}-bigqueryconnection.googleapis.com" -_BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT = "{location}-bigquerystorage.googleapis.com" -_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = ( - "https://bigquerystorage.{location}.rep.googleapis.com" -) +_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "bigquerystorage.{location}.rep.googleapis.com" def _get_default_credentials_with_project(): @@ -114,19 +107,18 @@ def __init__( ) self._project = project - if ( - use_regional_endpoints - and location is not None - and location.lower() - not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): - msg = bfe.format_message( - bigframes.constants.LEP_DEPRECATION_WARNING_MESSAGE.format( - location=location - ), - fill=False, - ) - warnings.warn(msg, category=FutureWarning) + if use_regional_endpoints: + if location is None: + raise ValueError(bigframes.constants.LOCATION_NEEDED_FOR_REP_MESSAGE) + elif ( + location.lower() + not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS + ): + raise ValueError( + bigframes.constants.REP_NOT_SUPPORTED_MESSAGE.format( + location=location + ) + ) self._location = location self._use_regional_endpoints = use_regional_endpoints @@ -156,16 +148,8 @@ def _create_bigquery_client(self): api_endpoint=self._client_endpoints_override["bqclient"] ) elif self._use_regional_endpoints: - endpoint_template = _BIGQUERY_REGIONAL_ENDPOINT - if ( - self._location is not None - and self._location.lower() - not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): - endpoint_template = _BIGQUERY_LOCATIONAL_ENDPOINT - bq_options = google.api_core.client_options.ClientOptions( - api_endpoint=endpoint_template.format(location=self._location) + api_endpoint=_BIGQUERY_REGIONAL_ENDPOINT.format(location=self._location) ) bq_info = google.api_core.client_info.ClientInfo( @@ -212,12 +196,6 @@ def bqconnectionclient(self): bqconnection_options = google.api_core.client_options.ClientOptions( api_endpoint=self._client_endpoints_override["bqconnectionclient"] ) - elif self._use_regional_endpoints: - bqconnection_options = google.api_core.client_options.ClientOptions( - api_endpoint=_BIGQUERYCONNECTION_LOCATIONAL_ENDPOINT.format( - location=self._location - ) - ) bqconnection_info = google.api_core.gapic_v1.client_info.ClientInfo( user_agent=self._application_name @@ 
-241,16 +219,10 @@ def bqstoragereadclient(self): api_endpoint=self._client_endpoints_override["bqstoragereadclient"] ) elif self._use_regional_endpoints: - endpoint_template = _BIGQUERYSTORAGE_REGIONAL_ENDPOINT - if ( - self._location is not None - and self._location.lower() - not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): - endpoint_template = _BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT - bqstorage_options = google.api_core.client_options.ClientOptions( - api_endpoint=endpoint_template.format(location=self._location) + api_endpoint=_BIGQUERYSTORAGE_REGIONAL_ENDPOINT.format( + location=self._location + ) ) bqstorage_info = google.api_core.gapic_v1.client_info.ClientInfo( diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index 07645c2a98..4c27c25058 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -16,53 +16,13 @@ import abc import dataclasses -import math -import os -from typing import ( - Callable, - cast, - Iterator, - Literal, - Mapping, - Optional, - Sequence, - Tuple, - Union, -) -import warnings -import weakref +from typing import Callable, Iterator, Literal, Mapping, Optional, Sequence, Union -import google.api_core.exceptions from google.cloud import bigquery -import google.cloud.bigquery.job as bq_job -import google.cloud.bigquery.table as bq_table -import google.cloud.bigquery_storage_v1 import pyarrow import bigframes.core -import bigframes.core.compile -import bigframes.core.guid -import bigframes.core.identifiers -import bigframes.core.nodes as nodes -import bigframes.core.ordering as order import bigframes.core.schema -import bigframes.core.tree_properties as tree_properties -import bigframes.dtypes -import bigframes.exceptions as bfe -import bigframes.features -import bigframes.session._io.bigquery as bq_io -import bigframes.session.metrics -import bigframes.session.planner -import bigframes.session.temp_storage - -# Max complexity that should be executed as a single query -QUERY_COMPLEXITY_LIMIT = 1e7 -# Number of times to factor out subqueries before giving up. -MAX_SUBTREE_FACTORINGS = 5 -_MAX_CLUSTER_COLUMNS = 4 -# TODO: b/338258028 Enable pruning to reduce text size. -ENABLE_PRUNING = False -MAX_SMALL_RESULT_BYTES = 10 * 1024 * 1024 * 1024 # 10G @dataclasses.dataclass(frozen=True) @@ -181,532 +141,3 @@ def cached( cluster_cols: Sequence[str] = (), ) -> None: raise NotImplementedError("cached not implemented for this executor") - - -class BigQueryCachingExecutor(Executor): - """Computes BigFrames values using BigQuery Engine. - - This executor can cache expressions. If those expressions are executed later, this session - will re-use the pre-existing results from previous executions. - - This class is not thread-safe. 
- """ - - def __init__( - self, - bqclient: bigquery.Client, - storage_manager: bigframes.session.temp_storage.AnonymousDatasetManager, - bqstoragereadclient: google.cloud.bigquery_storage_v1.BigQueryReadClient, - *, - strictly_ordered: bool = True, - metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None, - ): - self.bqclient = bqclient - self.storage_manager = storage_manager - self.compiler: bigframes.core.compile.SQLCompiler = ( - bigframes.core.compile.SQLCompiler() - ) - self.strictly_ordered: bool = strictly_ordered - self._cached_executions: weakref.WeakKeyDictionary[ - nodes.BigFrameNode, nodes.BigFrameNode - ] = weakref.WeakKeyDictionary() - self.metrics = metrics - self.bqstoragereadclient = bqstoragereadclient - - def to_sql( - self, - array_value: bigframes.core.ArrayValue, - offset_column: Optional[str] = None, - ordered: bool = False, - enable_cache: bool = True, - ) -> str: - if offset_column: - array_value, internal_offset_col = array_value.promote_offsets() - node = ( - self.replace_cached_subtrees(array_value.node) - if enable_cache - else array_value.node - ) - return self.compiler.compile(node, ordered=ordered) - - def execute( - self, - array_value: bigframes.core.ArrayValue, - *, - ordered: bool = True, - use_explicit_destination: Optional[bool] = None, - page_size: Optional[int] = None, - max_results: Optional[int] = None, - ): - if use_explicit_destination is None: - use_explicit_destination = bigframes.options.bigquery.allow_large_results - - if bigframes.options.compute.enable_multi_query_execution: - self._simplify_with_caching(array_value) - - sql = self.to_sql(array_value, ordered=ordered) - job_config = bigquery.QueryJobConfig() - # Use explicit destination to avoid 10GB limit of temporary table - if use_explicit_destination: - destination_table = self.storage_manager.allocate_and_create_temp_table( - array_value.schema.to_bigquery(), cluster_cols=[] - ) - job_config.destination = destination_table - # TODO(swast): plumb through the api_name of the user-facing api that - # caused this query. - iterator, query_job = self._run_execute_query( - sql=sql, - job_config=job_config, - page_size=page_size, - max_results=max_results, - query_with_job=use_explicit_destination, - ) - - # Though we provide the read client, iterator may or may not use it based on what is efficient for the result - def iterator_supplier(): - return iterator.to_arrow_iterable(bqstorage_client=self.bqstoragereadclient) - - if query_job: - size_bytes = self.bqclient.get_table(query_job.destination).num_bytes - else: - size_bytes = None - - if size_bytes is not None and size_bytes >= MAX_SMALL_RESULT_BYTES: - msg = bfe.format_message( - "The query result size has exceeded 10 GB. In BigFrames 2.0 and " - "later, you might need to manually set `allow_large_results=True` in " - "the IO method or adjust the BigFrames option: " - "`bigframes.options.bigquery.allow_large_results=True`." - ) - warnings.warn(msg, FutureWarning) - # Runs strict validations to ensure internal type predictions and ibis are completely in sync - # Do not execute these validations outside of testing suite. 
- if "PYTEST_CURRENT_TEST" in os.environ: - self._validate_result_schema(array_value, iterator.schema) - - return ExecuteResult( - arrow_batches=iterator_supplier, - schema=array_value.schema, - query_job=query_job, - total_bytes=size_bytes, - total_rows=iterator.total_rows, - ) - - def export_gbq( - self, - array_value: bigframes.core.ArrayValue, - destination: bigquery.TableReference, - if_exists: Literal["fail", "replace", "append"] = "fail", - cluster_cols: Sequence[str] = [], - ): - """ - Export the ArrayValue to an existing BigQuery table. - """ - if bigframes.options.compute.enable_multi_query_execution: - self._simplify_with_caching(array_value) - - dispositions = { - "fail": bigquery.WriteDisposition.WRITE_EMPTY, - "replace": bigquery.WriteDisposition.WRITE_TRUNCATE, - "append": bigquery.WriteDisposition.WRITE_APPEND, - } - sql = self.to_sql(array_value, ordered=False) - job_config = bigquery.QueryJobConfig( - write_disposition=dispositions[if_exists], - destination=destination, - clustering_fields=cluster_cols if cluster_cols else None, - ) - # TODO(swast): plumb through the api_name of the user-facing api that - # caused this query. - _, query_job = self._run_execute_query( - sql=sql, - job_config=job_config, - ) - - has_timedelta_col = any( - t == bigframes.dtypes.TIMEDELTA_DTYPE for t in array_value.schema.dtypes - ) - - if if_exists != "append" and has_timedelta_col: - # Only update schema if this is not modifying an existing table, and the - # new table contains timedelta columns. - table = self.bqclient.get_table(destination) - table.schema = array_value.schema.to_bigquery() - self.bqclient.update_table(table, ["schema"]) - - return query_job - - def export_gcs( - self, - array_value: bigframes.core.ArrayValue, - uri: str, - format: Literal["json", "csv", "parquet"], - export_options: Mapping[str, Union[bool, str]], - ): - query_job = self.execute( - array_value, - ordered=False, - use_explicit_destination=True, - ).query_job - result_table = query_job.destination - export_data_statement = bq_io.create_export_data_statement( - f"{result_table.project}.{result_table.dataset_id}.{result_table.table_id}", - uri=uri, - format=format, - export_options=dict(export_options), - ) - - bq_io.start_query_with_client( - self.bqclient, - export_data_statement, - job_config=bigquery.QueryJobConfig(), - api_name=f"dataframe-to_{format.lower()}", - metrics=self.metrics, - ) - return query_job - - def dry_run( - self, array_value: bigframes.core.ArrayValue, ordered: bool = True - ) -> bigquery.QueryJob: - sql = self.to_sql(array_value, ordered=ordered) - job_config = bigquery.QueryJobConfig(dry_run=True) - query_job = self.bqclient.query(sql, job_config=job_config) - return query_job - - def peek( - self, - array_value: bigframes.core.ArrayValue, - n_rows: int, - use_explicit_destination: Optional[bool] = None, - ) -> ExecuteResult: - """ - A 'peek' efficiently accesses a small number of rows in the dataframe. 
- """ - plan = self.replace_cached_subtrees(array_value.node) - if not tree_properties.can_fast_peek(plan): - msg = bfe.format_message("Peeking this value cannot be done efficiently.") - warnings.warn(msg) - if use_explicit_destination is None: - use_explicit_destination = bigframes.options.bigquery.allow_large_results - - job_config = bigquery.QueryJobConfig() - # Use explicit destination to avoid 10GB limit of temporary table - if use_explicit_destination: - destination_table = self.storage_manager.allocate_and_create_temp_table( - array_value.schema.to_bigquery(), cluster_cols=[] - ) - job_config.destination = destination_table - - sql = self.compiler.compile(plan, ordered=False, limit=n_rows) - - # TODO(swast): plumb through the api_name of the user-facing api that - # caused this query. - iterator, query_job = self._run_execute_query( - sql=sql, job_config=job_config, query_with_job=use_explicit_destination - ) - return ExecuteResult( - # Probably don't need read client for small peek results, but let client decide - arrow_batches=lambda: iterator.to_arrow_iterable( - bqstorage_client=self.bqstoragereadclient - ), - schema=array_value.schema, - query_job=query_job, - total_rows=iterator.total_rows, - ) - - def head( - self, array_value: bigframes.core.ArrayValue, n_rows: int - ) -> ExecuteResult: - - maybe_row_count = self._local_get_row_count(array_value) - if (maybe_row_count is not None) and (maybe_row_count <= n_rows): - return self.execute(array_value, ordered=True) - - if not self.strictly_ordered and not array_value.node.explicitly_ordered: - # No user-provided ordering, so just get any N rows, its faster! - return self.peek(array_value, n_rows) - - plan = self.replace_cached_subtrees(array_value.node) - if not tree_properties.can_fast_head(plan): - # If can't get head fast, we are going to need to execute the whole query - # Will want to do this in a way such that the result is reusable, but the first - # N values can be easily extracted. - # This currently requires clustering on offsets. - self._cache_with_offsets(array_value) - # Get a new optimized plan after caching - plan = self.replace_cached_subtrees(array_value.node) - assert tree_properties.can_fast_head(plan) - - head_plan = generate_head_plan(plan, n_rows) - sql = self.compiler.compile(head_plan) - - # TODO(swast): plumb through the api_name of the user-facing api that - # caused this query. 
- iterator, query_job = self._run_execute_query(sql=sql) - return ExecuteResult( - # Probably don't need read client for small head results, but let client decide - arrow_batches=lambda: iterator.to_arrow_iterable( - bqstorage_client=self.bqstoragereadclient - ), - schema=array_value.schema, - query_job=query_job, - total_rows=iterator.total_rows, - ) - - def get_row_count(self, array_value: bigframes.core.ArrayValue) -> int: - count = self._local_get_row_count(array_value) - if count is not None: - return count - else: - row_count_plan = self.replace_cached_subtrees( - generate_row_count_plan(array_value.node) - ) - sql = self.compiler.compile(row_count_plan, ordered=False) - iter, _ = self._run_execute_query(sql, query_with_job=False) - return next(iter)[0] - - def cached( - self, - array_value: bigframes.core.ArrayValue, - *, - force: bool = False, - use_session: bool = False, - cluster_cols: Sequence[str] = (), - ) -> None: - """Write the block to a session table.""" - # use a heuristic for whether something needs to be cached - if (not force) and self._is_trivially_executable(array_value): - return - if use_session: - self._cache_with_session_awareness(array_value) - else: - self._cache_with_cluster_cols(array_value, cluster_cols=cluster_cols) - - def _local_get_row_count( - self, array_value: bigframes.core.ArrayValue - ) -> Optional[int]: - # optimized plan has cache materializations which will have row count metadata - # that is more likely to be usable than original leaf nodes. - plan = self.replace_cached_subtrees(array_value.node) - return tree_properties.row_count(plan) - - # Helpers - def _run_execute_query( - self, - sql: str, - job_config: Optional[bq_job.QueryJobConfig] = None, - api_name: Optional[str] = None, - page_size: Optional[int] = None, - max_results: Optional[int] = None, - query_with_job: bool = True, - ) -> Tuple[bq_table.RowIterator, Optional[bigquery.QueryJob]]: - """ - Starts BigQuery query job and waits for results. - """ - job_config = bq_job.QueryJobConfig() if job_config is None else job_config - if bigframes.options.compute.maximum_bytes_billed is not None: - job_config.maximum_bytes_billed = ( - bigframes.options.compute.maximum_bytes_billed - ) - - if not self.strictly_ordered: - job_config.labels["bigframes-mode"] = "unordered" - - # Note: add_and_trim_labels is global scope which may have unexpected effects - # Ensure no additional labels are added to job_config after this point, - # as `add_and_trim_labels` ensures the label count does not exceed 64. - bq_io.add_and_trim_labels(job_config, api_name=api_name) - try: - iterator, query_job = bq_io.start_query_with_client( - self.bqclient, - sql, - job_config=job_config, - api_name=api_name, - max_results=max_results, - page_size=page_size, - metrics=self.metrics, - query_with_job=query_with_job, - ) - return iterator, query_job - - except google.api_core.exceptions.BadRequest as e: - # Unfortunately, this error type does not have a separate error code or exception type - if "Resources exceeded during query execution" in e.message: - new_message = "Computation is too complex to execute as a single query. Try using DataFrame.cache() on intermediate results, or setting bigframes.options.compute.enable_multi_query_execution." 
- raise bigframes.exceptions.QueryComplexityError(new_message) from e - else: - raise - - def replace_cached_subtrees(self, node: nodes.BigFrameNode) -> nodes.BigFrameNode: - return nodes.top_down(node, lambda x: self._cached_executions.get(x, x)) - - def _is_trivially_executable(self, array_value: bigframes.core.ArrayValue): - """ - Can the block be evaluated very cheaply? - If True, the array_value probably is not worth caching. - """ - # Once rewriting is available, will want to rewrite before - # evaluating execution cost. - return tree_properties.is_trivially_executable( - self.replace_cached_subtrees(array_value.node) - ) - - def _cache_with_cluster_cols( - self, array_value: bigframes.core.ArrayValue, cluster_cols: Sequence[str] - ): - """Executes the query and uses the resulting table to rewrite future executions.""" - - sql, schema, ordering_info = self.compiler.compile_raw( - self.replace_cached_subtrees(array_value.node) - ) - tmp_table = self._sql_as_cached_temp_table( - sql, - schema, - cluster_cols=bq_io.select_cluster_cols(schema, cluster_cols), - ) - cached_replacement = array_value.as_cached( - cache_table=self.bqclient.get_table(tmp_table), - ordering=ordering_info, - ).node - self._cached_executions[array_value.node] = cached_replacement - - def _cache_with_offsets(self, array_value: bigframes.core.ArrayValue): - """Executes the query and uses the resulting table to rewrite future executions.""" - offset_column = bigframes.core.guid.generate_guid("bigframes_offsets") - w_offsets, offset_column = array_value.promote_offsets() - sql = self.compiler.compile( - self.replace_cached_subtrees(w_offsets.node), ordered=False - ) - - tmp_table = self._sql_as_cached_temp_table( - sql, - w_offsets.schema.to_bigquery(), - cluster_cols=[offset_column], - ) - cached_replacement = array_value.as_cached( - cache_table=self.bqclient.get_table(tmp_table), - ordering=order.TotalOrdering.from_offset_col(offset_column), - ).node - self._cached_executions[array_value.node] = cached_replacement - - def _cache_with_session_awareness( - self, - array_value: bigframes.core.ArrayValue, - ) -> None: - session_forest = [obj._block._expr.node for obj in array_value.session.objects] - # These node types are cheap to re-compute - target, cluster_cols = bigframes.session.planner.session_aware_cache_plan( - array_value.node, list(session_forest) - ) - cluster_cols_sql_names = [id.sql for id in cluster_cols] - if len(cluster_cols) > 0: - self._cache_with_cluster_cols( - bigframes.core.ArrayValue(target), cluster_cols_sql_names - ) - elif self.strictly_ordered: - self._cache_with_offsets(bigframes.core.ArrayValue(target)) - else: - self._cache_with_cluster_cols(bigframes.core.ArrayValue(target), []) - - def _simplify_with_caching(self, array_value: bigframes.core.ArrayValue): - """Attempts to handle the complexity by caching duplicated subtrees and breaking the query into pieces.""" - # Apply existing caching first - for _ in range(MAX_SUBTREE_FACTORINGS): - node_with_cache = self.replace_cached_subtrees(array_value.node) - if node_with_cache.planning_complexity < QUERY_COMPLEXITY_LIMIT: - return - - did_cache = self._cache_most_complex_subtree(array_value.node) - if not did_cache: - return - - def _cache_most_complex_subtree(self, node: nodes.BigFrameNode) -> bool: - # TODO: If query fails, retry with lower complexity limit - selection = tree_properties.select_cache_target( - node, - min_complexity=(QUERY_COMPLEXITY_LIMIT / 500), - max_complexity=QUERY_COMPLEXITY_LIMIT, - 
cache=dict(self._cached_executions), - # Heuristic: subtree_compleixty * (copies of subtree)^2 - heuristic=lambda complexity, count: math.log(complexity) - + 2 * math.log(count), - ) - if selection is None: - # No good subtrees to cache, just return original tree - return False - - self._cache_with_cluster_cols(bigframes.core.ArrayValue(selection), []) - return True - - def _sql_as_cached_temp_table( - self, - sql: str, - schema: Sequence[bigquery.SchemaField], - cluster_cols: Sequence[str], - ) -> bigquery.TableReference: - assert len(cluster_cols) <= _MAX_CLUSTER_COLUMNS - temp_table = self.storage_manager.allocate_and_create_temp_table( - schema, cluster_cols - ) - - # TODO: Get default job config settings - job_config = cast( - bigquery.QueryJobConfig, - bigquery.QueryJobConfig.from_api_repr({}), - ) - job_config.destination = temp_table - _, query_job = self._run_execute_query( - sql, - job_config=job_config, - api_name="cached", - ) - assert query_job is not None - query_job.result() - return query_job.destination - - def _validate_result_schema( - self, - array_value: bigframes.core.ArrayValue, - bq_schema: list[bigquery.SchemaField], - ): - actual_schema = _sanitize(tuple(bq_schema)) - ibis_schema = bigframes.core.compile.test_only_ibis_inferred_schema( - self.replace_cached_subtrees(array_value.node) - ).to_bigquery() - internal_schema = _sanitize(array_value.schema.to_bigquery()) - if not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable: - return - - if internal_schema != actual_schema: - raise ValueError( - f"This error should only occur while testing. BigFrames internal schema: {internal_schema} does not match actual schema: {actual_schema}" - ) - - if ibis_schema != actual_schema: - raise ValueError( - f"This error should only occur while testing. Ibis schema: {ibis_schema} does not match actual schema: {actual_schema}" - ) - - -def _sanitize( - schema: Tuple[bigquery.SchemaField, ...] -) -> Tuple[bigquery.SchemaField, ...]: - # Schema inferred from SQL strings and Ibis expressions contain only names, types and modes, - # so we disregard other fields (e.g timedelta description for timedelta columns) for validations. 
- return tuple( - bigquery.SchemaField( - f.name, - f.field_type, - f.mode, # type:ignore - fields=_sanitize(f.fields), - ) - for f in schema - ) - - -def generate_head_plan(node: nodes.BigFrameNode, n: int): - return nodes.SliceNode(node, start=None, stop=n) - - -def generate_row_count_plan(node: nodes.BigFrameNode): - return nodes.RowCountNode(node) diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index 1296e9d1b3..bdcada6364 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -17,60 +17,62 @@ import copy import dataclasses import datetime +import io import itertools import os import typing -from typing import Dict, Hashable, IO, Iterable, List, Optional, Sequence, Tuple, Union +from typing import ( + Dict, + Hashable, + IO, + Iterable, + List, + Literal, + Optional, + Sequence, + Tuple, +) import bigframes_vendored.constants as constants import bigframes_vendored.pandas.io.gbq as third_party_pandas_gbq import google.api_core.exceptions -import google.auth.credentials import google.cloud.bigquery as bigquery import google.cloud.bigquery.table -import google.cloud.bigquery_connection_v1 -import google.cloud.bigquery_storage_v1 -import google.cloud.functions_v2 -import google.cloud.resourcemanager_v3 -import jellyfish import pandas -import pandas_gbq.schema.pandas_to_bigquery # type: ignore +import pyarrow as pa -import bigframes.clients -import bigframes.constants +from bigframes.core import local_data, utils import bigframes.core as core import bigframes.core.blocks as blocks -import bigframes.core.compile -import bigframes.core.expression as expression -import bigframes.core.guid -import bigframes.core.ordering -import bigframes.core.pruning import bigframes.core.schema as schemata -import bigframes.dataframe import bigframes.dtypes -import bigframes.exceptions import bigframes.formatting_helpers as formatting_helpers -import bigframes.operations -import bigframes.operations.aggregations as agg_ops import bigframes.session._io.bigquery as bf_io_bigquery import bigframes.session._io.bigquery.read_gbq_table as bf_read_gbq_table -import bigframes.session._io.pandas as bf_io_pandas -import bigframes.session.clients -import bigframes.session.executor import bigframes.session.metrics -import bigframes.session.planner -import bigframes.session.temp_storage +import bigframes.session.temporary_storage import bigframes.session.time as session_time -import bigframes.version # Avoid circular imports. 
if typing.TYPE_CHECKING: - import bigframes.core.indexes import bigframes.dataframe as dataframe - import bigframes.series import bigframes.session -_MAX_CLUSTER_COLUMNS = 4 +_PLACEHOLDER_SCHEMA = ( + google.cloud.bigquery.SchemaField("bf_loader_placeholder", "INTEGER"), +) + +_LOAD_JOB_TYPE_OVERRIDES = { + # Json load jobs not supported yet: b/271321143 + bigframes.dtypes.JSON_DTYPE: "STRING", + # Timedelta is emulated using integer in bq type system + bigframes.dtypes.TIMEDELTA_DTYPE: "INTEGER", +} + +_STREAM_JOB_TYPE_OVERRIDES = { + # Timedelta is emulated using integer in bq type system + bigframes.dtypes.TIMEDELTA_DTYPE: "INTEGER", +} def _to_index_cols( @@ -87,6 +89,31 @@ def _to_index_cols( return index_cols +def _check_column_duplicates(index_cols: Iterable[str], columns: Iterable[str]): + index_cols_list = list(index_cols) if index_cols is not None else [] + columns_list = list(columns) if columns is not None else [] + set_index = set(index_cols_list) + set_columns = set(columns_list) + + if len(index_cols_list) > len(set_index): + raise ValueError( + "The 'index_col' argument contains duplicate names. " + "All column names specified in 'index_col' must be unique." + ) + + if len(columns_list) > len(set_columns): + raise ValueError( + "The 'columns' argument contains duplicate names. " + "All column names specified in 'columns' must be unique." + ) + + if not set_index.isdisjoint(set_columns): + raise ValueError( + "Found column names that exist in both 'index_col' and 'columns' arguments. " + "These arguments must specify distinct sets of columns." + ) + + @dataclasses.dataclass class GbqDataLoader: """ @@ -115,7 +142,7 @@ def __init__( self, session: bigframes.session.Session, bqclient: bigquery.Client, - storage_manager: bigframes.session.temp_storage.AnonymousDatasetManager, + storage_manager: bigframes.session.temporary_storage.TemporaryStorageManager, default_index_type: bigframes.enums.DefaultIndexKind, scan_index_uniqueness: bool, force_total_order: bool, @@ -135,136 +162,120 @@ def __init__( self._clock = session_time.BigQuerySyncedClock(bqclient) self._clock.sync() - def read_pandas_load_job( - self, pandas_dataframe: pandas.DataFrame, api_name: str + def read_pandas( + self, + pandas_dataframe: pandas.DataFrame, + method: Literal["load", "stream"], + api_name: str, ) -> dataframe.DataFrame: - import bigframes.dataframe as dataframe + # TODO: Push this into from_pandas, along with index flag + from bigframes import dataframe - df_and_labels = bf_io_pandas.pandas_to_bq_compatible(pandas_dataframe) - pandas_dataframe_copy = df_and_labels.df - new_idx_ids = pandas_dataframe_copy.index.names - ordering_col = df_and_labels.ordering_col - - # TODO(https://github.com/googleapis/python-bigquery-pandas/issues/760): - # Once pandas-gbq can show a link to the running load job like - # bigframes does, switch to using pandas-gbq to load the - # bigquery-compatible pandas DataFrame. 
- schema: list[ - bigquery.SchemaField - ] = pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields( - pandas_dataframe_copy, - index=True, + val_cols, idx_cols = utils.get_standardized_ids( + pandas_dataframe.columns, pandas_dataframe.index.names, strict=True + ) + prepared_df = pandas_dataframe.reset_index(drop=False).set_axis( + [*idx_cols, *val_cols], axis="columns" ) + managed_data = local_data.ManagedArrowTable.from_pandas(prepared_df) - job_config = bigquery.LoadJobConfig() - job_config.schema = schema + if method == "load": + array_value = self.load_data(managed_data, api_name=api_name) + elif method == "stream": + array_value = self.stream_data(managed_data) + else: + raise ValueError(f"Unsupported read method {method}") - # TODO: Remove this. It's likely that the slower load job due to - # clustering doesn't improve speed of queries because pandas tables are - # small. - cluster_cols = [ordering_col] - job_config.clustering_fields = cluster_cols + block = blocks.Block( + array_value, + index_columns=idx_cols, + column_labels=pandas_dataframe.columns, + index_labels=pandas_dataframe.index.names, + ) + return dataframe.DataFrame(block) - job_config.labels = {"bigframes-api": api_name} + def load_data( + self, data: local_data.ManagedArrowTable, api_name: Optional[str] = None + ) -> core.ArrayValue: + """Load managed data into bigquery""" + ordering_col = "bf_load_job_offsets" - load_table_destination = self._storage_manager.allocate_temp_table() - load_job = self._bqclient.load_table_from_dataframe( - pandas_dataframe_copy, - load_table_destination, - job_config=job_config, + # JSON support incomplete + for item in data.schema.items: + _validate_dtype_can_load(item.column, item.dtype) + + schema_w_offsets = data.schema.append( + schemata.SchemaItem(ordering_col, bigframes.dtypes.INT_DTYPE) ) - self._start_generic_job(load_job) + bq_schema = schema_w_offsets.to_bigquery(_LOAD_JOB_TYPE_OVERRIDES) + job_config = bigquery.LoadJobConfig() + job_config.source_format = bigquery.SourceFormat.PARQUET + job_config.schema = bq_schema + if api_name: + job_config.labels = {"bigframes-api": api_name} + + load_table_destination = self._storage_manager.create_temp_table( + bq_schema, [ordering_col] + ) + + buffer = io.BytesIO() + data.to_parquet( + buffer, + offsets_col=ordering_col, + geo_format="wkt", + duration_type="duration", + json_type="string", + ) + buffer.seek(0) + load_job = self._bqclient.load_table_from_file( + buffer, destination=load_table_destination, job_config=job_config + ) + self._start_generic_job(load_job) + # must get table metadata after load job for accurate metadata destination_table = self._bqclient.get_table(load_table_destination) - array_value = core.ArrayValue.from_table( + return core.ArrayValue.from_table( table=destination_table, - # TODO (b/394156190): Generate this directly from original pandas df. 
- schema=schemata.ArraySchema.from_bq_table( - destination_table, df_and_labels.col_type_overrides - ), + schema=schema_w_offsets, session=self._session, offsets_col=ordering_col, + n_rows=data.data.num_rows, ).drop_columns([ordering_col]) - block = blocks.Block( - array_value, - index_columns=new_idx_ids, - column_labels=df_and_labels.column_labels, - index_labels=df_and_labels.index_labels, + def stream_data(self, data: local_data.ManagedArrowTable) -> core.ArrayValue: + """Load managed data into bigquery""" + ordering_col = "bf_stream_job_offsets" + schema_w_offsets = data.schema.append( + schemata.SchemaItem(ordering_col, bigframes.dtypes.INT_DTYPE) ) - return dataframe.DataFrame(block) - - def read_pandas_streaming( - self, - pandas_dataframe: pandas.DataFrame, - ) -> dataframe.DataFrame: - """Same as pandas_to_bigquery_load, but uses the BQ legacy streaming API.""" - import bigframes.dataframe as dataframe - - df_and_labels = bf_io_pandas.pandas_to_bq_compatible(pandas_dataframe) - pandas_dataframe_copy = df_and_labels.df - new_idx_ids = pandas_dataframe_copy.index.names - ordering_col = df_and_labels.ordering_col - - # TODO(https://github.com/googleapis/python-bigquery-pandas/issues/300): - # Once pandas-gbq can do streaming inserts (again), switch to using - # pandas-gbq to write the bigquery-compatible pandas DataFrame. - schema: list[ - bigquery.SchemaField - ] = pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields( - pandas_dataframe_copy, - index=True, + bq_schema = schema_w_offsets.to_bigquery(_STREAM_JOB_TYPE_OVERRIDES) + load_table_destination = self._storage_manager.create_temp_table( + bq_schema, [ordering_col] ) - destination = self._storage_manager.allocate_and_create_temp_table( - schema, - [ordering_col], + rows = data.itertuples( + geo_format="wkt", duration_type="int", json_type="object" ) - destination_table = bigquery.Table(destination, schema=schema) - # TODO(swast): Confirm that the index is written. - for errors in self._bqclient.insert_rows_from_dataframe( - destination_table, - pandas_dataframe_copy, + rows_w_offsets = ((*row, offset) for offset, row in enumerate(rows)) + + for errors in self._bqclient.insert_rows( + load_table_destination, + rows_w_offsets, + selected_fields=bq_schema, + row_ids=map(str, itertools.count()), # used to ensure only-once insertion ): if errors: raise ValueError( f"Problem loading at least one row from DataFrame: {errors}. {constants.FEEDBACK_LINK}" ) - array_value = ( - core.ArrayValue.from_table( - table=destination_table, - schema=schemata.ArraySchema.from_bq_table( - destination_table, df_and_labels.col_type_overrides - ), - session=self._session, - # Don't set the offsets column because we want to group by it. - ) - # There may be duplicate rows because of hidden retries, so use a query to - # deduplicate based on the ordering ID, which is guaranteed to be unique. - # We know that rows with same ordering ID are duplicates, - # so ANY_VALUE() is deterministic. 
- .aggregate( - by_column_ids=[ordering_col], - aggregations=[ - ( - expression.UnaryAggregation( - agg_ops.AnyValueOp(), - expression.deref(field.name), - ), - field.name, - ) - for field in destination_table.schema - if field.name != ordering_col - ], - ).drop_columns([ordering_col]) - ) - block = blocks.Block( - array_value, - index_columns=new_idx_ids, - column_labels=df_and_labels.column_labels, - index_labels=df_and_labels.index_labels, - ) - return dataframe.DataFrame(block) + destination_table = self._bqclient.get_table(load_table_destination) + return core.ArrayValue.from_table( + table=destination_table, + schema=schema_w_offsets, + session=self._session, + offsets_col=ordering_col, + n_rows=data.data.num_rows, + ).drop_columns([ordering_col]) def _start_generic_job(self, job: formatting_helpers.GenericJob): if bigframes.options.display.progress_bar is not None: @@ -281,11 +292,12 @@ def read_gbq_table( index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (), columns: Iterable[str] = (), max_results: Optional[int] = None, - api_name: str, + api_name: str = "read_gbq_table", use_cache: bool = True, filters: third_party_pandas_gbq.FiltersType = (), enable_snapshot: bool = True, ) -> dataframe.DataFrame: + import bigframes._tools.strings import bigframes.dataframe as dataframe # --------------------------------- @@ -326,7 +338,9 @@ def read_gbq_table( if key not in table_column_names: possibility = min( table_column_names, - key=lambda item: jellyfish.levenshtein_distance(key, item), + key=lambda item: bigframes._tools.strings.levenshtein_distance( + key, item + ), ) raise ValueError( f"Column '{key}' of `columns` not found in this table. Did you mean '{possibility}'?" @@ -339,12 +353,15 @@ def read_gbq_table( table=table, index_col=index_col, ) + _check_column_duplicates(index_cols, columns) for key in index_cols: if key not in table_column_names: possibility = min( table_column_names, - key=lambda item: jellyfish.levenshtein_distance(key, item), + key=lambda item: bigframes._tools.strings.levenshtein_distance( + key, item + ), ) raise ValueError( f"Column '{key}' of `index_col` not found in this table. Did you mean '{possibility}'?" 
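The hunk above routes `read_gbq_table` validation through `_check_column_duplicates` (defined earlier in this file). A minimal standalone sketch of the same checks, for illustration only — the function below mirrors the diff's semantics but is not part of it:

```python
def check_column_duplicates(index_cols, columns):
    """Mirror of _check_column_duplicates: reject duplicate or overlapping names."""
    index_cols = list(index_cols) if index_cols is not None else []
    columns = list(columns) if columns is not None else []
    if len(index_cols) > len(set(index_cols)):
        raise ValueError("The 'index_col' argument contains duplicate names.")
    if len(columns) > len(set(columns)):
        raise ValueError("The 'columns' argument contains duplicate names.")
    if not set(index_cols).isdisjoint(columns):
        raise ValueError(
            "Found column names that exist in both 'index_col' and 'columns' arguments."
        )

check_column_duplicates(["id"], ["name", "value"])  # ok: disjoint, no duplicates
check_column_duplicates(["id"], ["id", "value"])    # raises ValueError
```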
@@ -384,7 +401,7 @@ def read_gbq_table( query, index_col=index_cols, columns=columns, - api_name="read_gbq_table", + api_name=api_name, use_cache=use_cache, ) @@ -494,29 +511,28 @@ def read_gbq_table( df.sort_index() return df - def _read_bigquery_load_job( + def read_bigquery_load_job( self, filepath_or_buffer: str | IO["bytes"], - table: Union[bigquery.Table, bigquery.TableReference], *, job_config: bigquery.LoadJobConfig, index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (), columns: Iterable[str] = (), ) -> dataframe.DataFrame: - index_cols = _to_index_cols(index_col) - - if not job_config.clustering_fields and index_cols: - job_config.clustering_fields = index_cols[:_MAX_CLUSTER_COLUMNS] - + # Need to create session table beforehand + table = self._storage_manager.create_temp_table(_PLACEHOLDER_SCHEMA) + # but, we just overwrite the placeholder schema immediately with the load job + job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE if isinstance(filepath_or_buffer, str): + filepath_or_buffer = os.path.expanduser(filepath_or_buffer) if filepath_or_buffer.startswith("gs://"): load_job = self._bqclient.load_table_from_uri( - filepath_or_buffer, table, job_config=job_config + filepath_or_buffer, destination=table, job_config=job_config ) elif os.path.exists(filepath_or_buffer): # local file path with open(filepath_or_buffer, "rb") as source_file: load_job = self._bqclient.load_table_from_file( - source_file, table, job_config=job_config + source_file, destination=table, job_config=job_config ) else: raise NotImplementedError( @@ -525,21 +541,12 @@ def _read_bigquery_load_job( ) else: load_job = self._bqclient.load_table_from_file( - filepath_or_buffer, table, job_config=job_config + filepath_or_buffer, destination=table, job_config=job_config ) self._start_generic_job(load_job) table_id = f"{table.project}.{table.dataset_id}.{table.table_id}" - # Update the table expiration so we aren't limited to the default 24 - # hours of the anonymous dataset. - table_expiration = bigquery.Table(table_id) - table_expiration.expires = ( - datetime.datetime.now(datetime.timezone.utc) - + bigframes.constants.DEFAULT_EXPIRATION - ) - self._bqclient.update_table(table_expiration, ["expires"]) - # The BigQuery REST API for tables.get doesn't take a session ID, so we # can't get the schema for a temp table that way. @@ -588,6 +595,7 @@ def read_gbq_query( ) index_cols = _to_index_cols(index_col) + _check_column_duplicates(index_cols, columns) filters_copy1, filters_copy2 = itertools.tee(filters) has_filters = len(list(filters_copy1)) != 0 @@ -673,9 +681,7 @@ def _query_to_destination( ) else: cluster_cols = [] - temp_table = self._storage_manager.allocate_and_create_temp_table( - schema, cluster_cols - ) + temp_table = self._storage_manager.create_temp_table(schema, cluster_cols) timeout_ms = configuration.get("jobTimeoutMs") or configuration["query"].get( "timeoutMs" @@ -761,3 +767,44 @@ def _transform_read_gbq_configuration(configuration: Optional[dict]) -> dict: configuration["jobTimeoutMs"] = timeout_ms return configuration + + +def _has_json_arrow_type(arrow_type: pa.DataType) -> bool: + """ + Searches recursively for JSON array type within a PyArrow DataType. 
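+
+    For example, list<json> and struct<a: json> both contain a JSON Arrow
+    type, while list<string> does not.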
+ """ + if arrow_type == bigframes.dtypes.JSON_ARROW_TYPE: + return True + if pa.types.is_list(arrow_type): + return _has_json_arrow_type(arrow_type.value_type) + if pa.types.is_struct(arrow_type): + for i in range(arrow_type.num_fields): + if _has_json_arrow_type(arrow_type.field(i).type): + return True + return False + return False + + +def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype): + """ + Determines whether a datatype is supported by bq load jobs. + + Due to a BigQuery IO limitation with loading JSON from Parquet files (b/374784249), + we're using a workaround: storing JSON as strings and then parsing them into JSON + objects. + TODO(b/395912450): Remove workaround solution once b/374784249 got resolved. + + Raises: + NotImplementedError: Type is not yet supported by load jobs. + """ + # we can handle top-level json, but not nested yet through string conversion + if column_type == bigframes.dtypes.JSON_DTYPE: + return + + if isinstance(column_type, pandas.ArrowDtype) and _has_json_arrow_type( + column_type.pyarrow_dtype + ): + raise NotImplementedError( + f"Nested JSON types, found in column `{name}`: `{column_type}`', " + f"are currently unsupported for upload. {constants.FEEDBACK_LINK}" + ) diff --git a/bigframes/session/read_api_execution.py b/bigframes/session/read_api_execution.py new file mode 100644 index 0000000000..32095e41f4 --- /dev/null +++ b/bigframes/session/read_api_execution.py @@ -0,0 +1,100 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +from typing import Any, Optional + +from google.cloud import bigquery_storage_v1 + +from bigframes.core import bigframe_node, rewrite +from bigframes.session import executor, semi_executor + + +class ReadApiSemiExecutor(semi_executor.SemiExecutor): + """ + Executes plans reducible to a bq table scan by directly reading the table with the read api. 
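+
+    execute() returns None when the plan cannot be reduced to a plain table
+    scan, when an ordered result is requested from an explicitly ordered
+    source, or when a peek is requested, so callers can fall back to another
+    executor.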
+ """ + + def __init__( + self, bqstoragereadclient: bigquery_storage_v1.BigQueryReadClient, project: str + ): + self.bqstoragereadclient = bqstoragereadclient + self.project = project + + def execute( + self, + plan: bigframe_node.BigFrameNode, + ordered: bool, + peek: Optional[int] = None, + ) -> Optional[executor.ExecuteResult]: + node = rewrite.try_reduce_to_table_scan(plan) + if not node: + return None + if node.explicitly_ordered and ordered: + return None + if peek: + # TODO: Support peeking + return None + + import google.cloud.bigquery_storage_v1.types as bq_storage_types + from google.protobuf import timestamp_pb2 + + bq_table = node.source.table.get_table_ref() + read_options: dict[str, Any] = { + "selected_fields": [item.source_id for item in node.scan_list.items] + } + if node.source.sql_predicate: + read_options["row_restriction"] = node.source.sql_predicate + read_options = bq_storage_types.ReadSession.TableReadOptions(**read_options) + + table_mod_options = {} + if node.source.at_time: + snapshot_time = timestamp_pb2.Timestamp() + snapshot_time.FromDatetime(node.source.at_time) + table_mod_options["snapshot_time"] = snapshot_time = snapshot_time + table_mods = bq_storage_types.ReadSession.TableModifiers(**table_mod_options) + + def iterator_supplier(): + requested_session = bq_storage_types.stream.ReadSession( + table=bq_table.to_bqstorage(), + data_format=bq_storage_types.DataFormat.ARROW, + read_options=read_options, + table_modifiers=table_mods, + ) + # Single stream to maintain ordering + request = bq_storage_types.CreateReadSessionRequest( + parent=f"projects/{self.project}", + read_session=requested_session, + max_stream_count=1, + ) + session = self.bqstoragereadclient.create_read_session( + request=request, retry=None + ) + + if not session.streams: + return iter([]) + + reader = self.bqstoragereadclient.read_rows( + session.streams[0].name, retry=None + ) + rowstream = reader.rows() + return map(lambda page: page.to_arrow(), rowstream.pages) + + return executor.ExecuteResult( + arrow_batches=iterator_supplier, + schema=plan.schema, + query_job=None, + total_bytes=None, + total_rows=node.source.n_rows, + ) diff --git a/bigframes/session/semi_executor.py b/bigframes/session/semi_executor.py new file mode 100644 index 0000000000..c41d7c96d3 --- /dev/null +++ b/bigframes/session/semi_executor.py @@ -0,0 +1,33 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import abc +from typing import Optional + +from bigframes.core import bigframe_node +from bigframes.session import executor + + +# Unstable interface, in development +class SemiExecutor(abc.ABC): + """ + A semi executor executes a subset of possible plans, returns None for unsupported plans. 
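+
+    Illustrative usage (not part of this change): a session may try each
+    registered SemiExecutor in turn and fall back to the full query executor
+    when execute() returns None.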
+ """ + + def execute( + self, + plan: bigframe_node.BigFrameNode, + ordered: bool, + peek: Optional[int] = None, + ) -> Optional[executor.ExecuteResult]: + raise NotImplementedError("execute not implemented for this executor") diff --git a/bigframes/session/temporary_storage.py b/bigframes/session/temporary_storage.py new file mode 100644 index 0000000000..0c2a36f3fe --- /dev/null +++ b/bigframes/session/temporary_storage.py @@ -0,0 +1,32 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Protocol, Sequence + +from google.cloud import bigquery + + +class TemporaryStorageManager(Protocol): + @property + def location(self) -> str: + ... + + def create_temp_table( + self, schema: Sequence[bigquery.SchemaField], cluster_cols: Sequence[str] = [] + ) -> bigquery.TableReference: + ... + + # implementations should be robust to repeatedly closing + def close(self) -> None: + ... diff --git a/bigframes/testing/__init__.py b/bigframes/testing/__init__.py new file mode 100644 index 0000000000..529c08241d --- /dev/null +++ b/bigframes/testing/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""[Experimental] Utilities for testing BigQuery DataFrames. + +These modules are provided for testing the BigQuery DataFrames package. The +interface is not considered stable. +""" diff --git a/tests/unit/resources.py b/bigframes/testing/mocks.py similarity index 90% rename from tests/unit/resources.py rename to bigframes/testing/mocks.py index ebc1243eaf..ab48b97f0d 100644 --- a/tests/unit/resources.py +++ b/bigframes/testing/mocks.py @@ -36,12 +36,17 @@ def create_bigquery_session( + *, bqclient: Optional[mock.Mock] = None, session_id: str = "abcxyz", table_schema: Sequence[google.cloud.bigquery.SchemaField] = TEST_SCHEMA, anonymous_dataset: Optional[google.cloud.bigquery.DatasetReference] = None, location: str = "test-region", ) -> bigframes.Session: + """[Experimental] Create a mock BigQuery DataFrames session that avoids making Google Cloud API calls. + + Intended for unit test environments that don't have access to the network. 
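+
+    Example (illustrative sketch, inside a pytest test that has the
+    monkeypatch fixture):
+
+        from bigframes.testing import mocks
+
+        session = mocks.create_bigquery_session()
+        df = mocks.create_dataframe(monkeypatch, session=session)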
+ """ credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -108,8 +113,12 @@ def query_and_wait_mock(query, *args, **kwargs): def create_dataframe( - monkeypatch: pytest.MonkeyPatch, session: Optional[bigframes.Session] = None + monkeypatch: pytest.MonkeyPatch, *, session: Optional[bigframes.Session] = None ) -> bigframes.dataframe.DataFrame: + """[Experimental] Create a mock DataFrame that avoids making Google Cloud API calls. + + Intended for unit test environments that don't have access to the network. + """ if session is None: session = create_bigquery_session() diff --git a/bigframes/version.py b/bigframes/version.py index 356e73a71d..e3a1d84bfa 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.42.0" +__version__ = "2.0.0" # {x-release-please-start-date} -__release_date__ = "2025-03-27" +__release_date__ = "2025-04-17" # {x-release-please-end} diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml index b00044b087..1e712848df 100644 --- a/docs/templates/toc.yml +++ b/docs/templates/toc.yml @@ -86,6 +86,9 @@ uid: bigframes.operations.structs.StructAccessor - name: PlotAccessor uid: bigframes.operations.plotting.PlotAccessor + - name: BlobAccessor + uid: bigframes.operations.blob.BlobAccessor + status: beta name: Series - name: Window uid: bigframes.core.window.Window diff --git a/notebooks/apps/synthetic_data_generation.ipynb b/notebooks/apps/synthetic_data_generation.ipynb index c190f219af..f830e35c16 100644 --- a/notebooks/apps/synthetic_data_generation.ipynb +++ b/notebooks/apps/synthetic_data_generation.ipynb @@ -248,8 +248,8 @@ }, "outputs": [], "source": [ - "@bpd.remote_function([int], str, packages=['faker', 'pandas'])\n", - "def data_generator(id):\n", + "@bpd.remote_function(packages=['faker', 'pandas'], cloud_function_service_account=\"default\")\n", + "def data_generator(id: int) -> str:\n", " context = {}\n", " exec(code, context)\n", " result_df = context.get(\"result_df\")\n", diff --git a/notebooks/experimental/ai_operators.ipynb b/notebooks/experimental/ai_operators.ipynb new file mode 100644 index 0000000000..9f35d3864a --- /dev/null +++ b/notebooks/experimental/ai_operators.ipynb @@ -0,0 +1,3178 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "UYeZd_I8iouP" + }, + "outputs": [], + "source": [ + "# Copyright 2025 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rWJnGj2ViouP" + }, + "source": [ + "# BigFrames AI Operator Tutorial\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "
+        "Run in Colab | View on GitHub | Open in BQ Studio\n",
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mgOrr256iouQ" + }, + "source": [ + "This notebook provides a hands-on preview of AI operator APIs powered by the Gemini model.\n", + "\n", + "The notebook is divided into two sections. The first section introduces the API syntax with examples, aiming to familiarize you with how AI operators work. The second section applies AI operators to a large real-world dataset and presents performance statistics.\n", + "\n", + "This work is inspired by [this paper](https://arxiv.org/pdf/2407.11418) and powered by BigQuery ML and Vertex AI." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ymVbJV2iouQ" + }, + "source": [ + "# Preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vvVzFzo3iouQ" + }, + "source": [ + "First, import the BigFrames modules.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "Jb9glT2ziouQ" + }, + "outputs": [], + "source": [ + "import bigframes\n", + "import bigframes.pandas as bpd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xQiCWj7OiouQ" + }, + "source": [ + "Make sure the BigFrames version is at least `1.42.0`" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "LTPpI8IpiouQ" + }, + "outputs": [], + "source": [ + "from packaging.version import Version\n", + "\n", + "assert Version(bigframes.__version__) >= Version(\"1.42.0\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agxLmtlbiouR" + }, + "source": [ + "Turn on the AI operator experiment. You will see a warning sign saying that these operators are still under experiments. If you don't turn on the experiment before using the operators, you will get `NotImplemenetedError`s." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "1wXqdDr8iouR" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/_config/experiment_options.py:54: PreviewWarning: AI operators are still under experiments, and are subject to change in\n", + "the future.\n", + " warnings.warn(msg, category=bfe.PreviewWarning)\n" + ] + } + ], + "source": [ + "bigframes.options.experiments.ai_operators = True" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W8TPUvnsqxhv" + }, + "source": [ + "Specify your GCP project and location." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vCkraKOeqJFl" + }, + "outputs": [], + "source": [ + "bpd.options.bigquery.project = 'YOUR_PROJECT_ID'\n", + "bpd.options.bigquery.location = 'US'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "n95MFlS0iouR" + }, + "source": [ + "**Optional**: turn off the display of progress bar so that only the operation results will be printed out" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "5r6ahx7MiouR" + }, + "outputs": [], + "source": [ + "bpd.options.display.progress_bar = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "93iYvp7niouR" + }, + "source": [ + "Create LLM instances. They will be passed in as parameters for each AI operator.\n", + "\n", + "This tutorial uses the \"gemini-2.0-flash-001\" model for text generation and \"text-embedding-005\" for embedding. 
While these are recommended, you can choose [other Vertex AI LLM models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models) based on your needs and availability. Ensure you have [sufficient quota](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas) for your chosen models and adjust it if necessary." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "tHkymaLNiouR" + }, + "outputs": [], + "source": [ + "from bigframes.ml import llm\n", + "gemini_model = llm.GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n", + "text_embedding_model = llm.TextEmbeddingGenerator(model_name=\"text-embedding-005\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mbFDcvnPiouR" + }, + "source": [ + "**Note**: AI operators could be expensive over a large set of data. As a result, our team added this option `bigframes.options.compute.ai_ops_confirmation_threshold` at `version 1.42.0` so that the BigFrames will ask for your confirmation if the amount of data to be processed is too large. If the amount of rows exceeds your threshold, you will see a prompt for your keyboard input -- 'y' to proceed and 'n' to abort. If you abort the operation, no LLM processing will be done.\n", + "\n", + "The default threshold is 0, which means the operators will always ask for confirmations. You are free to adjust the value as needed. You can also set the threshold to `None` to disable this feature." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F4dZm4b7iouR" + }, + "outputs": [], + "source": [ + "if Version(bigframes.__version__) >= Version(\"1.42.0\"):\n", + " bigframes.options.compute.ai_ops_confirmation_threshold = 1000" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_dEA3G9RiouR" + }, + "source": [ + "If you would like your operations to fail automatically when the data is too large, set `bigframes.options.compute.ai_ops_threshold_autofail` to `True`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BoUK-cpbiouS" + }, + "outputs": [], + "source": [ + "# if Version(bigframes.__version__) >= Version(\"1.42.0\"):\n", + "# bigframes.options.compute.ai_ops_threshold_autofail = True" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hQft3o3OiouS" + }, + "source": [ + "# API Samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dt5Kl-QGiouS" + }, + "source": [ + "You will learn about each AI operator by trying some examples." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J7XAT459iouS" + }, + "source": [ + "## AI Filtering" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9d5HUIvliouS" + }, + "source": [ + "AI filtering allows you to filter your dataframe based on the instruction (i.e. prompt) you provided.\n", + "\n", + "First, create a dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 190 + }, + "id": "NDpCRGd_iouS", + "outputId": "5048c935-06d3-4ef1-ad87-72e14a30b1b7" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
[3 rows x 2 columns in total]" + ], + "text/plain": [ + " country city\n", + "0 USA Seattle\n", + "1 Germany Berlin\n", + "2 Japan Kyoto\n", + "\n", + "[3 rows x 2 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = bpd.DataFrame({'country': ['USA', 'Germany', 'Japan'], 'city': ['Seattle', 'Berlin', 'Kyoto']})\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6AXmT7sniouS" + }, + "source": [ + "Now, filter this dataframe by keeping only the rows where the value in `city` column is the capital of the value in `country` column. The column references could be \"escaped\" by using a pair of braces in your instruction. In this example, your instruction should be like this:\n", + "```\n", + "The {city} is the capital of the {country}.\n", + "```\n", + "\n", + "Note that this is not a Python f-string, so you shouldn't prefix your instruction with an `f`." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 127 + }, + "id": "ipW3Z_l4iouS", + "outputId": "ad447459-225a-419c-d4c8-fedac4a9ed0f" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
[1 rows x 2 columns in total]" + ], + "text/plain": [ + " country city\n", + "1 Germany Berlin\n", + "\n", + "[1 rows x 2 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.ai.filter(\"The {city} is the capital of the {country}\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "swKvgfm1iouS" + }, + "source": [ + "The filter operator extracts the information from the referenced column to enrich your instruction with context. The instruction is then sent for the designated model for evaluation. For filtering operations, the LLM is asked to return only `True` and `False` for each row, and the operator removes the rows accordingly." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r_2AAGGoiouS" + }, + "source": [ + "## AI Mapping" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vT6skC57iouS" + }, + "source": [ + "AI mapping allows to you to combine values from multiple columns into a single output based your instruction.\n", + "\n", + "Here is an example:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 190 + }, + "id": "BQ7xeUK3iouS", + "outputId": "33dcb742-77ed-4bea-8dbc-1cf775102a25" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
[3 rows x 2 columns in total]" + ], + "text/plain": [ + " ingredient_1 ingredient_2\n", + "0 Bun Beef Patty\n", + "1 Soy Bean Bittern\n", + "2 Sausage Long Bread\n", + "\n", + "[3 rows x 2 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = bpd.DataFrame({\n", + " \"ingredient_1\": [\"Bun\", \"Soy Bean\", \"Sausage\"],\n", + " \"ingredient_2\": [\"Beef Patty\", \"Bittern\", \"Long Bread\"]\n", + " })\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFObP2aFiouS" + }, + "source": [ + "Now, you ask LLM what kind of food can be made from the two ingredients in each row. The column reference syntax in your instruction stays the same. In addition, you need to specify the column name by setting the `output_column` parameter to hold the mapping results." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 190 + }, + "id": "PpL24AQFiouS", + "outputId": "e7aff038-bf4b-4833-def8-fe2648e8885b" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
[3 rows x 3 columns in total]" + ], + "text/plain": [ + " ingredient_1 ingredient_2 food\n", + "0 Bun Beef Patty Burger\n", + "\n", + "1 Soy Bean Bittern Tofu\n", + "\n", + "2 Sausage Long Bread Hotdog\n", + "\n", + "\n", + "[3 rows x 3 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.ai.map(\"What is the food made from {ingredient_1} and {ingredient_2}? One word only.\", output_column=\"food\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "70WTZZfdiouS" + }, + "source": [ + "## AI Joining" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u93uieRaiouS" + }, + "source": [ + "AI joining can join two dataframes based on the instruction you provided.\n", + "\n", + "First, you prepare two dataframes:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "dffIGEUEiouS" + }, + "outputs": [], + "source": [ + "cities = bpd.DataFrame({'city': ['Seattle', 'Ottawa', 'Berlin', 'Shanghai', 'New Delhi']})\n", + "continents = bpd.DataFrame({'continent': ['North America', 'Africa', 'Asia']})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hz0X-0RtiouS" + }, + "source": [ + "You want to join the `cities` with `continents` to form a new dataframe such that, in each row the city from the `cities` data frame is in the continent from the `continents` dataframe. You could re-use the aforementioned column reference syntax:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + }, + "id": "WPIOHEwCiouT", + "outputId": "976586c3-b5db-4088-a46a-44dfbf822ecb" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
[4 rows x 2 columns in total]" + ], + "text/plain": [ + " city continent\n", + "0 Seattle North America\n", + "1 Ottawa North America\n", + "2 Shanghai Asia\n", + "3 New Delhi Asia\n", + "\n", + "[4 rows x 2 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cities.ai.join(continents, \"{city} is in {continent}\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4Qc97GMWiouT" + }, + "source": [ + "!! **Important:** AI join can trigger probihitively expensitve operations! This operation first cross joins two dataframes, then invokes AI filter on each row. That means if you have two dataframes of sizes `M` and `N`, the total amount of queries sent to the LLM is on the scale of `M * N`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MUEJXT1IiouT" + }, + "source": [ + "### Self Joins" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QvX-nCogiouT" + }, + "source": [ + "This self-join example is for demonstrating a special case: what happens when the joining columns exist in both data frames? It turns out that you need to provide extra information in your column references: by attaching \"left.\" and \"right.\" prefixes to your column names.\n", + "\n", + "Create an example data frame:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "OIGz5sqxiouW" + }, + "outputs": [], + "source": [ + "animals = bpd.DataFrame({'animal': ['cow', 'cat', 'spider', 'elephant']})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VmJbuWNniouX" + }, + "source": [ + "You want to compare the weights of these animals, and output all the pairs where the animal on the left is heavier than the animal on the right. In this case, you use `left.animal` and `right.animal` to differentiate the data sources:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 284 + }, + "id": "UHfggdhBiouX", + "outputId": "a439e3aa-1382-4244-951f-127dc8da0fe3" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
[6 rows x 2 columns in total]" + ], + "text/plain": [ + " animal_left animal_right\n", + "0 cow cat\n", + "1 cow spider\n", + "2 cat spider\n", + "3 elephant cow\n", + "4 elephant cat\n", + "5 elephant spider\n", + "\n", + "[6 rows x 2 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "animals.ai.join(animals, \"{left.animal} generally weighs heavier than {right.animal}\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kU7BsyTyiouX" + }, + "source": [ + "## AI Top K" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s9QePXEoiouX" + }, + "source": [ + "AI Top K selects the top K values based on your instruction. Here is an example:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "bMQqtyZ2iouX" + }, + "outputs": [], + "source": [ + "df = bpd.DataFrame({\"Animals\": [\"Corgi\", \"Orange Cat\", \"Parrot\", \"Tarantula\"]})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KiljGBSCiouX" + }, + "source": [ + "You want to find the top two most popular pets:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "OZv5WUGIiouX", + "outputId": "ae1cee27-cc31-455e-c4ac-c0a9a5cf4ca5" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
[2 rows x 1 columns in total]" + ], + "text/plain": [ + " Animals\n", + "0 Corgi\n", + "1 Orange Cat\n", + "\n", + "[2 rows x 1 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.ai.top_k(\"{Animals} are more popular as pets\", model=gemini_model, k=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dC8fyu3aiouX" + }, + "source": [ + "Under the hood, the AI top K operator performs pair-wise comparisons with LLM. The top K results are returned in the order of their indices instead of their ranks." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sIszJ0zPiouX" + }, + "source": [ + "## AI Search" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e4ojHRKAiouX" + }, + "source": [ + "AI search searches the most similar values to your query within a single column. Here is an example:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 253 + }, + "id": "gnQSIZ5SiouX", + "outputId": "dd6e1ecb-1bad-4a7c-8065-e56c697d0863" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
[5 rows x 1 columns in total]" + ], + "text/plain": [ + " creatures\n", + "0 salmon\n", + "1 sea urchin\n", + "2 baboons\n", + "3 frog\n", + "4 chimpanzee\n", + "\n", + "[5 rows x 1 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = bpd.DataFrame({\"creatures\": [\"salmon\", \"sea urchin\", \"baboons\", \"frog\", \"chimpanzee\"]})\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5apfIaZMiouX" + }, + "source": [ + "You want to get the top 2 creatures that are most similar to \"monkey\":" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "CkAuFgPYiouY", + "outputId": "723c7604-f53c-43d7-c754-4c91ec198dff" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n", + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n", + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
[2 rows x 2 columns in total]" + ], + "text/plain": [ + " creatures similarity score\n", + "2 baboons 0.708434\n", + "4 chimpanzee 0.635844\n", + "\n", + "[2 rows x 2 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.ai.search(\"creatures\", query=\"monkey\", top_k = 2, model = text_embedding_model, score_column='similarity score')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GDZeVzFTiouY" + }, + "source": [ + "Note that you are using a text embedding model this time. This model generates embedding vectors for both your query as well as the values in the search space. The operator then uses BigQuery's built-in VECTOR_SEARCH function to find the nearest neighbors of your query.\n", + "\n", + "In addition, `score_column` is an optional parameter for storing the distances between the results and your query. If not set, the score column won't be attached to the result." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EXNutIXqiouY" + }, + "source": [ + "## AI Similarity Join" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BhWrhQMjiouY" + }, + "source": [ + "When you want to perform multiple similarity queries in the same value space, you could use similarity join to simplify your call. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "id": "cUc7-8O6iouY" + }, + "outputs": [], + "source": [ + "df1 = bpd.DataFrame({'animal': ['monkey', 'spider', 'salmon', 'giraffe', 'sparrow']})\n", + "df2 = bpd.DataFrame({'animal': ['scorpion', 'baboon', 'owl', 'elephant', 'tuna']})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k96WerOviouY" + }, + "source": [ + "In this example, you want to pick the most related animal from `df2` for each value in `df1`." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 253 + }, + "id": "wPV5EkfpiouY", + "outputId": "4be1211d-0353-4b94-8c27-ebd568e8e104" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n", + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
[5 rows x 3 columns in total]" + ], + "text/plain": [ + " animal animal_1 distance\n", + "0 monkey baboon 0.620521\n", + "1 spider scorpion 0.728024\n", + "2 salmon tuna 0.782141\n", + "3 giraffe elephant 0.7135\n", + "4 sparrow owl 0.810864\n", + "\n", + "[5 rows x 3 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.ai.sim_join(df2, left_on='animal', right_on='animal', top_k=1, model=text_embedding_model, score_column='distance')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GplzD7v0iouY" + }, + "source": [ + "!! **Important** Like AI join, this operator can also be very expensive. To guard against unexpected processing of large dataset, use the `bigframes.options.compute.sem_ops_confirmation_threshold` option to specify a threshold." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hgj8GoQhiouY" + }, + "source": [ + "# Performance Analyses" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EZomL0BciouY" + }, + "source": [ + "In this section, you will use BigQuery's public data of hacker news to perform some heavy work. We recommend you to check the code without executing them in order to save your time and money. The execution results are attached after each cell for your reference.\n", + "\n", + "First, load 3k rows from the table:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 880 + }, + "id": "wRR0SrcSiouY", + "outputId": "3b25f3a3-09c7-4396-9107-4aa4cdb4b963" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
[3000 rows x 6 columns in total]" + ], + "text/plain": [ + " title \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 Workplace Wellness Programs Don’t Work Well. W... \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 \n", + "9 \n", + "10 Taiwan’s Tech King to Nancy Pelosi: U.S. Is in... \n", + "11 Android’s new multitasking is terrible and sho... \n", + "12 \n", + "13 \n", + "14 \n", + "15 \n", + "16 \n", + "17 \n", + "18 \n", + "19 \n", + "20 \n", + "21 \n", + "22 \n", + "23 \n", + "24 \n", + "\n", + " text by score \\\n", + "0 \n", + "1 I'd agree about border control with a cav... bandrami \n", + "2 So 4 pickups? At least pickups are high margin... seanmcdirmid \n", + "3 anarbadalov 2 \n", + "4 Are you implying that to be a good developer y... ecesena \n", + "5 It pretty much works with other carriers. My s... toast0 \n", + "6 \n", + "7 "not operated for profit" and "... radford-neal \n", + "8 It's a good description of one applicatio... dkarl \n", + "9 Might be a bit high, but....

"For ex... tyingq \n", + "10 dlcmh 11 \n", + "11 wowamit 1 \n", + "12 SEEKING WORK | REMOTE | US Citizen

Location:... rasikjain \n", + "13 I had a very similar experience last month tea... tmaly \n", + "14 mrtweetyhack \n", + "15 > Just do what most American cities do with... AnthonyMouse \n", + "16 It's not a space. The l and the C are at ... antninja \n", + "17 I’ve knowingly paid the premium in the past, j... zwily \n", + "18 > Any sufficiently complicated C or Fortran... wavemode \n", + "19 It's similar to a lot of Japanese "t... TillE \n", + "20 Engineers are just people paid to code. If you... rchaud \n", + "21 So don't use it CyberDildonics \n", + "22 Sure, but there are degrees of these things. T... dang \n", + "23 I wish this would happen. There's a &quo... coredog64 \n", + "24 I’m not sure why responsible riders wouldn’t w... mjmahone17 \n", + "\n", + " timestamp type \n", + "0 2010-04-16 19:52:51+00:00 comment \n", + "1 2023-06-04 06:12:00+00:00 comment \n", + "2 2023-09-19 14:19:46+00:00 comment \n", + "3 2018-08-07 12:17:45+00:00 story \n", + "4 2016-06-10 19:38:25+00:00 comment \n", + "5 2024-08-13 03:11:32+00:00 comment \n", + "6 2020-06-07 22:43:03+00:00 comment \n", + "7 2020-03-19 00:24:47+00:00 comment \n", + "8 2024-10-07 13:38:18+00:00 comment \n", + "9 2017-01-23 19:49:15+00:00 comment \n", + "10 2023-02-18 02:51:11+00:00 story \n", + "11 2018-10-22 09:50:36+00:00 story \n", + "12 2024-08-01 16:56:49+00:00 comment \n", + "13 2020-01-22 18:26:36+00:00 comment \n", + "14 2022-02-26 19:34:00+00:00 comment \n", + "15 2021-10-04 23:10:50+00:00 comment \n", + "16 2013-07-13 09:48:34+00:00 comment \n", + "17 2020-06-17 14:26:43+00:00 comment \n", + "18 2025-02-07 06:42:53+00:00 comment \n", + "19 2022-11-06 17:15:10+00:00 comment \n", + "20 2023-04-12 14:31:42+00:00 comment \n", + "21 2015-12-29 22:01:16+00:00 comment \n", + "22 2021-11-11 23:42:12+00:00 comment \n", + "23 2018-02-12 16:03:37+00:00 comment \n", + "24 2021-11-09 01:36:01+00:00 comment \n", + "...\n", + "\n", + "[3000 rows x 6 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hacker_news = bpd.read_gbq(\"bigquery-public-data.hacker_news.full\")[['title', 'text', 'by', 'score', 'timestamp', 'type']].head(3000)\n", + "hacker_news" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3e94DPOdiouY" + }, + "source": [ + "Then, keep only the rows that have text content:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mQl8hc1biouY", + "outputId": "2b4ffa85-9d95-4a20-9040-0420c67da2d4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2533" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hacker_news_with_texts = hacker_news[hacker_news['text'].isnull() == False]\n", + "len(hacker_news_with_texts)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JWalDtLDiouZ" + }, + "source": [ + "You can get an idea of the input token length by calculating the average string length." 
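+    " If you assume roughly 4 characters per token (a heuristic assumption, not an official conversion rate), dividing the average length by 4 gives a ballpark per-row token count:\n",
+    "\n",
+    "```python\n",
+    "avg_chars = hacker_news_with_texts['text'].str.len().mean()\n",
+    "avg_chars / 4  # ballpark tokens per row under the ~4 chars/token assumption\n",
+    "```"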
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "PZeg4LCUiouZ", + "outputId": "05b67cac-6b3d-42ef-d6d6-b578a9734f4c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "393.2356889064355" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hacker_news_with_texts['text'].str.len().mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2IXqskHHiouZ" + }, + "source": [ + "**Optional**: You can raise the confirmation threshold so that larger AI operations proceed without pausing to ask for confirmation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EpjXQ4FViouZ" + }, + "outputs": [], + "source": [ + "if Version(bigframes.__version__) >= Version(\"1.42.0\"):\n", + " bigframes.options.compute.ai_ops_confirmation_threshold = 5000" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SYFB-X1RiouZ" + }, + "source": [ + "Now it's the LLM's turn: keep only the rows whose text is mainly about the iPhone. This operation takes several minutes to finish." + ] + },
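+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before launching the full run, you may want to dry-run the operator on a small sample to sanity-check the prompt. A minimal sketch, reusing the exact `ai.filter` call from the next cell:\n", + "\n", + "```python\n", + "# Try the semantic filter on 20 rows first; quick and cheap compared to ~2.5k rows.\n", + "sample = hacker_news_with_texts.head(20)\n", + "sample.ai.filter(\"The {text} is mainly focused on iPhone\", gemini_model)\n", + "```" + ] + },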
+ { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 253 + }, + "id": "rditQlmoiouZ", + "outputId": "2b44dcbf-2ef5-4119-ca05-9b082db9c0c1" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "[rendered HTML table elided; the same 11 rows appear in the text/plain output below] 
[11 rows x 6 columns in total]" + ], + "text/plain": [ + " title text \\\n", + "445 If I want to manipulate a device, I'll bu... \n", + "967 I've had my 6S Plus now for 36 months and... \n", + "1253 Apple is far more closed and tyrannical with i... \n", + "1274 An iOS version was released earlier this year.... \n", + "1548 I’m not sure how that fits with Apple pursuing... \n", + "1630 Not sure if you’re being ironic, but I use an ... \n", + "1664 Quoting from the article I linked you:

>&... \n", + "1884 > Not all wireless headsets are the same, h... \n", + "2251 Will not buy any more apple product, iphone 4s... \n", + "2877 I've been an iPhone user since the OG in ... \n", + "\n", + " by score timestamp type \n", + "445 exelius 2017-09-21 17:39:37+00:00 comment \n", + "967 blinding-streak 2023-04-30 19:10:16+00:00 comment \n", + "975 throwaway427 2019-01-03 18:06:33+00:00 comment \n", + "1253 RyanMcGreal 2012-12-21 00:45:40+00:00 comment \n", + "1274 pls2halp 2017-12-09 06:36:41+00:00 comment \n", + "1548 alphabettsy 2021-12-26 19:41:38+00:00 comment \n", + "1630 lxgr 2025-03-29 03:57:25+00:00 comment \n", + "1664 StreamBright 2017-09-11 19:57:34+00:00 comment \n", + "1884 cptskippy 2021-11-16 13:28:44+00:00 comment \n", + "2251 omi 2012-09-11 14:42:52+00:00 comment \n", + "2877 vsnf 2024-04-15 06:28:09+00:00 comment \n", + "\n", + "[11 rows x 6 columns]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iphone_comments = hacker_news_with_texts.ai.filter(\"The {text} is mainly focused on iPhone\", gemini_model)\n", + "iphone_comments" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yl24sJFIiouZ" + }, + "source": [ + "The performance of the ai operators depends on the length of your input as well as your quota. Here are our benchmarks for running the previous operation with Gemini Flash 1.5 over data of different sizes. Here are the estimates supposing your quota is [the default 200 requests per minute](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas):\n", + "\n", + "* 800 Rows -> ~4m\n", + "* 2550 Rows -> ~13m\n", + "* 8500 Rows -> ~40m\n", + "\n", + "These numbers can give you a general idea of how fast the operators run." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eo4nfISuiouZ" + }, + "source": [ + "Now, use LLM to summarize the sentiments towards iPhone:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 253 + }, + "id": "IlKBrNxUiouZ", + "outputId": "818d01e4-1cdf-42a2-9e02-61c4736a8905" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
[rendered HTML table elided; the same 11 rows, including the sentiment column, appear in the text/plain output below] 
[11 rows x 7 columns in total]" + ], + "text/plain": [ + " title text \\\n", + "445 If I want to manipulate a device, I'll bu... \n", + "967
I've had my 6S Plus now for 36 months and... \n", + "1253 Apple is far more closed and tyrannical with i... \n", + "1274 An iOS version was released earlier this year.... \n", + "1548 I’m not sure how that fits with Apple pursuing... \n", + "1630 Not sure if you’re being ironic, but I use an ... \n", + "1664 Quoting from the article I linked you:

>&... \n", + "1884 > Not all wireless headsets are the same, h... \n", + "2251 Will not buy any more apple product, iphone 4s... \n", + "2877 I've been an iPhone user since the OG in ... \n", + "\n", + " by score timestamp type \\\n", + "445 exelius 2017-09-21 17:39:37+00:00 comment \n", + "967 blinding-streak 2023-04-30 19:10:16+00:00 comment \n", + "975 throwaway427 2019-01-03 18:06:33+00:00 comment \n", + "1253 RyanMcGreal 2012-12-21 00:45:40+00:00 comment \n", + "1274 pls2halp 2017-12-09 06:36:41+00:00 comment \n", + "1548 alphabettsy 2021-12-26 19:41:38+00:00 comment \n", + "1630 lxgr 2025-03-29 03:57:25+00:00 comment \n", + "1664 StreamBright 2017-09-11 19:57:34+00:00 comment \n", + "1884 cptskippy 2021-11-16 13:28:44+00:00 comment \n", + "2251 omi 2012-09-11 14:42:52+00:00 comment \n", + "2877 vsnf 2024-04-15 06:28:09+00:00 comment \n", + "\n", + " sentiment \n", + "445 Pragmatic, slightly annoyed\n", + " \n", + "967 I lack the ability to access external websites... \n", + "975 Generally positive, impressed.\n", + " \n", + "1253 Negative towards Apple\n", + " \n", + "1274 Neutral, factual statement.\n", + " \n", + "1548 Skeptical and critical.\n", + " \n", + "1630 Wants interoperability, frustrated.\n", + " \n", + "1664 Extremely positive review\n", + " \n", + "1884 Skeptical and critical\n", + " \n", + "2251 Negative, regretful.\n", + " \n", + "2877 Mildly annoyed, resigned\n", + " \n", + "\n", + "[11 rows x 7 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iphone_comments.ai.map(\"Summarize the sentiment of the {text}. Your answer should have at most 3 words\", output_column=\"sentiment\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y7_16T2xiouZ" + }, + "source": [ + "Here is another example: count the number of rows whose authors have animals in their names." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 880 + }, + "id": "CbGwc_uXiouZ", + "outputId": "138acca0-7fb9-495a-e797-0d42495d65e6" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py:3577: UserWarning: Reading cached table from 2025-04-02 18:00:55.801294+00:00 to avoid\n", + "incompatibilies with previous reads of this table. To read the latest\n", + "version, set `use_cache=False` or close the current session with\n", + "Session.close() or bigframes.pandas.close_session().\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n" + ] + }, + { + "data": { + "text/html": [ + "
[rendered HTML preview (25 of 3000 rows) elided; the same rows appear in the text/plain output below] 
[3000 rows x 6 columns in total]" + ], + "text/plain": [ + " title \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 Workplace Wellness Programs Don’t Work Well. W... \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 \n", + "9 \n", + "10 Taiwan’s Tech King to Nancy Pelosi: U.S. Is in... \n", + "11 Android’s new multitasking is terrible and sho... \n", + "12 \n", + "13 \n", + "14 \n", + "15 \n", + "16 \n", + "17 \n", + "18 \n", + "19 \n", + "20 \n", + "21 \n", + "22 \n", + "23 \n", + "24 \n", + "\n", + " text by score \\\n", + "0 \n", + "1 I'd agree about border control with a cav... bandrami \n", + "2 So 4 pickups? At least pickups are high margin... seanmcdirmid \n", + "3 anarbadalov 2 \n", + "4 Are you implying that to be a good developer y... ecesena \n", + "5 It pretty much works with other carriers. My s... toast0 \n", + "6 \n", + "7 "not operated for profit" and "... radford-neal \n", + "8 It's a good description of one applicatio... dkarl \n", + "9 Might be a bit high, but....

"For ex... tyingq \n", + "10 dlcmh 11 \n", + "11 wowamit 1 \n", + "12 SEEKING WORK | REMOTE | US Citizen

Location:... rasikjain \n", + "13 I had a very similar experience last month tea... tmaly \n", + "14 mrtweetyhack \n", + "15 > Just do what most American cities do with... AnthonyMouse \n", + "16 It's not a space. The l and the C are at ... antninja \n", + "17 I’ve knowingly paid the premium in the past, j... zwily \n", + "18 > Any sufficiently complicated C or Fortran... wavemode \n", + "19 It's similar to a lot of Japanese "t... TillE \n", + "20 Engineers are just people paid to code. If you... rchaud \n", + "21 So don't use it CyberDildonics \n", + "22 Sure, but there are degrees of these things. T... dang \n", + "23 I wish this would happen. There's a &quo... coredog64 \n", + "24 I’m not sure why responsible riders wouldn’t w... mjmahone17 \n", + "\n", + " timestamp type \n", + "0 2010-04-16 19:52:51+00:00 comment \n", + "1 2023-06-04 06:12:00+00:00 comment \n", + "2 2023-09-19 14:19:46+00:00 comment \n", + "3 2018-08-07 12:17:45+00:00 story \n", + "4 2016-06-10 19:38:25+00:00 comment \n", + "5 2024-08-13 03:11:32+00:00 comment \n", + "6 2020-06-07 22:43:03+00:00 comment \n", + "7 2020-03-19 00:24:47+00:00 comment \n", + "8 2024-10-07 13:38:18+00:00 comment \n", + "9 2017-01-23 19:49:15+00:00 comment \n", + "10 2023-02-18 02:51:11+00:00 story \n", + "11 2018-10-22 09:50:36+00:00 story \n", + "12 2024-08-01 16:56:49+00:00 comment \n", + "13 2020-01-22 18:26:36+00:00 comment \n", + "14 2022-02-26 19:34:00+00:00 comment \n", + "15 2021-10-04 23:10:50+00:00 comment \n", + "16 2013-07-13 09:48:34+00:00 comment \n", + "17 2020-06-17 14:26:43+00:00 comment \n", + "18 2025-02-07 06:42:53+00:00 comment \n", + "19 2022-11-06 17:15:10+00:00 comment \n", + "20 2023-04-12 14:31:42+00:00 comment \n", + "21 2015-12-29 22:01:16+00:00 comment \n", + "22 2021-11-11 23:42:12+00:00 comment \n", + "23 2018-02-12 16:03:37+00:00 comment \n", + "24 2021-11-09 01:36:01+00:00 comment \n", + "...\n", + "\n", + "[3000 rows x 6 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hacker_news = bpd.read_gbq(\"bigquery-public-data.hacker_news.full\")[['title', 'text', 'by', 'score', 'timestamp', 'type']].head(3000)\n", + "hacker_news" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 880 + }, + "id": "9dzU8SNziouZ", + "outputId": "da8815c1-c411-4afc-d1ca-5e44c75b5b48" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
[rendered HTML preview (25 of 112 rows) elided; the same rows appear in the text/plain output below] 
[112 rows x 6 columns in total]" + ], + "text/plain": [ + " title \\\n", + "15 \n", + "16 \n", + "23 \n", + "27 \n", + "36 \n", + "150 \n", + "160 \n", + "205 \n", + "231 \n", + "250 \n", + "320 Protest against Bill C-11, Canada's SOPA, plan... \n", + "344 \n", + "348 The flu vaccine this year is only 10% effective \n", + "360 \n", + "398 1 + 1 = 3 \n", + "407 \n", + "454 \n", + "457 Brazilian Rails Websites \n", + "472 \n", + "493 \n", + "497 \n", + "514 \n", + "535 \n", + "607 \n", + "672 \n", + "\n", + " text by \\\n", + "15 > Just do what most American cities do with... AnthonyMouse \n", + "16 It's not a space. The l and the C are at ... antninja \n", + "23 I wish this would happen. There's a &quo... coredog64 \n", + "27 Flash got close, but was too complex and expen... surfingdino \n", + "36 I think the "algo genius" type of de... poisonborz \n", + "150 No one will be doing anything practical with a... NeutralCrane \n", + "160 I think this is more semantics than anything.<... superb-owl \n", + "205 Interesting to think of sign language localisa... robin_reala \n", + "231 Probably because of their key location. ape4 \n", + "250 I realize this is a bit passe, but there were ... FeepingCreature \n", + "320 magikarp \n", + "344 What? Are you suggesting we cannot criticize p... chickenpotpie \n", + "348 maryfoxmarlow \n", + "360 Bomb ownership is okay AFAIK. Intent to commi... Ferret7446 \n", + "398 oscar-the-horse \n", + "407 No (almost certainly), but you will become fru... AnimalMuppet \n", + "454 48h is less than 5 kWh of batteries, one quart... tigershark \n", + "457 akitaonrails \n", + "472 > When most people start as programmers, th... PavlovsCat \n", + "493 Related anecdata + a study I found useful. Aft... TrainedMonkey \n", + "497 That "civilized" country has too man... rantanplan \n", + "514 The current Go 2 drafts do. tapirl \n", + "535 Having walked this same path, this blog resona... curiousllama \n", + "607 If people thought the reward for talking to a ... slapfrog \n", + "672 Given that you say you're 38 and looking ... 
strix_varius \n", + "\n", + " score timestamp type \n", + "15 2021-10-04 23:10:50+00:00 comment \n", + "16 2013-07-13 09:48:34+00:00 comment \n", + "23 2018-02-12 16:03:37+00:00 comment \n", + "27 2024-05-08 05:02:37+00:00 comment \n", + "36 2024-06-04 07:39:08+00:00 comment \n", + "150 2025-02-01 14:26:25+00:00 comment \n", + "160 2022-06-08 16:55:54+00:00 comment \n", + "205 2019-02-01 11:49:23+00:00 comment \n", + "231 2014-08-29 14:55:40+00:00 comment \n", + "250 2023-10-15 11:32:44+00:00 comment \n", + "320 1 2012-01-29 02:14:12+00:00 story \n", + "344 2020-12-02 18:24:19+00:00 comment \n", + "348 3 2018-02-02 02:19:42+00:00 story \n", + "360 2023-06-25 20:04:30+00:00 comment \n", + "398 2 2012-08-05 22:18:51+00:00 story \n", + "407 2023-09-15 16:11:08+00:00 comment \n", + "454 2021-07-23 05:12:52+00:00 comment \n", + "457 1 2008-07-27 17:27:47+00:00 story \n", + "472 2018-12-23 20:37:20+00:00 comment \n", + "493 2023-02-02 16:14:23+00:00 comment \n", + "497 2017-02-17 12:51:51+00:00 comment \n", + "514 2020-08-12 02:37:41+00:00 comment \n", + "535 2020-10-07 20:35:18+00:00 comment \n", + "607 2021-09-08 20:58:13+00:00 comment \n", + "672 2023-08-04 02:41:50+00:00 comment \n", + "...\n", + "\n", + "[112 rows x 6 columns]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hacker_news.ai.filter(\"{by} contains animal name\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3bpkaspoiouZ" + }, + "source": [ + "Here are the runtime numbers with 500 requests per minute [raised quota](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas):\n", + "* 3000 rows -> ~6m\n", + "* 10000 rows -> ~26m" + ] + } + ], + "metadata": { + "colab": { + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/experimental/longer_ml_demo.ipynb b/notebooks/experimental/longer_ml_demo.ipynb deleted file mode 100644 index 793ff58ecd..0000000000 --- a/notebooks/experimental/longer_ml_demo.ipynb +++ /dev/null @@ -1,1925 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "71fbfc47", - "metadata": {}, - "source": [ - "**Note: this notebook requires changes not yet checked in**\n", - "\n", - "# Introduction\n", - "\n", - "This is a prototype for how a minimal SKLearn-like wrapper for BQML might work in BigQuery DataFrames.\n", - "\n", - "Disclaimer - this is not a polished design or a robust implementation, this is a quick prototype to workshop some ideas. 
Design will be next.\n", - "\n", - "What is BigQuery DataFrame?\n", - "- Pandas API for BigQuery\n", - "- Lets data scientists quickly iterate and prepare their data as they do in Pandas, but executed by BigQuery\n", - "\n", - "What is meant by SKLearn-like?\n", - "- Follow the API design practices from the SKLearn project\n", - " - [API design for machine learning software: experiences from the scikit-learn project](https://arxiv.org/pdf/1309.0238.pdf)\n", - "- Not a copy of, or compatible with, SKLearn\n", - "\n", - "Briefly, patterns taken from SKLearn are:\n", - "- Models and transforms are 'Estimators'\n", - " - A bundle of parameters with a consistent way to initialize/get/set\n", - " - And a .fit(..) method to fit to training data\n", - "- Models additionally have a .predict(..)\n", - "- By default, these objects are transient, making them easy to play around with. No need to give them names or decide how to persist them.\n", - "\n", - "\n", - "Design goals:\n", - "- Zero friction ML capabilities for BigQuery DataFrames users (no extra auth, configuration, etc)\n", - "- Offers first class integration with the Pandas-like BigQuery DataFrames API\n", - "- Uses SKLearn-like design patterns that feel familiar to data scientists\n", - "- Also a first class BigQuery experience\n", - " - Offers BigQuery's scalability and storage / compute management\n", - " - Works naturally with BigQuery's other interfaces, e.g. GUI and SQL\n", - " - BQML features" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "345c2163", - "metadata": {}, - "source": [ - "# Linear regression tutorial\n", - "\n", - "Adapted from the \"Penguin weight\" Linear Regression tutorial for BQML: https://cloud.google.com/bigquery-ml/docs/linear-regression-tutorial\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "03c9e168", - "metadata": {}, - "source": [ - "## Setting the scene\n", - "\n", - "Our conservationists have sent us some measurements of penguins found in the Antarctic islands. They say that some of the body mass measurements for the Adelie penguins are missing, and ask if we can use some data science magic to estimate them. Sounds like a job for a linear regression!\n", - "\n", - "Lets take a look at the data..." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d7a03de2-c0ef-4f80-9cd5-f96e87cf2d54", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tag_numberspeciesislandculmen_length_mmculmen_depth_mmflipper_length_mmbody_mass_gsex
01225Gentoo penguin (Pygoscelis papua)Biscoe<NA><NA><NA><NA><NA>
11278Gentoo penguin (Pygoscelis papua)Biscoe42.013.5210.04150.0FEMALE
21275Gentoo penguin (Pygoscelis papua)Biscoe46.513.5210.04550.0FEMALE
31233Gentoo penguin (Pygoscelis papua)Biscoe43.314.0208.04575.0FEMALE
41311Gentoo penguin (Pygoscelis papua)Biscoe47.514.0212.04875.0FEMALE
51316Gentoo penguin (Pygoscelis papua)Biscoe49.114.5212.04625.0FEMALE
61313Gentoo penguin (Pygoscelis papua)Biscoe45.514.5212.04750.0FEMALE
71381Gentoo penguin (Pygoscelis papua)Biscoe47.614.5215.05400.0MALE
81377Gentoo penguin (Pygoscelis papua)Biscoe45.114.5207.05050.0FEMALE
91380Gentoo penguin (Pygoscelis papua)Biscoe45.114.5215.05000.0FEMALE
101257Gentoo penguin (Pygoscelis papua)Biscoe46.214.5209.04800.0FEMALE
111336Gentoo penguin (Pygoscelis papua)Biscoe46.514.5213.04400.0FEMALE
121237Gentoo penguin (Pygoscelis papua)Biscoe43.214.5208.04450.0FEMALE
131302Gentoo penguin (Pygoscelis papua)Biscoe48.515.0219.04850.0FEMALE
141325Gentoo penguin (Pygoscelis papua)Biscoe49.115.0228.05500.0MALE
151285Gentoo penguin (Pygoscelis papua)Biscoe47.515.0218.04950.0FEMALE
161242Gentoo penguin (Pygoscelis papua)Biscoe49.615.0216.04750.0MALE
171246Gentoo penguin (Pygoscelis papua)Biscoe47.715.0216.04750.0FEMALE
181320Gentoo penguin (Pygoscelis papua)Biscoe45.515.0220.05000.0MALE
191244Gentoo penguin (Pygoscelis papua)Biscoe46.415.0216.04700.0FEMALE
\n", - "
[347 rows x 8 columns in total]" - ], - "text/plain": [ - " tag_number species island culmen_length_mm \\\n", - "0 1225 Gentoo penguin (Pygoscelis papua) Biscoe \n", - "1 1278 Gentoo penguin (Pygoscelis papua) Biscoe 42.0 \n", - "2 1275 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 \n", - "3 1233 Gentoo penguin (Pygoscelis papua) Biscoe 43.3 \n", - "4 1311 Gentoo penguin (Pygoscelis papua) Biscoe 47.5 \n", - "5 1316 Gentoo penguin (Pygoscelis papua) Biscoe 49.1 \n", - "6 1313 Gentoo penguin (Pygoscelis papua) Biscoe 45.5 \n", - "7 1381 Gentoo penguin (Pygoscelis papua) Biscoe 47.6 \n", - "8 1377 Gentoo penguin (Pygoscelis papua) Biscoe 45.1 \n", - "9 1380 Gentoo penguin (Pygoscelis papua) Biscoe 45.1 \n", - "10 1257 Gentoo penguin (Pygoscelis papua) Biscoe 46.2 \n", - "11 1336 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 \n", - "12 1237 Gentoo penguin (Pygoscelis papua) Biscoe 43.2 \n", - "13 1302 Gentoo penguin (Pygoscelis papua) Biscoe 48.5 \n", - "14 1325 Gentoo penguin (Pygoscelis papua) Biscoe 49.1 \n", - "15 1285 Gentoo penguin (Pygoscelis papua) Biscoe 47.5 \n", - "16 1242 Gentoo penguin (Pygoscelis papua) Biscoe 49.6 \n", - "17 1246 Gentoo penguin (Pygoscelis papua) Biscoe 47.7 \n", - "18 1320 Gentoo penguin (Pygoscelis papua) Biscoe 45.5 \n", - "19 1244 Gentoo penguin (Pygoscelis papua) Biscoe 46.4 \n", - "20 1390 Gentoo penguin (Pygoscelis papua) Biscoe 50.7 \n", - "21 1379 Gentoo penguin (Pygoscelis papua) Biscoe 47.8 \n", - "22 1267 Gentoo penguin (Pygoscelis papua) Biscoe 50.1 \n", - "23 1389 Gentoo penguin (Pygoscelis papua) Biscoe 47.2 \n", - "24 1269 Gentoo penguin (Pygoscelis papua) Biscoe 49.6 \n", - "\n", - " culmen_depth_mm flipper_length_mm body_mass_g sex \n", - "0 \n", - "1 13.5 210.0 4150.0 FEMALE \n", - "2 13.5 210.0 4550.0 FEMALE \n", - "3 14.0 208.0 4575.0 FEMALE \n", - "4 14.0 212.0 4875.0 FEMALE \n", - "5 14.5 212.0 4625.0 FEMALE \n", - "6 14.5 212.0 4750.0 FEMALE \n", - "7 14.5 215.0 5400.0 MALE \n", - "8 14.5 207.0 5050.0 FEMALE \n", - "9 14.5 215.0 5000.0 FEMALE \n", - "10 14.5 209.0 4800.0 FEMALE \n", - "11 14.5 213.0 4400.0 FEMALE \n", - "12 14.5 208.0 4450.0 FEMALE \n", - "13 15.0 219.0 4850.0 FEMALE \n", - "14 15.0 228.0 5500.0 MALE \n", - "15 15.0 218.0 4950.0 FEMALE \n", - "16 15.0 216.0 4750.0 MALE \n", - "17 15.0 216.0 4750.0 FEMALE \n", - "18 15.0 220.0 5000.0 MALE \n", - "19 15.0 216.0 4700.0 FEMALE \n", - "20 15.0 223.0 5550.0 MALE \n", - "21 15.0 215.0 5650.0 MALE \n", - "22 15.0 225.0 5000.0 MALE \n", - "23 15.5 215.0 4975.0 FEMALE \n", - "24 16.0 225.0 5700.0 MALE \n", - "...\n", - "\n", - "[347 rows x 8 columns]" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import bigframes.pandas\n", - "\n", - "df = bigframes.pandas.read_gbq(\"bigframes-dev.bqml_tutorial.penguins\")\n", - "df" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "359524c4", - "metadata": {}, - "source": [ - "First we note that while we have a default numbered index generated by BigQuery, actually the penguins are uniquely identified by their tags.\n", - "\n", - "Lets make the data a bit friendlier to work with by setting the tag number column as the index." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "93d01411", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
speciesislandculmen_length_mmculmen_depth_mmflipper_length_mmbody_mass_gsex
tag_number
1225Gentoo penguin (Pygoscelis papua)Biscoe<NA><NA><NA><NA><NA>
1278Gentoo penguin (Pygoscelis papua)Biscoe42.013.5210.04150.0FEMALE
1275Gentoo penguin (Pygoscelis papua)Biscoe46.513.5210.04550.0FEMALE
1233Gentoo penguin (Pygoscelis papua)Biscoe43.314.0208.04575.0FEMALE
1311Gentoo penguin (Pygoscelis papua)Biscoe47.514.0212.04875.0FEMALE
1316Gentoo penguin (Pygoscelis papua)Biscoe49.114.5212.04625.0FEMALE
1313Gentoo penguin (Pygoscelis papua)Biscoe45.514.5212.04750.0FEMALE
1381Gentoo penguin (Pygoscelis papua)Biscoe47.614.5215.05400.0MALE
1377Gentoo penguin (Pygoscelis papua)Biscoe45.114.5207.05050.0FEMALE
1380Gentoo penguin (Pygoscelis papua)Biscoe45.114.5215.05000.0FEMALE
1257Gentoo penguin (Pygoscelis papua)Biscoe46.214.5209.04800.0FEMALE
1336Gentoo penguin (Pygoscelis papua)Biscoe46.514.5213.04400.0FEMALE
1237Gentoo penguin (Pygoscelis papua)Biscoe43.214.5208.04450.0FEMALE
1302Gentoo penguin (Pygoscelis papua)Biscoe48.515.0219.04850.0FEMALE
1325Gentoo penguin (Pygoscelis papua)Biscoe49.115.0228.05500.0MALE
1285Gentoo penguin (Pygoscelis papua)Biscoe47.515.0218.04950.0FEMALE
1242Gentoo penguin (Pygoscelis papua)Biscoe49.615.0216.04750.0MALE
1246Gentoo penguin (Pygoscelis papua)Biscoe47.715.0216.04750.0FEMALE
1320Gentoo penguin (Pygoscelis papua)Biscoe45.515.0220.05000.0MALE
1244Gentoo penguin (Pygoscelis papua)Biscoe46.415.0216.04700.0FEMALE
\n", - "
[347 rows x 7 columns in total]" - ], - "text/plain": [ - " species island culmen_length_mm \\\n", - "tag_number \n", - "1225 Gentoo penguin (Pygoscelis papua) Biscoe \n", - "1278 Gentoo penguin (Pygoscelis papua) Biscoe 42.0 \n", - "1275 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 \n", - "1233 Gentoo penguin (Pygoscelis papua) Biscoe 43.3 \n", - "1311 Gentoo penguin (Pygoscelis papua) Biscoe 47.5 \n", - "1316 Gentoo penguin (Pygoscelis papua) Biscoe 49.1 \n", - "1313 Gentoo penguin (Pygoscelis papua) Biscoe 45.5 \n", - "1381 Gentoo penguin (Pygoscelis papua) Biscoe 47.6 \n", - "1377 Gentoo penguin (Pygoscelis papua) Biscoe 45.1 \n", - "1380 Gentoo penguin (Pygoscelis papua) Biscoe 45.1 \n", - "1257 Gentoo penguin (Pygoscelis papua) Biscoe 46.2 \n", - "1336 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 \n", - "1237 Gentoo penguin (Pygoscelis papua) Biscoe 43.2 \n", - "1302 Gentoo penguin (Pygoscelis papua) Biscoe 48.5 \n", - "1325 Gentoo penguin (Pygoscelis papua) Biscoe 49.1 \n", - "1285 Gentoo penguin (Pygoscelis papua) Biscoe 47.5 \n", - "1242 Gentoo penguin (Pygoscelis papua) Biscoe 49.6 \n", - "1246 Gentoo penguin (Pygoscelis papua) Biscoe 47.7 \n", - "1320 Gentoo penguin (Pygoscelis papua) Biscoe 45.5 \n", - "1244 Gentoo penguin (Pygoscelis papua) Biscoe 46.4 \n", - "1390 Gentoo penguin (Pygoscelis papua) Biscoe 50.7 \n", - "1379 Gentoo penguin (Pygoscelis papua) Biscoe 47.8 \n", - "1267 Gentoo penguin (Pygoscelis papua) Biscoe 50.1 \n", - "1389 Gentoo penguin (Pygoscelis papua) Biscoe 47.2 \n", - "1269 Gentoo penguin (Pygoscelis papua) Biscoe 49.6 \n", - "\n", - " culmen_depth_mm flipper_length_mm body_mass_g sex \n", - "tag_number \n", - "1225 \n", - "1278 13.5 210.0 4150.0 FEMALE \n", - "1275 13.5 210.0 4550.0 FEMALE \n", - "1233 14.0 208.0 4575.0 FEMALE \n", - "1311 14.0 212.0 4875.0 FEMALE \n", - "1316 14.5 212.0 4625.0 FEMALE \n", - "1313 14.5 212.0 4750.0 FEMALE \n", - "1381 14.5 215.0 5400.0 MALE \n", - "1377 14.5 207.0 5050.0 FEMALE \n", - "1380 14.5 215.0 5000.0 FEMALE \n", - "1257 14.5 209.0 4800.0 FEMALE \n", - "1336 14.5 213.0 4400.0 FEMALE \n", - "1237 14.5 208.0 4450.0 FEMALE \n", - "1302 15.0 219.0 4850.0 FEMALE \n", - "1325 15.0 228.0 5500.0 MALE \n", - "1285 15.0 218.0 4950.0 FEMALE \n", - "1242 15.0 216.0 4750.0 MALE \n", - "1246 15.0 216.0 4750.0 FEMALE \n", - "1320 15.0 220.0 5000.0 MALE \n", - "1244 15.0 216.0 4700.0 FEMALE \n", - "1390 15.0 223.0 5550.0 MALE \n", - "1379 15.0 215.0 5650.0 MALE \n", - "1267 15.0 225.0 5000.0 MALE \n", - "1389 15.5 215.0 4975.0 FEMALE \n", - "1269 16.0 225.0 5700.0 MALE \n", - "...\n", - "\n", - "[347 rows x 7 columns]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = df.set_index(\"tag_number\")\n", - "df" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f95fda6a", - "metadata": {}, - "source": [ - "We saw in the first view that there were some missing values. We're especially interested in observations that are missing just the body_mass_g, so lets look at those:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "941cb6c3-8c54-42ce-a945-4fa604176b2e", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
speciesislandculmen_length_mmculmen_depth_mmflipper_length_mmbody_mass_gsex
tag_number
1225Gentoo penguin (Pygoscelis papua)Biscoe<NA><NA><NA><NA><NA>
1393Adelie Penguin (Pygoscelis adeliae)Torgersen<NA><NA><NA><NA><NA>
1524Adelie Penguin (Pygoscelis adeliae)Dream41.620.0204.0<NA>MALE
1523Adelie Penguin (Pygoscelis adeliae)Dream38.017.5194.0<NA>FEMALE
1525Adelie Penguin (Pygoscelis adeliae)Dream36.318.5194.0<NA>MALE
\n", - "
[5 rows x 7 columns in total]" - ], - "text/plain": [ - " species island culmen_length_mm \\\n", - "tag_number \n", - "1225 Gentoo penguin (Pygoscelis papua) Biscoe \n", - "1393 Adelie Penguin (Pygoscelis adeliae) Torgersen \n", - "1524 Adelie Penguin (Pygoscelis adeliae) Dream 41.6 \n", - "1523 Adelie Penguin (Pygoscelis adeliae) Dream 38.0 \n", - "1525 Adelie Penguin (Pygoscelis adeliae) Dream 36.3 \n", - "\n", - " culmen_depth_mm flipper_length_mm body_mass_g sex \n", - "tag_number \n", - "1225 \n", - "1393 \n", - "1524 20.0 204.0 MALE \n", - "1523 17.5 194.0 FEMALE \n", - "1525 18.5 194.0 MALE \n", - "\n", - "[5 rows x 7 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df.body_mass_g.isnull()]" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a70c2027", - "metadata": {}, - "source": [ - "Here we see three Adelie penguins with tag numbers 1523, 1524, 1525 are missing their body_mass_g but have the other measurements. These are the ones we need to guess. We can do this by training a statistical model on the measurements that we do have, and then using it to predict the missing values.\n", - "\n", - "Our conservationists warned us that trying to generalize across species is a bad idea, so for now lets just try building a model for Adelie penguins. We can revisit it later and see if including the other observations improves the model performance." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "93ff013a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
speciesislandculmen_length_mmculmen_depth_mmflipper_length_mmbody_mass_gsex
tag_number
1172Adelie Penguin (Pygoscelis adeliae)Dream32.115.5188.03050.0FEMALE
1371Adelie Penguin (Pygoscelis adeliae)Biscoe37.716.0183.03075.0FEMALE
1417Adelie Penguin (Pygoscelis adeliae)Torgersen38.617.0188.02900.0FEMALE
1204Adelie Penguin (Pygoscelis adeliae)Dream40.717.0190.03725.0MALE
1251Adelie Penguin (Pygoscelis adeliae)Biscoe37.617.0185.03600.0FEMALE
1422Adelie Penguin (Pygoscelis adeliae)Torgersen35.717.0189.03350.0FEMALE
1394Adelie Penguin (Pygoscelis adeliae)Torgersen40.217.0176.03450.0FEMALE
1163Adelie Penguin (Pygoscelis adeliae)Dream36.417.0195.03325.0FEMALE
1329Adelie Penguin (Pygoscelis adeliae)Biscoe38.117.0181.03175.0FEMALE
1406Adelie Penguin (Pygoscelis adeliae)Torgersen44.118.0210.04000.0MALE
1196Adelie Penguin (Pygoscelis adeliae)Dream36.518.0182.03150.0FEMALE
1228Adelie Penguin (Pygoscelis adeliae)Biscoe41.618.0192.03950.0MALE
1412Adelie Penguin (Pygoscelis adeliae)Torgersen40.318.0195.03250.0FEMALE
1142Adelie Penguin (Pygoscelis adeliae)Dream35.718.0202.03550.0FEMALE
1430Adelie Penguin (Pygoscelis adeliae)Torgersen33.519.0190.03600.0FEMALE
1333Adelie Penguin (Pygoscelis adeliae)Biscoe43.219.0197.04775.0MALE
1414Adelie Penguin (Pygoscelis adeliae)Torgersen38.719.0195.03450.0FEMALE
1197Adelie Penguin (Pygoscelis adeliae)Dream41.119.0182.03425.0MALE
1443Adelie Penguin (Pygoscelis adeliae)Torgersen40.619.0199.04000.0MALE
1295Adelie Penguin (Pygoscelis adeliae)Biscoe41.020.0203.04725.0MALE
\n", - "
[146 rows x 7 columns in total]" - ], - "text/plain": [ - " species island culmen_length_mm \\\n", - "tag_number \n", - "1172 Adelie Penguin (Pygoscelis adeliae) Dream 32.1 \n", - "1371 Adelie Penguin (Pygoscelis adeliae) Biscoe 37.7 \n", - "1417 Adelie Penguin (Pygoscelis adeliae) Torgersen 38.6 \n", - "1204 Adelie Penguin (Pygoscelis adeliae) Dream 40.7 \n", - "1251 Adelie Penguin (Pygoscelis adeliae) Biscoe 37.6 \n", - "1422 Adelie Penguin (Pygoscelis adeliae) Torgersen 35.7 \n", - "1394 Adelie Penguin (Pygoscelis adeliae) Torgersen 40.2 \n", - "1163 Adelie Penguin (Pygoscelis adeliae) Dream 36.4 \n", - "1329 Adelie Penguin (Pygoscelis adeliae) Biscoe 38.1 \n", - "1406 Adelie Penguin (Pygoscelis adeliae) Torgersen 44.1 \n", - "1196 Adelie Penguin (Pygoscelis adeliae) Dream 36.5 \n", - "1228 Adelie Penguin (Pygoscelis adeliae) Biscoe 41.6 \n", - "1412 Adelie Penguin (Pygoscelis adeliae) Torgersen 40.3 \n", - "1142 Adelie Penguin (Pygoscelis adeliae) Dream 35.7 \n", - "1430 Adelie Penguin (Pygoscelis adeliae) Torgersen 33.5 \n", - "1333 Adelie Penguin (Pygoscelis adeliae) Biscoe 43.2 \n", - "1414 Adelie Penguin (Pygoscelis adeliae) Torgersen 38.7 \n", - "1197 Adelie Penguin (Pygoscelis adeliae) Dream 41.1 \n", - "1443 Adelie Penguin (Pygoscelis adeliae) Torgersen 40.6 \n", - "1295 Adelie Penguin (Pygoscelis adeliae) Biscoe 41.0 \n", - "1207 Adelie Penguin (Pygoscelis adeliae) Dream 38.8 \n", - "1349 Adelie Penguin (Pygoscelis adeliae) Biscoe 38.2 \n", - "1350 Adelie Penguin (Pygoscelis adeliae) Biscoe 37.8 \n", - "1351 Adelie Penguin (Pygoscelis adeliae) Biscoe 38.1 \n", - "1116 Adelie Penguin (Pygoscelis adeliae) Dream 37.0 \n", - "\n", - " culmen_depth_mm flipper_length_mm body_mass_g sex \n", - "tag_number \n", - "1172 15.5 188.0 3050.0 FEMALE \n", - "1371 16.0 183.0 3075.0 FEMALE \n", - "1417 17.0 188.0 2900.0 FEMALE \n", - "1204 17.0 190.0 3725.0 MALE \n", - "1251 17.0 185.0 3600.0 FEMALE \n", - "1422 17.0 189.0 3350.0 FEMALE \n", - "1394 17.0 176.0 3450.0 FEMALE \n", - "1163 17.0 195.0 3325.0 FEMALE \n", - "1329 17.0 181.0 3175.0 FEMALE \n", - "1406 18.0 210.0 4000.0 MALE \n", - "1196 18.0 182.0 3150.0 FEMALE \n", - "1228 18.0 192.0 3950.0 MALE \n", - "1412 18.0 195.0 3250.0 FEMALE \n", - "1142 18.0 202.0 3550.0 FEMALE \n", - "1430 19.0 190.0 3600.0 FEMALE \n", - "1333 19.0 197.0 4775.0 MALE \n", - "1414 19.0 195.0 3450.0 FEMALE \n", - "1197 19.0 182.0 3425.0 MALE \n", - "1443 19.0 199.0 4000.0 MALE \n", - "1295 20.0 203.0 4725.0 MALE \n", - "1207 20.0 190.0 3950.0 MALE \n", - "1349 20.0 190.0 3900.0 MALE \n", - "1350 20.0 190.0 4250.0 MALE \n", - "1351 16.5 198.0 3825.0 FEMALE \n", - "1116 16.5 185.0 3400.0 FEMALE \n", - "...\n", - "\n", - "[146 rows x 7 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# get all the rows with adelie penguins\n", - "adelie_data = df[df.species == \"Adelie Penguin (Pygoscelis adeliae)\"]\n", - "\n", - "# separate out the rows that have a body mass measurement\n", - "training_data = adelie_data[adelie_data.body_mass_g.notnull()]\n", - "\n", - "# we noticed there were also some rows that were missing other values,\n", - "# lets remove these so they don't affect our results\n", - "training_data = training_data.dropna()\n", - "\n", - "# lets take a quick peek and make sure things look right:\n", - "training_data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "d55a39f9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "species 
string[pyarrow]\n", - "island string[pyarrow]\n", - "culmen_length_mm Float64\n", - "culmen_depth_mm Float64\n", - "flipper_length_mm Float64\n", - "body_mass_g Float64\n", - "sex string[pyarrow]\n", - "dtype: object" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# we'll look at the schema too:\n", - "training_data.dtypes" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "59d374b5", - "metadata": {}, - "source": [ - "Great! Now lets configure a linear regression model to predict body mass from the other columns" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "18c4cecf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LinearRegression()" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import bigframes.ml.linear_model as ml\n", - "\n", - "model = ml.LinearRegression()\n", - "model" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "6e54a1a2", - "metadata": {}, - "source": [ - "As in SKLearn, an unfitted model object is just a bundle of parameters." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "a2060cf1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'fit_intercept': True}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# lets view the parameters\n", - "model.get_params()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "8e25fe41", - "metadata": {}, - "source": [ - "For this task, really all the default options are fine. But just so we can see how configuration works, lets specify that we want to use gradient descent to find the solution:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "327e2232", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LinearRegression()" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.optimize_strategy = \"BATCH_GRADIENT_DESCENT\"\n", - "model" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "2c2e0835", - "metadata": {}, - "source": [ - "BigQuery models provide a couple of extra conveniences:\n", - "\n", - "1. By default, they will automatically perform feature engineering on the inputs - encoding our string columns and scaling our numeric columns.\n", - "2. By default, they will also automatically manage the test/training data split for us.\n", - "\n", - "So all we need to do is hook our chosen feature and label columns into the model and call .fit()!" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "085c9a99", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LinearRegression()" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X_train = training_data[['island', 'culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'sex']]\n", - "y_train = training_data[['body_mass_g']]\n", - "model.fit(X_train, y_train)\n", - "model" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "9e76e10c", - "metadata": {}, - "source": [ - "...and there, we've successfully trained a linear regressor model. 
Lets see how it performs, using the automatic data split:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "c9458c02", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mean_absolute_errormean_squared_errormean_squared_log_errormedian_absolute_errorr2_scoreexplained_variance
0223.87876378553.6016340.005614181.3309110.6239510.623951
\n", - "
[1 rows x 6 columns in total]" - ], - "text/plain": [ - " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - "0 223.878763 78553.601634 0.005614 \n", - "\n", - " median_absolute_error r2_score explained_variance \n", - "0 181.330911 0.623951 0.623951 \n", - "\n", - "[1 rows x 6 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.score(X_train, y_train)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f0b39603", - "metadata": {}, - "source": [ - "Great! The model seems useful, predicting 62% of the variance.\n", - "\n", - "We realize we made a mistake though - we're trying to predict mass using a linear model, but mass will increase with the cube of the penguin's size, whereas our inputs are linear with size. Can we improve our model by cubing them?" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "b94eddc7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\ndef cubify(penguin_df):\\n penguin_df.culmen_length_mm = train_x.culmen_length_mm.pow(3)\\n penguin_df.culmen_depth_mm = train_x.culmen_depth_mm.pow(3)\\n penguin_df.flipper_length_mm = train_x.flipper_length_mm.pow(3)\\n\\ncubify(train_x)\\ntrain_x\\n'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# SKIP THIS STEP (not yet working in BigQuery DataFrame)\n", - "\n", - "# lets define a preprocessing step that adjusts the linear measurements to use the cube\n", - "'''\n", - "def cubify(penguin_df):\n", - " penguin_df.culmen_length_mm = X_train.culmen_length_mm.pow(3)\n", - " penguin_df.culmen_depth_mm = X_train.culmen_depth_mm.pow(3)\n", - " penguin_df.flipper_length_mm = X_train.flipper_length_mm.pow(3)\n", - "\n", - "cubify(X_train)\n", - "X_train\n", - "'''" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "1b0e3f02", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\nmodel.fit(train_x, train_y)\\nmodel.evaluate()\\n'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# AS ABOVE, SKIP FOR NOW\n", - "'''\n", - "model.fit(X_train, y_train)\n", - "model.evaluate()\n", - "'''" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "45c5e755", - "metadata": {}, - "source": [ - "Now that we're satisfied with our model, lets see what it predicts for those Adelie penguins with no body mass measurement:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "f21ebc1f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
predicted_body_mass_g
tag_number
13933459.735118
15244304.175638
15233471.668379
15253947.881639
\n", - "
[4 rows x 1 columns in total]" - ], - "text/plain": [ - " predicted_body_mass_g\n", - "tag_number \n", - "1393 3459.735118\n", - "1524 4304.175638\n", - "1523 3471.668379\n", - "1525 3947.881639\n", - "\n", - "[4 rows x 1 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Lets predict the missing observations\n", - "missing_body_mass = adelie_data[adelie_data.body_mass_g.isnull()]\n", - "\n", - "model.predict(missing_body_mass)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "e66bd0b0", - "metadata": {}, - "source": [ - "Because we created it without a name, it was just a temporary model that will disappear after 24 hours. \n", - "\n", - "We decide that this approach is promising, so lets tell BigQuery to save it." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c508691b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LinearRegression()" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.to_gbq(\"bqml_tutorial.penguins_model\", replace=True)\n", - "model" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "46abef08", - "metadata": {}, - "source": [ - "We can now use this model anywhere in BigQuery with this name. We can also load\n", - "it again in our BigQuery DataFrames session and evaluate it or run inference with\n", - "it without needing to retrain it:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "0c87e972", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LinearRegression()" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = bigframes.pandas.read_gbq_model(\"bqml_tutorial.penguins_model\")\n", - "model" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d6ab8def", - "metadata": {}, - "source": [ - "And of course we can retrain it if we like. Lets make another version that is based on all the penguins, so we can test that assumption we made at the beginning that it would be best to separate them:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "f4960452", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mean_absolute_errormean_squared_errormean_squared_log_errormedian_absolute_errorr2_scoreexplained_variance
0224.71743379527.8796230.005693169.2358690.6192870.619287
\n", - "
[1 rows x 6 columns in total]" - ], - "text/plain": [ - " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - "0 224.717433 79527.879623 0.005693 \n", - "\n", - " median_absolute_error r2_score explained_variance \n", - "0 169.235869 0.619287 0.619287 \n", - "\n", - "[1 rows x 6 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# This time we'll take all the training data, for all species\n", - "training_data = df[df.body_mass_g.notnull()]\n", - "training_data = training_data.dropna()\n", - "\n", - "# And we'll include species in our features\n", - "X_train = training_data[['species', 'island', 'culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'sex']]\n", - "y_train = training_data[['body_mass_g']]\n", - "model.fit(X_train, y_train)\n", - "\n", - "# And we'll evaluate it on the Adelie penguins only\n", - "adelie_data = training_data[training_data.species == \"Adelie Penguin (Pygoscelis adeliae)\"]\n", - "X_test = adelie_data[['species', 'island', 'culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'sex']]\n", - "y_test = adelie_data[['body_mass_g']]\n", - "model.score(X_test, y_test)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "7d101140", - "metadata": {}, - "source": [ - "It looks like the conservationists were right! Including other species, even though it gave us more training data, worsened prediction on the Adelie penguins." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "7f3fe50d", - "metadata": {}, - "source": [ - "===============================================\n", - "\n", - "**Everything below this line not yet implemented**" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "62577c72", - "metadata": {}, - "source": [ - "We want to productionalize this model, so lets start publishing it to the vertex model registry ([prerequisites](https://cloud.google.com/bigquery-ml/docs/managing-models-vertex#prerequisites))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b82e79ee", - "metadata": {}, - "outputs": [], - "source": [ - "model.publish(\n", - " registry=\"vertex_ai\",\n", - " vertex_ai_model_version_aliases=[\"experimental\"])" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "69d2482c", - "metadata": {}, - "source": [ - "Now when we fit the model, we can see it published here: https://console.cloud.google.com/vertex-ai/models" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b97d9b64", - "metadata": {}, - "source": [ - "# Custom feature engineering" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c837ace9", - "metadata": {}, - "source": [ - "So far, we've relied on BigQuery to do our feature engineering for us. What if we want to do it manually?\n", - "\n", - "BigQuery DataFrames provides a way to do this using Pipelines." 
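The pipeline in the next cell pushes every feature through `StandardScaler`. Since the penguin features mix numeric measurements with string columns such as `island` and `sex`, a fuller pipeline would route each group through its own preprocessor. Here is a minimal sketch, assuming `bigframes.ml.compose.ColumnTransformer` and `bigframes.ml.preprocessing.OneHotEncoder` mirror their scikit-learn counterparts; treat it as illustrative rather than as the notebook's tested code.

```python
from bigframes.ml.compose import ColumnTransformer
from bigframes.ml.linear_model import LinearRegression
from bigframes.ml.pipeline import Pipeline
from bigframes.ml.preprocessing import OneHotEncoder, StandardScaler

# Assumption: ColumnTransformer accepts (name, transformer, columns) triples,
# as in scikit-learn. Scale the numeric columns; one-hot encode the strings.
preprocessor = ColumnTransformer([
    ("scale", StandardScaler(),
     ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"]),
    ("encode", OneHotEncoder(), ["island", "sex"]),
])

pipe = Pipeline([
    ("preprocess", preprocessor),
    ("linreg", LinearRegression()),
])

pipe.fit(X_train, y_train)
```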
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "480cb12f", - "metadata": {}, - "outputs": [], - "source": [ - "from bigframes.ml.pipeline import Pipeline\n", - "from bigframes.ml.preprocessing import StandardScaler\n", - "\n", - "pipe = Pipeline([\n", - " ('scaler', StandardScaler()),\n", - " ('linreg', LinearRegression())\n", - "])\n", - "\n", - "pipe.fit(X_train, y_train)\n", - "pipe.evaluate()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "9a0e7d19", - "metadata": {}, - "source": [ - "We can then save the entire pipeline to BigQuery; BigQuery will save it as a single model, with the pre-processing steps embedded in the TRANSFORM property:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d1831ed", - "metadata": {}, - "outputs": [], - "source": [ - "pipe.to_gbq(\"bqml_tutorial.penguins_pipeline\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f6b60898", - "metadata": {}, - "source": [ - "# Custom data split" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "60ac0174", - "metadata": {}, - "source": [ - "BigQuery has also managed splitting out our training data. What if we want to do this manually?\n", - "\n", - "*TODO: Write this section*" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - }, - "vscode": { - "interpreter": { - "hash": "a850322d07d9bdc9ec5f301d307e048bcab2390ae395e1cbce9335f4e081e5e2" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/experimental/multimodal_dataframe.ipynb b/notebooks/experimental/multimodal_dataframe.ipynb new file mode 100644 index 0000000000..9c76654a53 --- /dev/null +++ b/notebooks/experimental/multimodal_dataframe.ipynb @@ -0,0 +1,1078 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2025 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YOrUAvz6DMw-" + }, + "source": [ + "# BigFrames Multimodal DataFrame\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", + " View on GitHub\n", + " \n", + " \n", + " \n", + " \"BQ\n", + " Open in BQ Studio\n", + " \n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook is introducing BigFrames experimental Multimodal features:\n", + "1. Create Multimodal DataFrame\n", + "2. Combine unstructured data with structured data\n", + "3. Conduct image transformations\n", + "4. Use LLM models to ask questions and generate embeddings on images\n", + "5. PDF chunking function" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PEAJQQ6AFg-n" + }, + "source": [ + "### Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bGyhLnfEeB0X", + "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/_config/experiment_options.py:68: PreviewWarning: BigFrames Blob is still under experiments. It may not work and subject\n", + "to change in the future.\n", + " warnings.warn(msg, category=bfe.PreviewWarning)\n" + ] + } + ], + "source": [ + "PROJECT = \"bigframes-dev\" # replace with your project, project needs to be allowlisted go/bq-multimodal-allowlist (internal)\n", + "# User must have https://cloud.google.com/bigquery/docs/use-bigquery-dataframes#permissions to use bigframes, BQ connection admin/user to create/use connections, BQ ObjRef permissions for ObjectRef and BQ routines permissions for using transform functions.\n", + "# Or simply has BQ Admin role for all.\n", + "\n", + "import bigframes\n", + "# Setup project\n", + "bigframes.options.bigquery.project = PROJECT\n", + "# Flag to enable the feature\n", + "bigframes.options.experiments.blob = True\n", + "\n", + "bigframes.options.display.progress_bar = None\n", + "\n", + "import bigframes.pandas as bpd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ifKOq7VZGtZy" + }, + "source": [ + "### 1. Create Multimodal DataFrame\n", + "There are several ways to create Multimodal DataFrame. The easiest way is from the wiledcard paths." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fx6YcZJbeYru", + "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/global_session.py:114: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", + " return func(get_global_session(), *args, **kwargs)\n" + ] + } + ], + "source": [ + "# Create blob columns from wildcard path.\n", + "df_image = bpd.from_glob_path(\n", + " \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\n", + ")\n", + "# Other ways are: from string uri column\n", + "# df = bpd.DataFrame({\"uri\": [\"gs:///\", \"gs:///\"]})\n", + "# df[\"blob_col\"] = df[\"uri\"].str.to_blob()\n", + "\n", + "# From an existing object table\n", + "# df = bpd.read_gbq_object_table(\"\", name=\"blob_col\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "HhCb8jRsLe9B", + "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
0
1
2
3
4
\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" + ], + "text/plain": [ + " image\n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "\n", + "[5 rows x 1 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Take only 5 images to work with, and preview the content of the Multimodal DataFrame\n", + "df_image = df_image.head(5)\n", + "df_image" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b6RRZb3qPi_T" + }, + "source": [ + "### 2. Combine unstructured data with structured data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4YJCdmLtR-qu" + }, + "source": [ + "Now you can put more information into the table to describe the files, such as author info from inputs, or other metadata from the GCS object itself." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "YYYVn7NDH0Me" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0aliceimage/png14894052025-03-20 17:44:58+00:00
1bobimage/png15380072025-03-20 17:44:56+00:00
2bobimage/png12375512025-03-20 17:45:14+00:00
3aliceimage/png10404552025-03-20 17:44:45+00:00
4bobimage/png15179382025-03-20 17:45:05+00:00
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" + ], + "text/plain": [ + " image author content_type \\\n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", + "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "\n", + " size updated \n", + "0 1489405 2025-03-20 17:44:58+00:00 \n", + "1 1538007 2025-03-20 17:44:56+00:00 \n", + "2 1237551 2025-03-20 17:45:14+00:00 \n", + "3 1040455 2025-03-20 17:44:45+00:00 \n", + "4 1517938 2025-03-20 17:45:05+00:00 \n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Combine unstructured data with structured data\n", + "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", + "df_image[\"content_type\"] = df_image[\"image\"].blob.content_type()\n", + "df_image[\"size\"] = df_image[\"image\"].blob.size()\n", + "df_image[\"updated\"] = df_image[\"image\"].blob.updated()\n", + "df_image" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NUd4Kog_QLRS" + }, + "source": [ + "Then you can filter the rows based on the structured data. For the different content types, you can display them individually or together." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 75 + }, + "id": "UGuAk9PNDRF3", + "outputId": "73feb33d-4a05-48fb-96e5-3c48c2a456f3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# filter images and display, you can also display audio and video types\n", + "df_image[df_image[\"author\"] == \"alice\"][\"image\"].blob.display()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1IJuakwJTZey" + }, + "source": [ + "### 3. Conduct image transformations\n", + "BigFrames Multimodal DataFrame provides image (and other) transformation functions, such as image_blur, image_resize, and image_normalize. The output can be saved to GCS folders or to BQ as bytes."
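The transformation cells below write their outputs to GCS folders via the `dst=` argument. As a sketch of the "to BQ as bytes" option mentioned above, assuming that omitting `dst` keeps the transformed content as bytes in BigQuery (verify against the current blob API docs):

```python
# A minimal sketch (assumption: with no `dst`, the transformed image is
# stored as bytes in BigQuery rather than written to a GCS folder).
df_image["blurred_bytes"] = df_image["image"].blob.image_blur((20, 20))
```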
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VWsl5BBPJ6N7", + "outputId": "45d2356e-322b-4982-cfa7-42d034dc4344" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/log_adapter.py:164: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", + " return method(self, *args, **kwargs)\n", + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/log_adapter.py:164: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", + " return method(self, *args, **kwargs)\n", + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/log_adapter.py:164: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", + " return method(self, *args, **kwargs)\n" + ] + } + ], + "source": [ + "df_image[\"blurred\"] = df_image[\"image\"].blob.image_blur(\n", + " (20, 20), dst=\"gs://bigframes_blob_test/image_blur_transformed/\"\n", + ")\n", + "df_image[\"resized\"] = df_image[\"image\"].blob.image_resize(\n", + " (300, 200), dst=\"gs://bigframes_blob_test/image_resize_transformed/\"\n", + ")\n", + "df_image[\"normalized\"] = df_image[\"image\"].blob.image_normalize(\n", + " alpha=50.0,\n", + " beta=150.0,\n", + " norm_type=\"minmax\",\n", + " dst=\"gs://bigframes_blob_test/image_normalize_transformed/\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rWCAGC8w64vU", + "outputId": "d7d456f0-8b56-492c-fe1b-967e9664d813" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/log_adapter.py:164: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", + " return method(self, *args, **kwargs)\n" + ] + } + ], + "source": [ + "# You can also chain functions together\n", + "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=\"gs://bigframes_blob_test/image_blur_resize_transformed/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 605 + }, + "id": "6NGK6GYSU44B", + "outputId": "859101c1-2ee4-4f9a-e250-e8947127420a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdatedblurredresizednormalizedblur_resized
0aliceimage/png14894052025-03-20 17:44:58+00:00
1bobimage/png15380072025-03-20 17:44:56+00:00
2bobimage/png12375512025-03-20 17:45:14+00:00
3aliceimage/png10404552025-03-20 17:44:45+00:00
4bobimage/png15179382025-03-20 17:45:05+00:00
\n", + "

5 rows × 9 columns

\n", + "
[5 rows x 9 columns in total]" + ], + "text/plain": [ + " image author content_type \\\n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", + "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "\n", + " size updated \\\n", + "0 1489405 2025-03-20 17:44:58+00:00 \n", + "1 1538007 2025-03-20 17:44:56+00:00 \n", + "2 1237551 2025-03-20 17:45:14+00:00 \n", + "3 1040455 2025-03-20 17:44:45+00:00 \n", + "4 1517938 2025-03-20 17:45:05+00:00 \n", + "\n", + " blurred \\\n", + "0 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "1 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "2 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "3 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "4 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "\n", + " resized \\\n", + "0 {'uri': 'gs://bigframes_blob_test/image_resize... \n", + "1 {'uri': 'gs://bigframes_blob_test/image_resize... \n", + "2 {'uri': 'gs://bigframes_blob_test/image_resize... \n", + "3 {'uri': 'gs://bigframes_blob_test/image_resize... \n", + "4 {'uri': 'gs://bigframes_blob_test/image_resize... \n", + "\n", + " normalized \\\n", + "0 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "1 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "2 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "3 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "4 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "\n", + " blur_resized \n", + "0 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + "1 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + "2 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + "3 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + "4 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + "\n", + "[5 rows x 9 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_image" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Euk5saeVVdTP" + }, + "source": [ + "### 4. Use LLM models to ask questions and generate embeddings on images" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "mRUGfcaFVW-3" + }, + "outputs": [], + "source": [ + "from bigframes.ml import llm\n", + "gemini = llm.GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 657 + }, + "id": "DNFP7CbjWdR9", + "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/array_value.py:107: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0That's a bag of **rabbit food** from the brand **Fluffy Buns**. The specific product is labeled as \"Ranbhow's trood.flee!\" (which appears to be a playful brand name).
1That's hay. More specifically, it looks like a type of grass hay, often used as feed for small animals like rabbits, guinea pigs, and chinchillas.
\n", + "

2 rows × 2 columns

\n", + "
[2 rows x 2 columns in total]" + ], + "text/plain": [ + " ml_generate_text_llm_result \\\n", + "0 That's a bag of **rabbit food** from the brand... \n", + "1 That's hay. More specifically, it looks like ... \n", + "\n", + " image \n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "\n", + "[2 rows x 2 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Ask the same question on the images\n", + "df_image = df_image.head(2)\n", + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", + "answer[[\"ml_generate_text_llm_result\", \"image\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "IG3J3HsKhyBY" + }, + "outputs": [], + "source": [ + "# Ask different questions\n", + "df_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 657 + }, + "id": "qKOb765IiVuD", + "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/array_value.py:107: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0That's a bag of **Fluffy Buns Rabbit Food**. It's a blend of various ingredients designed as food for rabbits.
1The picture is primarily light green. There are some slightly darker green and yellowish-green shades mixed in, but the overall color is light green.
\n", + "

2 rows × 2 columns

\n", + "
[2 rows x 2 columns in total]" + ], + "text/plain": [ + " ml_generate_text_llm_result \\\n", + "0 That's a bag of **Fluffy Buns Rabbit Food**. ... \n", + "1 The picture is primarily light green. There a... \n", + "\n", + " image \n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "\n", + "[2 rows x 2 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", + "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "KATVv2CO5RT1", + "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/array_value.py:107: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n", + "`db_dtypes` is a preview feature and subject to change.\n", + " warnings.warn(msg, bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.01182145 0.01575819 0.06243018 ... 0.00010706 -0.03063935\n", + " -0.05756916]{\"access_urls\":{\"expiry_time\":\"2025-04-09T02:36:17Z\",\"read_url\":\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-rabbit-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250408%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250408T203617Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&generation=1742492698095241&X-Goog-Signature=860fb5fbf48778f66f0ec2d141b26dd7ce2de7cacc427491f5cc3f420361770e33936c79552a562a3db31c8e02a9ea73e73ac9c7c379dfa0e213eda456c48243acea3227c3e9e786859b19e2b74718d7c3447f09ba371d77e3df65a9e2936c9b9ad5ad7ba359bfaa9fc3a2785d32359a9d50ee64f90f6e7d3a20a5c13f38f932c83b143dd2abdd31f0b35ab60aa21293d2cbf7ea780b13ef02d6b1f9aa56538a498d3da13798a1cbe2535b118caeb35f1e5be36d09c9593796b5ecf8b171d4915735644a94d19d7e78351e475da7b75f72fc8f88b2607ce8d1fb53d7dc2aa16da3b6ed2130fd700cbc797d1a6cc495833945b3bdfaf933b9a4dc70ff3299ab4f\",\"write_url\":\"\"},\"objectref\":{\"authorizer\":\"bigframes-dev.us.bigframes-default-connection\",\"details\":{\"gcs_metadata\":{\"content_type\":\"image/png\",\"md5_hash\":\"4c01d79182ea7580183a2168076e16b8\",\"size\":1489405,\"updated\":1742492698000000}},\"uri\":\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/fluffy-buns-rabbit-food.png\",\"version\":\"1742492698095241\"}}
1[ 0.02554693 0.01508185 0.04101892 ... -0.02417112 -0.01356636\n", + " -0.01999673]{\"access_urls\":{\"expiry_time\":\"2025-04-09T02:36:17Z\",\"read_url\":\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-guinea-pig-hay.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250408%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250408T203617Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&generation=1742492696656039&X-Goog-Signature=192e852a5296d31a048af459afe3dc539e2bbf90c65bc2997219e7822bd0ca2858b8e04475e12d14d63d295b45e51403b4f4585a6b66c8b0dbc3adf19e135a93687aeff7ba675eec2aeddb4a1cb4d2b83bee22c7c2de80287af63158a85ee56fa1daccbf31bf42d57e5724ea24bdd630a8a1930d70a5d38fb0340d846848039f53bf4efbc21da6df9a7d91fec727385018b159e4fc53fce0b57ab0c77583361bc4e10b2a7080aafa288789240e565eb58cb9abf2bd298732fddaad4f32472110b2607f6b3a21d9fbce1fc3ecb23caf967a4e3ff5101ae29fc6c65b888930a1306c8deb3b569997a0a364325b3ac0350ff671f2682d9a8a4a96bfac28eb9f9fd8\",\"write_url\":\"\"},\"objectref\":{\"authorizer\":\"bigframes-dev.us.bigframes-default-connection\",\"details\":{\"gcs_metadata\":{\"content_type\":\"image/png\",\"md5_hash\":\"0888367a63729f5a42f4a041596f635d\",\"size\":1538007,\"updated\":1742492696000000}},\"uri\":\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/fluffy-buns-guinea-pig-hay.png\",\"version\":\"1742492696656039\"}}
\n", + "

2 rows × 5 columns

\n", + "
[2 rows x 5 columns in total]" + ], + "text/plain": [ + " ml_generate_embedding_result \\\n", + "0 [ 0.01182145 0.01575819 0.06243018 ... 0.00... \n", + "1 [ 0.02554693 0.01508185 0.04101892 ... -0.02... \n", + "\n", + " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", + "0 \n", + "1 \n", + "\n", + " ml_generate_embedding_end_sec \\\n", + "0 \n", + "1 \n", + "\n", + " content \n", + "0 {\"access_urls\":{\"expiry_time\":\"2025-04-09T02:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2025-04-09T02:3... \n", + "\n", + "[2 rows x 5 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Generate embeddings.\n", + "embed_model = llm.MultimodalEmbeddingGenerator()\n", + "embeddings = embed_model.predict(df_image[\"image\"])\n", + "embeddings" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iRUi8AjG7cIf" + }, + "source": [ + "### 5. PDF chunking function" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "oDDuYtUm5Yiy" + }, + "outputs": [], + "source": [ + "df_pdf = bpd.from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7jLpMYaj7nj8", + "outputId": "06d5456f-580f-4693-adff-2605104b056c" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/log_adapter.py:164: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", + " return method(self, *args, **kwargs)\n" + ] + } + ], + "source": [ + "df_pdf[\"chunked\"] = df_pdf[\"pdf\"].blob.pdf_chunk()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "kaPvJATN7zlw" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", + "0 on a level, stable surface to prevent tipping....\n", + "0 included)\n", + "to maintain the schedule during powe...\n", + "0 digits for Meal 1 will flash.\n", + "\u0000. Use the UP/DO...\n", + "0 paperclip) for 5\n", + "seconds. This will reset all ...\n", + "0 unit with a damp cloth. 
Do not immerse the bas...\n", + "0 continues,\n", + "contact customer support.\n", + "E2: Food ...\n", + "Name: chunked, dtype: string" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chunked = df_pdf[\"chunked\"].explode()\n", + "chunked" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/experimental/semantic_operators.ipynb b/notebooks/experimental/semantic_operators.ipynb index d3fec469b4..fc46a43e7b 100644 --- a/notebooks/experimental/semantic_operators.ipynb +++ b/notebooks/experimental/semantic_operators.ipynb @@ -25,3164 +25,11 @@ }, { "cell_type": "markdown", - "metadata": { - "id": "rWJnGj2ViouP" - }, - "source": [ - "# BigFrames AI (semantic) Operator Tutorial\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", - " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mgOrr256iouQ" - }, - "source": [ - "This notebook provides a hands-on preview of AI operator APIs powered by the Gemini model.\n", - "\n", - "The notebook is divided into two sections. The first section introduces the API syntax with examples, aiming to familiarize you with how AI operators work. The second section applies AI operators to a large real-world dataset and presents performance statistics.\n", - "\n", - "This work is inspired by [this paper](https://arxiv.org/pdf/2407.11418) and powered by BigQuery ML and Vertex AI." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2ymVbJV2iouQ" - }, - "source": [ - "# Preparation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vvVzFzo3iouQ" - }, - "source": [ - "First, import the BigFrames modules.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Jb9glT2ziouQ" - }, - "outputs": [], - "source": [ - "import bigframes\n", - "import bigframes.pandas as bpd" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xQiCWj7OiouQ" - }, - "source": [ - "Make sure the BigFrames version is at least `1.23.0`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LTPpI8IpiouQ" - }, - "outputs": [], - "source": [ - "from packaging.version import Version\n", - "\n", - "assert Version(bigframes.__version__) >= Version(\"1.23.0\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "agxLmtlbiouR" - }, - "source": [ - "Turn on the semantic operator experiment. You will see a warning sign saying that these operators are still under experiments. If you don't turn on the experiment before using the operators, you will get `NotImplemenetedError`s." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1wXqdDr8iouR" - }, - "outputs": [], - "source": [ - "bigframes.options.experiments.semantic_operators = True" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W8TPUvnsqxhv" - }, - "source": [ - "Specify your GCP project and location." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vCkraKOeqJFl" - }, - "outputs": [], - "source": [ - "bpd.options.bigquery.project = 'YOUR_PROJECT_ID'\n", - "bpd.options.bigquery.location = 'US'" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "n95MFlS0iouR" - }, - "source": [ - "**Optional**: turn off the display of progress bar so that only the operation results will be printed out" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5r6ahx7MiouR" - }, - "outputs": [], - "source": [ - "# bpd.options.display.progress_bar = None" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "93iYvp7niouR" - }, - "source": [ - "Create LLM instances. They will be passed in as parameters for each semantic operator.\n", - "\n", - "This tutorial uses the \"gemini-1.5-flash-002\" model for text generation and \"text-embedding-005\" for embedding. While these are recommended, you can choose [other Vertex AI LLM models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models) based on your needs and availability. Ensure you have [sufficient quota](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas) for your chosen models and adjust it if necessary." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tHkymaLNiouR" - }, - "outputs": [], - "source": [ - "from bigframes.ml import llm\n", - "gemini_model = llm.GeminiTextGenerator(model_name=\"gemini-1.5-flash-001\")\n", - "text_embedding_model = llm.TextEmbeddingGenerator(model_name=\"text-embedding-005\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mbFDcvnPiouR" - }, - "source": [ - "**Note**: semantic operators could be expensive over a large set of data. As a result, our team added the option `bigframes.options.compute.semantic_ops_confirmation_threshold` in version `1.31.0` so that BigFrames will ask for your confirmation if the amount of data to be processed is too large. If the number of rows exceeds your threshold, you will see a prompt for your keyboard input -- 'y' to proceed and 'n' to abort. If you abort the operation, no LLM processing will be done.\n", - "\n", - "The default threshold is 0, which means the operators will always ask for confirmations. You are free to adjust the value as needed. You can also set the threshold to `None` to disable this feature." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "F4dZm4b7iouR" - }, - "outputs": [], - "source": [ - "if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", - " bigframes.options.compute.semantic_ops_confirmation_threshold = 1000" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_dEA3G9RiouR" - }, - "source": [ - "If you would like your operations to fail automatically when the data is too large, set `bigframes.options.compute.semantic_ops_threshold_autofail` to `True`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BoUK-cpbiouS" - }, - "outputs": [], - "source": [ - "# if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", - "# bigframes.options.compute.semantic_ops_threshold_autofail = True" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hQft3o3OiouS" - }, - "source": [ - "# API Samples" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dt5Kl-QGiouS" - }, - "source": [ - "You will learn about each semantic operator by trying some examples." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "J7XAT459iouS" - }, - "source": [ - "## Semantic Filtering" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9d5HUIvliouS" - }, - "source": [ - "Semantic filtering allows you to filter your dataframe based on the instruction (i.e. prompt) you provided.\n", - "\n", - "First, create a dataframe:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 190 - }, - "id": "NDpCRGd_iouS", - "outputId": "5048c935-06d3-4ef1-ad87-72e14a30b1b7" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countrycity
0USASeattle
1GermanyBerlin
2JapanKyoto
\n", - "

3 rows × 2 columns

\n", - "
[3 rows x 2 columns in total]" - ], - "text/plain": [ - " country city\n", - "0 USA Seattle\n", - "1 Germany Berlin\n", - "2 Japan Kyoto\n", - "\n", - "[3 rows x 2 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = bpd.DataFrame({'country': ['USA', 'Germany', 'Japan'], 'city': ['Seattle', 'Berlin', 'Kyoto']})\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6AXmT7sniouS" - }, - "source": [ - "Now, filter this dataframe by keeping only the rows where the value in `city` column is the capital of the value in `country` column. The column references could be \"escaped\" by using a pair of braces in your instruction. In this example, your instruction should be like this:\n", - "```\n", - "The {city} is the capital of the {country}.\n", - "```\n", - "\n", - "Note that this is not a Python f-string, so you shouldn't prefix your instruction with an `f`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 127 - }, - "id": "ipW3Z_l4iouS", - "outputId": "ad447459-225a-419c-d4c8-fedac4a9ed0f" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countrycity
1GermanyBerlin
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" - ], - "text/plain": [ - " country city\n", - "1 Germany Berlin\n", - "\n", - "[1 rows x 2 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.semantics.filter(\"The {city} is the capital of the {country}\", model=gemini_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "swKvgfm1iouS" - }, - "source": [ - "The filter operator extracts the information from the referenced column to enrich your instruction with context. The instruction is then sent to the designated model for evaluation. For filtering operations, the LLM is asked to return only `True` or `False` for each row, and the operator removes the rows accordingly." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r_2AAGGoiouS" - }, - "source": [ - "## Semantic Mapping" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vT6skC57iouS" - }, - "source": [ - "Semantic mapping allows you to combine values from multiple columns into a single output based on your instruction.\n", - "\n", - "Here is an example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 190 - }, - "id": "BQ7xeUK3iouS", - "outputId": "33dcb742-77ed-4bea-8dbc-1cf775102a25" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ingredient_1ingredient_2
0BunBeef Patty
1Soy BeanBittern
2SausageLong Bread
\n", - "

3 rows × 2 columns

\n", - "
[3 rows x 2 columns in total]" - ], - "text/plain": [ - " ingredient_1 ingredient_2\n", - "0 Bun Beef Patty\n", - "1 Soy Bean Bittern\n", - "2 Sausage Long Bread\n", - "\n", - "[3 rows x 2 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = bpd.DataFrame({\n", - " \"ingredient_1\": [\"Bun\", \"Soy Bean\", \"Sausage\"],\n", - " \"ingredient_2\": [\"Beef Patty\", \"Bittern\", \"Long Bread\"]\n", - " })\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VFObP2aFiouS" - }, - "source": [ - "Now, you ask LLM what kind of food can be made from the two ingredients in each row. The column reference syntax in your instruction stays the same. In addition, you need to specify the column name by setting the `output_column` parameter to hold the mapping results." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 190 - }, - "id": "PpL24AQFiouS", - "outputId": "e7aff038-bf4b-4833-def8-fe2648e8885b" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ingredient_1ingredient_2food
0BunBeef PattyBurger
1Soy BeanBitternTofu
2SausageLong BreadHotdog
\n", - "

3 rows × 3 columns

\n", - "
[3 rows x 3 columns in total]" - ], - "text/plain": [ - " ingredient_1 ingredient_2 food\n", - "0 Bun Beef Patty Burger \n", - "\n", - "1 Soy Bean Bittern Tofu \n", - "\n", - "2 Sausage Long Bread Hotdog \n", - "\n", - "\n", - "[3 rows x 3 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.semantics.map(\"What is the food made from {ingredient_1} and {ingredient_2}? One word only.\", output_column=\"food\", model=gemini_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "70WTZZfdiouS" - }, - "source": [ - "## Semantic Joining" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "u93uieRaiouS" - }, - "source": [ - "Semantic joining can join two dataframes based on the instruction you provided.\n", - "\n", - "First, you prepare two dataframes:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dffIGEUEiouS" - }, - "outputs": [], - "source": [ - "cities = bpd.DataFrame({'city': ['Seattle', 'Ottawa', 'Berlin', 'Shanghai', 'New Delhi']})\n", - "continents = bpd.DataFrame({'continent': ['North America', 'Africa', 'Asia']})" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Hz0X-0RtiouS" - }, - "source": [ - "You want to join the `cities` with `continents` to form a new dataframe such that, in each row the city from the `cities` data frame is in the continent from the `continents` dataframe. You could re-use the aforementioned column reference syntax:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 221 - }, - "id": "WPIOHEwCiouT", - "outputId": "976586c3-b5db-4088-a46a-44dfbf822ecb" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
citycontinent
0SeattleNorth America
1OttawaNorth America
2ShanghaiAsia
3New DelhiAsia
\n", - "

4 rows × 2 columns

\n", - "
[4 rows x 2 columns in total]" - ], - "text/plain": [ - " city continent\n", - "0 Seattle North America\n", - "1 Ottawa North America\n", - "2 Shanghai Asia\n", - "3 New Delhi Asia\n", - "\n", - "[4 rows x 2 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cities.semantics.join(continents, \"{city} is in {continent}\", model=gemini_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4Qc97GMWiouT" - }, - "source": [ - "!! **Important:** Semantic join can trigger prohibitively expensive operations! This operation first cross joins two dataframes, then invokes semantic filter on each row. That means if you have two dataframes of sizes `M` and `N`, the total number of queries sent to the LLM is on the scale of `M * N`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MUEJXT1IiouT" - }, - "source": [ - "### Self Joins" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QvX-nCogiouT" - }, - "source": [ - "This self-join example demonstrates a special case: what happens when the joining columns exist in both data frames? It turns out that you need to provide extra information in your column references, by attaching \"left.\" and \"right.\" prefixes to your column names.\n", - "\n", - "Create an example data frame:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OIGz5sqxiouW" - }, - "outputs": [], - "source": [ - "animals = bpd.DataFrame({'animal': ['cow', 'cat', 'spider', 'elephant']})" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VmJbuWNniouX" - }, - "source": [ - "You want to compare the weights of these animals, and output all the pairs where the animal on the left is heavier than the animal on the right. In this case, you use `left.animal` and `right.animal` to differentiate the data sources:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 284 - }, - "id": "UHfggdhBiouX", - "outputId": "a439e3aa-1382-4244-951f-127dc8da0fe3" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
animal_leftanimal_right
0cowcat
1cowspider
2catspider
3elephantcow
4elephantcat
5elephantspider
\n", - "

6 rows × 2 columns

\n", - "
[6 rows x 2 columns in total]" - ], - "text/plain": [ - " animal_left animal_right\n", - "0 cow cat\n", - "1 cow spider\n", - "2 cat spider\n", - "3 elephant cow\n", - "4 elephant cat\n", - "5 elephant spider\n", - "\n", - "[6 rows x 2 columns]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "animals.semantics.join(animals, \"{left.animal} generally weighs heavier than {right.animal}\", model=gemini_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KONR7ywqiouX" - }, - "source": [ - "## Semantic Aggregation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "I8iNRogoiouX" - }, - "source": [ - "Semantic aggregation merges all the values in a column into one. At this moment, you can only aggregate a single column per operator call.\n", - "\n", - "Here is an example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 315 - }, - "id": "9tsem17aiouX", - "outputId": "1db5fa6e-b59d-41f5-9c13-db2c9ed0415b" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Movies
0Titanic
1The Wolf of Wall Street
2Killers of the Flower Moon
3The Revenant
4Inception
5Shuttle Island
6The Great Gatsby
\n", - "

7 rows × 1 columns

\n", - "
[7 rows x 1 columns in total]" - ], - "text/plain": [ - " Movies\n", - "0 Titanic\n", - "1 The Wolf of Wall Street\n", - "2 Killers of the Flower Moon\n", - "3 The Revenant\n", - "4 Inception\n", - "5 Shuttle Island\n", - "6 The Great Gatsby\n", - "\n", - "[7 rows x 1 columns]" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = bpd.DataFrame({\n", - " \"Movies\": [\n", - " \"Titanic\",\n", - " \"The Wolf of Wall Street\",\n", - " \"Killers of the Flower Moon\",\n", - " \"The Revenant\",\n", - " \"Inception\",\n", - " \"Shuttle Island\",\n", - " \"The Great Gatsby\",\n", - " ],\n", - "})\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uA9XpV0aiouX" - }, - "source": [ - "You ask the LLM to find the oldest movie:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "KzYoX3mRiouX", - "outputId": "1ac50d7b-dfa7-4c16-8daf-aeb03b6df7a5" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0 Titanic \n", - "\n", - "Name: Movies, dtype: string" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agg_df = df.semantics.agg(\"Find the oldest movie from {Movies}. Reply with only the movie title\", model=gemini_model)\n", - "agg_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "drvn75qJiouX" - }, - "source": [ - "Instead of going through the rows one by one, this operator first batches rows and produces one aggregation result per batch. It then repeatedly batches those intermediate results for further aggregation, until only one value is left. You can set the batch size with the `max_agg_rows` parameter, which defaults to 10." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kU7BsyTyiouX" - }, - "source": [ - "## Semantic Top K" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "s9QePXEoiouX" - }, - "source": [ - "Semantic Top K selects the top K values based on your instruction. Here is an example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bMQqtyZ2iouX" - }, - "outputs": [], - "source": [ - "df = bpd.DataFrame({\"Animals\": [\"Corgi\", \"Orange Cat\", \"Parrot\", \"Tarantula\"]})" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KiljGBSCiouX" - }, - "source": [ - "You want to find the top two most popular pets:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 159 - }, - "id": "OZv5WUGIiouX", - "outputId": "ae1cee27-cc31-455e-c4ac-c0a9a5cf4ca5" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
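To see how the hierarchical batching in the Semantic Aggregation note above translates into request counts, here is a small sketch, not from the original notebook, assuming each batch costs one LLM call:

```python
import math

def num_agg_calls(n_rows: int, max_agg_rows: int = 10) -> int:
    """Count LLM calls for the repeated batching described above."""
    calls = 0
    while n_rows > 1:
        batches = math.ceil(n_rows / max_agg_rows)
        calls += batches  # one call per batch in this round
        n_rows = batches  # the batch results become the next round's input
    return calls

print(num_agg_calls(7))    # 1: all seven movies fit in one batch
print(num_agg_calls(100))  # 11: ten first-round calls, then one final call
```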
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Animals
0Corgi
1Orange Cat
\n", - "

2 rows × 1 columns

\n", - "
[2 rows x 1 columns in total]" - ], - "text/plain": [ - " Animals\n", - "0 Corgi\n", - "1 Orange Cat\n", - "\n", - "[2 rows x 1 columns]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.semantics.top_k(\"{Animals} are more popular as pets\", model=gemini_model, k=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dC8fyu3aiouX" - }, - "source": [ - "Under the hood, the semantic top K operator performs pair-wise comparisons with the LLM. The top K results are returned in the order of their indices instead of their ranks." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sIszJ0zPiouX" - }, - "source": [ - "## Semantic Search" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "e4ojHRKAiouX" - }, - "source": [ - "Semantic search finds the values most similar to your query within a single column. Here is an example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 253 - }, - "id": "gnQSIZ5SiouX", - "outputId": "dd6e1ecb-1bad-4a7c-8065-e56c697d0863" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
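The pair-wise strategy noted above implies a worst case of n·(n−1)/2 comparisons, which bounds the LLM calls for small inputs. A tiny sketch, not from the original notebook; the actual operator may issue fewer calls:

```python
n = 4  # rows in the Animals example above
print(n * (n - 1) // 2)  # at most 6 pair-wise LLM comparisons for this input
```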
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
creatures
0salmon
1sea urchin
2baboons
3frog
4chimpanzee
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" - ], - "text/plain": [ - " creatures\n", - "0 salmon\n", - "1 sea urchin\n", - "2 baboons\n", - "3 frog\n", - "4 chimpanzee\n", - "\n", - "[5 rows x 1 columns]" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = bpd.DataFrame({\"creatures\": [\"salmon\", \"sea urchin\", \"baboons\", \"frog\", \"chimpanzee\"]})\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5apfIaZMiouX" - }, - "source": [ - "You want to get the top 2 creatures that are most similar to \"monkey\":" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 159 - }, - "id": "CkAuFgPYiouY", - "outputId": "723c7604-f53c-43d7-c754-4c91ec198dff" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
creaturessimilarity score
2baboons0.708434
4chimpanzee0.635844
\n", - "

2 rows × 2 columns

\n", - "
[2 rows x 2 columns in total]" - ], - "text/plain": [ - " creatures similarity score\n", - "2 baboons 0.708434\n", - "4 chimpanzee 0.635844\n", - "\n", - "[2 rows x 2 columns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.semantics.search(\"creatures\", query=\"monkey\", top_k = 2, model = text_embedding_model, score_column='similarity score')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GDZeVzFTiouY" - }, - "source": [ - "Note that you are using a text embedding model this time. This model generates embedding vectors for both your query and the values in the search space. The operator then uses BigQuery's built-in VECTOR_SEARCH function to find the nearest neighbors of your query.\n", - "\n", - "In addition, `score_column` is an optional parameter for storing the distances between the results and your query. If not set, the score column won't be attached to the result." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EXNutIXqiouY" - }, - "source": [ - "## Semantic Similarity Join" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BhWrhQMjiouY" - }, - "source": [ - "When you want to perform multiple similarity queries in the same value space, you can use similarity join to simplify your calls. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cUc7-8O6iouY" - }, - "outputs": [], - "source": [ - "df1 = bpd.DataFrame({'animal': ['monkey', 'spider', 'salmon', 'giraffe', 'sparrow']})\n", - "df2 = bpd.DataFrame({'animal': ['scorpion', 'baboon', 'owl', 'elephant', 'tuna']})" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "k96WerOviouY" - }, - "source": [ - "In this example, you want to pick the most related animal from `df2` for each value in `df1`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 253 - }, - "id": "wPV5EkfpiouY", - "outputId": "4be1211d-0353-4b94-8c27-ebd568e8e104" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
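To make concrete what `sim_join` condenses into a single call, this sketch (not from the original notebook) performs the equivalent one-search-per-query loop; it assumes `Series.to_pandas()` for local iteration:

```python
# Minimal sketch: the repeated-search pattern that sim_join replaces.
# Each iteration embeds one query and scans the search space separately.
for q in df1["animal"].to_pandas():
    match = df2.semantics.search("animal", query=q, top_k=1, model=text_embedding_model)
    print(q, "->", match["animal"].to_pandas().tolist())
```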
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
animalanimal_1distance
0monkeybaboon0.620521
1spiderscorpion0.728024
2salmontuna0.782141
3giraffeelephant0.7135
4sparrowowl0.810864
\n", - "

5 rows × 3 columns

\n", - "
[5 rows x 3 columns in total]" - ], - "text/plain": [ - " animal animal_1 distance\n", - "0 monkey baboon 0.620521\n", - "1 spider scorpion 0.728024\n", - "2 salmon tuna 0.782141\n", - "3 giraffe elephant 0.7135\n", - "4 sparrow owl 0.810864\n", - "\n", - "[5 rows x 3 columns]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1.semantics.sim_join(df2, left_on='animal', right_on='animal', top_k=1, model=text_embedding_model, score_column='distance')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GplzD7v0iouY" - }, - "source": [ - "!! **Important:** Like semantic join, this operator can also be very expensive. To guard against unexpected processing of large datasets, use the `bigframes.options.compute.semantic_ops_confirmation_threshold` option to specify a threshold." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uG6FyMH_iouY" - }, - "source": [ - "## Semantic Cluster" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uIh3ViNciouY" - }, - "source": [ - "Semantic Cluster groups similar values together. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jyQ_aT9qiouY" - }, - "outputs": [], - "source": [ - "df = bpd.DataFrame({'Product': ['Smartphone', 'Laptop', 'Coffee Maker', 'T-shirt', 'Jeans']})" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K3IMIFrtiouY" - }, - "source": [ - "You want to cluster these products into 3 groups:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 253 - }, - "id": "0Tc0DqXJiouY", - "outputId": "1c8b6e28-713c-4666-e623-3b2c42c50b30" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
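One concrete guard, which this notebook also applies later on and which is sketched here under the assumption of bigframes >= 1.31.0: set a row-count threshold above which semantic operators ask for confirmation before sending requests.

```python
import bigframes

# Ask for confirmation whenever a semantic operator would process more rows
# than this threshold (sketch; option name as set later in this notebook).
bigframes.options.compute.semantic_ops_confirmation_threshold = 1000
```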
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ProductCluster ID
0Smartphone1
1Laptop1
2Coffee Maker1
3T-shirt1
4Jeans1
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" - ], - "text/plain": [ - " Product Cluster ID\n", - "0 Smartphone 1\n", - "1 Laptop 1\n", - "2 Coffee Maker 1\n", - "3 T-shirt 1\n", - "4 Jeans 1\n", - "\n", - "[5 rows x 2 columns]" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.semantics.cluster_by(column='Product', output_column='Cluster ID', model=text_embedding_model, n_clusters=3)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zWIzYX3niouY" - }, - "source": [ - "This operator uses the embedding model to generate vectors for each value, and then the KMeans algorithm for clustering." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hgj8GoQhiouY" - }, - "source": [ - "# Performance Analyses" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EZomL0BciouY" - }, - "source": [ - "In this section, you will use the public BigQuery Hacker News dataset to perform some heavier workloads. We recommend reading through the code without executing it, to save time and money. The execution results are attached after each cell for your reference.\n", - "\n", - "First, load 3k rows from the table:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 880 - }, - "id": "wRR0SrcSiouY", - "outputId": "3b25f3a3-09c7-4396-9107-4aa4cdb4b963" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
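The two-stage mechanics described above (embed, then KMeans) can be sketched with the bigframes.ml API. This is not the notebook's own code, and the embedding output column name `ml_generate_embedding_result` is an assumption about the model's output schema:

```python
from bigframes.ml.cluster import KMeans

# Sketch of cluster_by's mechanics: embed each value, then cluster the vectors.
embeddings = text_embedding_model.predict(df["Product"])
kmeans = KMeans(n_clusters=3)
kmeans.fit(embeddings[["ml_generate_embedding_result"]])  # assumed column name
clusters = kmeans.predict(embeddings[["ml_generate_embedding_result"]])
```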
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titletextbyscoretimestamptype
0<NA>Well, most people aren&#x27;t alcoholics, so I...slipframe<NA>2021-06-26 02:37:56+00:00comment
1<NA>No, you don&#x27;t really <i>need</i> a smartp...vetinari<NA>2023-04-19 15:56:34+00:00comment
2<NA>It&#x27;s for the late Paul Allen RIP. Should&...lsr_ssri<NA>2018-10-16 01:07:55+00:00comment
3<NA>Yup they are dangerous. Be careful Donald Trump.Sven7<NA>2015-08-10 16:05:54+00:00comment
4<NA>Sure, it&#x27;s totally reasonable. Just point...nicoburns<NA>2020-10-05 11:20:51+00:00comment
5<NA>I wonder how long before special forces start ...autisticcurio<NA>2020-09-01 15:38:50+00:00comment
6The Impending NY Tech Apocalypse: Here's What ...<NA>gaoprea32011-09-27 22:43:27+00:00story
7<NA>Where would you relocate to? I'm assuming that...pavel_lishin<NA>2011-09-16 19:02:01+00:00comment
8Eureca beta is live. A place for your business...<NA>ricardos12012-10-15 13:09:32+00:00story
9<NA>It doesn’t work on Safari, and WebKit based br...archiewood<NA>2023-04-21 16:45:13+00:00comment
10<NA>I guess I don’t see the relevance. Vegans eat ...stevula<NA>2023-01-19 20:05:54+00:00comment
11<NA>I remember watching the American news media go...fareesh<NA>2019-06-17 19:49:17+00:00comment
12<NA>This article is incorrectly using the current ...stale2002<NA>2018-03-18 18:57:21+00:00comment
13<NA>In the firm I made my internship, we have to u...iserlohnmage<NA>2019-10-22 10:41:01+00:00comment
14<NA>The main reason it requires unsafe is for memo...comex<NA>2017-05-05 20:45:37+00:00comment
15Discord vs. IRC Rough Notes<NA>todsacerdoti482024-07-12 18:39:52+00:00story
16<NA>you have to auth again when you use apple pay.empath75<NA>2017-09-12 18:58:20+00:00comment
17<NA>It goes consumer grade, automotive, military, ...moftz<NA>2021-04-13 01:24:03+00:00comment
18<NA>I don&#x27;t have a link handy but the differe...KennyBlanken<NA>2022-05-13 16:08:38+00:00comment
19<NA>&gt; I don&#x27;t think the use case you menti...colanderman<NA>2017-09-28 05:16:06+00:00comment
20<NA>I think you need to watch it again, because yo...vladimirralev<NA>2018-12-07 11:25:52+00:00comment
21Oh dear: new Yahoo anti-spoofing measures brea...<NA>joshreads12014-04-08 13:29:50+00:00story
22How Much Warmer Was Your City in 2016?<NA>smb0612017-02-16 23:26:34+00:00story
23<NA>Except that they clearly never tried to incent...aenis<NA>2022-01-31 17:08:57+00:00comment
24Working Best at Coffee Shops<NA>GiraffeNecktie2492011-04-19 14:25:17+00:00story
\n", - "

25 rows × 6 columns

\n", - "
[3000 rows x 6 columns in total]" - ], - "text/plain": [ - " title \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", - "5 \n", - "6 The Impending NY Tech Apocalypse: Here's What ... \n", - "7 \n", - "8 Eureca beta is live. A place for your business... \n", - "9 \n", - "10 \n", - "11 \n", - "12 \n", - "13 \n", - "14 \n", - "15 Discord vs. IRC Rough Notes \n", - "16 \n", - "17 \n", - "18 \n", - "19 \n", - "20 \n", - "21 Oh dear: new Yahoo anti-spoofing measures brea... \n", - "22 How Much Warmer Was Your City in 2016? \n", - "23 \n", - "24 Working Best at Coffee Shops \n", - "\n", - " text by score \\\n", - "0 Well, most people aren't alcoholics, so I... slipframe \n", - "1 No, you don't really need a smartp... vetinari \n", - "2 It's for the late Paul Allen RIP. Should&... lsr_ssri \n", - "3 Yup they are dangerous. Be careful Donald Trump. Sven7 \n", - "4 Sure, it's totally reasonable. Just point... nicoburns \n", - "5 I wonder how long before special forces start ... autisticcurio \n", - "6 gaoprea 3 \n", - "7 Where would you relocate to? I'm assuming that... pavel_lishin \n", - "8 ricardos 1 \n", - "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", - "10 I guess I don’t see the relevance. Vegans eat ... stevula \n", - "11 I remember watching the American news media go... fareesh \n", - "12 This article is incorrectly using the current ... stale2002 \n", - "13 In the firm I made my internship, we have to u... iserlohnmage \n", - "14 The main reason it requires unsafe is for memo... comex \n", - "15 todsacerdoti 48 \n", - "16 you have to auth again when you use apple pay. empath75 \n", - "17 It goes consumer grade, automotive, military, ... moftz \n", - "18 I don't have a link handy but the differe... KennyBlanken \n", - "19 > I don't think the use case you menti... colanderman \n", - "20 I think you need to watch it again, because yo... vladimirralev \n", - "21 joshreads 1 \n", - "22 smb06 1 \n", - "23 Except that they clearly never tried to incent... 
aenis \n", - "24 GiraffeNecktie 249 \n", - "\n", - " timestamp type \n", - "0 2021-06-26 02:37:56+00:00 comment \n", - "1 2023-04-19 15:56:34+00:00 comment \n", - "2 2018-10-16 01:07:55+00:00 comment \n", - "3 2015-08-10 16:05:54+00:00 comment \n", - "4 2020-10-05 11:20:51+00:00 comment \n", - "5 2020-09-01 15:38:50+00:00 comment \n", - "6 2011-09-27 22:43:27+00:00 story \n", - "7 2011-09-16 19:02:01+00:00 comment \n", - "8 2012-10-15 13:09:32+00:00 story \n", - "9 2023-04-21 16:45:13+00:00 comment \n", - "10 2023-01-19 20:05:54+00:00 comment \n", - "11 2019-06-17 19:49:17+00:00 comment \n", - "12 2018-03-18 18:57:21+00:00 comment \n", - "13 2019-10-22 10:41:01+00:00 comment \n", - "14 2017-05-05 20:45:37+00:00 comment \n", - "15 2024-07-12 18:39:52+00:00 story \n", - "16 2017-09-12 18:58:20+00:00 comment \n", - "17 2021-04-13 01:24:03+00:00 comment \n", - "18 2022-05-13 16:08:38+00:00 comment \n", - "19 2017-09-28 05:16:06+00:00 comment \n", - "20 2018-12-07 11:25:52+00:00 comment \n", - "21 2014-04-08 13:29:50+00:00 story \n", - "22 2017-02-16 23:26:34+00:00 story \n", - "23 2022-01-31 17:08:57+00:00 comment \n", - "24 2011-04-19 14:25:17+00:00 story \n", - "...\n", - "\n", - "[3000 rows x 6 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hacker_news = bpd.read_gbq(\"bigquery-public-data.hacker_news.full\")[['title', 'text', 'by', 'score', 'timestamp', 'type']].head(3000)\n", - "hacker_news" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3e94DPOdiouY" - }, - "source": [ - "Then, keep only the rows that have text content:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mQl8hc1biouY", - "outputId": "2b4ffa85-9d95-4a20-9040-0420c67da2d4" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "2556" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hacker_news_with_texts = hacker_news[hacker_news['text'].notnull()]\n", - "len(hacker_news_with_texts)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JWalDtLDiouZ" - }, - "source": [ - "You can get an idea of the input token length by calculating the average string length." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "PZeg4LCUiouZ", - "outputId": "05b67cac-6b3d-42ef-d6d6-b578a9734f4c" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "390.05125195618155" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hacker_news_with_texts['text'].str.len().mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2IXqskHHiouZ" - }, - "source": [ - "**Optional**: You can raise the confirmation threshold for a smoother experience." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EpjXQ4FViouZ" - }, - "outputs": [], - "source": [ - "if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", - " bigframes.options.compute.semantic_ops_confirmation_threshold = 5000" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SYFB-X1RiouZ" - }, - "source": [ - "Now it's the LLM's turn. You want to keep only the rows whose texts talk about iPhone. This will take several minutes to finish."
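The average string length above can be turned into a rough token estimate with the common ~4-characters-per-token heuristic. A sketch, not from the original notebook; the heuristic is an approximation, not Gemini's tokenizer:

```python
avg_chars = hacker_news_with_texts["text"].str.len().mean()  # ~390 above
rows = len(hacker_news_with_texts)                           # 2556 above
print(f"~{avg_chars / 4:.0f} tokens per row, "
      f"~{rows * avg_chars / 4:,.0f} input tokens total")
```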
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 253 - }, - "id": "rditQlmoiouZ", - "outputId": "2b44dcbf-2ef5-4119-ca05-9b082db9c0c1" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titletextbyscoretimestamptype
9<NA>It doesn’t work on Safari, and WebKit based br...archiewood<NA>2023-04-21 16:45:13+00:00comment
420<NA>Well last time I got angry down votes for sayi...drieddust<NA>2021-01-11 19:27:27+00:00comment
815<NA>New iPhone should be announced on September. L...meerita<NA>2019-07-30 20:54:42+00:00comment
1516<NA>Why would this take a week? i(phone)OS was ori...TheOtherHobbes<NA>2021-06-08 09:25:24+00:00comment
1563<NA>&gt;or because Apple drama brings many clicks?...weberer<NA>2022-09-05 13:16:02+00:00comment
\n", - "

5 rows × 6 columns

\n", - "
[5 rows x 6 columns in total]" - ], - "text/plain": [ - " title text by \\\n", - "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", - "420 Well last time I got angry down votes for sayi... drieddust \n", - "815 New iPhone should be announced on September. L... meerita \n", - "1516 Why would this take a week? i(phone)OS was ori... TheOtherHobbes \n", - "1563 >or because Apple drama brings many clicks?... weberer \n", - "\n", - " score timestamp type \n", - "9 2023-04-21 16:45:13+00:00 comment \n", - "420 2021-01-11 19:27:27+00:00 comment \n", - "815 2019-07-30 20:54:42+00:00 comment \n", - "1516 2021-06-08 09:25:24+00:00 comment \n", - "1563 2022-09-05 13:16:02+00:00 comment \n", - "\n", - "[5 rows x 6 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "iphone_comments = hacker_news_with_texts.semantics.filter(\"The {text} is mainly focused on iPhone\", gemini_model)\n", - "iphone_comments" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yl24sJFIiouZ" - }, + "metadata": {}, "source": [ - "The performance of the semantic operators depends on the length of your input as well as your quota. Here are our benchmark estimates for running the previous operation over data of different sizes, assuming your quota is [the default 200 requests per minute](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas):\n", + "Semantic Operators have been deprecated since version 1.42.0. Please use AI Operators instead.\n", "\n", - "* 800 Rows -> ~4m\n", - "* 2550 Rows -> ~13m\n", - "* 8500 Rows -> ~40m\n", - "\n", - "These numbers can give you a general idea of how fast the operators run." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eo4nfISuiouZ" - }, - "source": [ - "Now, use the LLM to summarize the sentiments towards iPhone:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 253 - }, - "id": "IlKBrNxUiouZ", - "outputId": "818d01e4-1cdf-42a2-9e02-61c4736a8905" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
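Those benchmark numbers follow directly from the quota math: one request per row against 200 requests per minute. A sketch, not from the original notebook:

```python
import math

rows = 2556        # hacker_news_with_texts, from the cells above
quota_rpm = 200    # default Vertex AI quota cited above
print(f"lower bound: ~{math.ceil(rows / quota_rpm)} minutes")  # ~13 minutes
```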
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titletextbyscoretimestamptypesentiment
9<NA>It doesn’t work on Safari, and WebKit based br...archiewood<NA>2023-04-21 16:45:13+00:00commentFrustrated, but hopeful.
420<NA>Well last time I got angry down votes for sayi...drieddust<NA>2021-01-11 19:27:27+00:00commentFrustrated and angry.
815<NA>New iPhone should be announced on September. L...meerita<NA>2019-07-30 20:54:42+00:00commentExcited anticipation.
1516<NA>Why would this take a week? i(phone)OS was ori...TheOtherHobbes<NA>2021-06-08 09:25:24+00:00commentFrustrated, critical, obvious.
1563<NA>&gt;or because Apple drama brings many clicks?...weberer<NA>2022-09-05 13:16:02+00:00commentNegative, clickbait, Apple.
\n", - "

5 rows × 7 columns

\n", - "
[5 rows x 7 columns in total]" - ], - "text/plain": [ - " title text by \\\n", - "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", - "420 Well last time I got angry down votes for sayi... drieddust \n", - "815 New iPhone should be announced on September. L... meerita \n", - "1516 Why would this take a week? i(phone)OS was ori... TheOtherHobbes \n", - "1563 >or because Apple drama brings many clicks?... weberer \n", - "\n", - " score timestamp type \\\n", - "9 2023-04-21 16:45:13+00:00 comment \n", - "420 2021-01-11 19:27:27+00:00 comment \n", - "815 2019-07-30 20:54:42+00:00 comment \n", - "1516 2021-06-08 09:25:24+00:00 comment \n", - "1563 2022-09-05 13:16:02+00:00 comment \n", - "\n", - " sentiment \n", - "9 Frustrated, but hopeful. \n", - " \n", - "420 Frustrated and angry. \n", - " \n", - "815 Excited anticipation. \n", - " \n", - "1516 Frustrated, critical, obvious. \n", - " \n", - "1563 Negative, clickbait, Apple. \n", - " \n", - "\n", - "[5 rows x 7 columns]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "iphone_comments.semantics.map(\"Summarize the sentiment of the {text}. Your answer should have at most 3 words\", output_column=\"sentiment\", model=gemini_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "y7_16T2xiouZ" - }, - "source": [ - "Here is another example: count the number of rows whose authors have animals in their names." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 880 - }, - "id": "CbGwc_uXiouZ", - "outputId": "138acca0-7fb9-495a-e797-0d42495d65e6" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titletextbyscoretimestamptype
0<NA>Well, most people aren&#x27;t alcoholics, so I...slipframe<NA>2021-06-26 02:37:56+00:00comment
1<NA>No, you don&#x27;t really <i>need</i> a smartp...vetinari<NA>2023-04-19 15:56:34+00:00comment
2<NA>It&#x27;s for the late Paul Allen RIP. Should&...lsr_ssri<NA>2018-10-16 01:07:55+00:00comment
3<NA>Yup they are dangerous. Be careful Donald Trump.Sven7<NA>2015-08-10 16:05:54+00:00comment
4<NA>Sure, it&#x27;s totally reasonable. Just point...nicoburns<NA>2020-10-05 11:20:51+00:00comment
5<NA>I wonder how long before special forces start ...autisticcurio<NA>2020-09-01 15:38:50+00:00comment
6The Impending NY Tech Apocalypse: Here's What ...<NA>gaoprea32011-09-27 22:43:27+00:00story
7<NA>Where would you relocate to? I'm assuming that...pavel_lishin<NA>2011-09-16 19:02:01+00:00comment
8Eureca beta is live. A place for your business...<NA>ricardos12012-10-15 13:09:32+00:00story
9<NA>It doesn’t work on Safari, and WebKit based br...archiewood<NA>2023-04-21 16:45:13+00:00comment
10<NA>I guess I don’t see the relevance. Vegans eat ...stevula<NA>2023-01-19 20:05:54+00:00comment
11<NA>I remember watching the American news media go...fareesh<NA>2019-06-17 19:49:17+00:00comment
12<NA>This article is incorrectly using the current ...stale2002<NA>2018-03-18 18:57:21+00:00comment
13<NA>In the firm I made my internship, we have to u...iserlohnmage<NA>2019-10-22 10:41:01+00:00comment
14<NA>The main reason it requires unsafe is for memo...comex<NA>2017-05-05 20:45:37+00:00comment
15Discord vs. IRC Rough Notes<NA>todsacerdoti482024-07-12 18:39:52+00:00story
16<NA>you have to auth again when you use apple pay.empath75<NA>2017-09-12 18:58:20+00:00comment
17<NA>It goes consumer grade, automotive, military, ...moftz<NA>2021-04-13 01:24:03+00:00comment
18<NA>I don&#x27;t have a link handy but the differe...KennyBlanken<NA>2022-05-13 16:08:38+00:00comment
19<NA>&gt; I don&#x27;t think the use case you menti...colanderman<NA>2017-09-28 05:16:06+00:00comment
20<NA>I think you need to watch it again, because yo...vladimirralev<NA>2018-12-07 11:25:52+00:00comment
21Oh dear: new Yahoo anti-spoofing measures brea...<NA>joshreads12014-04-08 13:29:50+00:00story
22How Much Warmer Was Your City in 2016?<NA>smb0612017-02-16 23:26:34+00:00story
23<NA>Except that they clearly never tried to incent...aenis<NA>2022-01-31 17:08:57+00:00comment
24Working Best at Coffee Shops<NA>GiraffeNecktie2492011-04-19 14:25:17+00:00story
\n", - "

25 rows × 6 columns

\n", - "
[3000 rows x 6 columns in total]" - ], - "text/plain": [ - " title \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", - "5 \n", - "6 The Impending NY Tech Apocalypse: Here's What ... \n", - "7 \n", - "8 Eureca beta is live. A place for your business... \n", - "9 \n", - "10 \n", - "11 \n", - "12 \n", - "13 \n", - "14 \n", - "15 Discord vs. IRC Rough Notes \n", - "16 \n", - "17 \n", - "18 \n", - "19 \n", - "20 \n", - "21 Oh dear: new Yahoo anti-spoofing measures brea... \n", - "22 How Much Warmer Was Your City in 2016? \n", - "23 \n", - "24 Working Best at Coffee Shops \n", - "\n", - " text by score \\\n", - "0 Well, most people aren't alcoholics, so I... slipframe \n", - "1 No, you don't really need a smartp... vetinari \n", - "2 It's for the late Paul Allen RIP. Should&... lsr_ssri \n", - "3 Yup they are dangerous. Be careful Donald Trump. Sven7 \n", - "4 Sure, it's totally reasonable. Just point... nicoburns \n", - "5 I wonder how long before special forces start ... autisticcurio \n", - "6 gaoprea 3 \n", - "7 Where would you relocate to? I'm assuming that... pavel_lishin \n", - "8 ricardos 1 \n", - "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", - "10 I guess I don’t see the relevance. Vegans eat ... stevula \n", - "11 I remember watching the American news media go... fareesh \n", - "12 This article is incorrectly using the current ... stale2002 \n", - "13 In the firm I made my internship, we have to u... iserlohnmage \n", - "14 The main reason it requires unsafe is for memo... comex \n", - "15 todsacerdoti 48 \n", - "16 you have to auth again when you use apple pay. empath75 \n", - "17 It goes consumer grade, automotive, military, ... moftz \n", - "18 I don't have a link handy but the differe... KennyBlanken \n", - "19 > I don't think the use case you menti... colanderman \n", - "20 I think you need to watch it again, because yo... vladimirralev \n", - "21 joshreads 1 \n", - "22 smb06 1 \n", - "23 Except that they clearly never tried to incent... 
aenis \n", - "24 GiraffeNecktie 249 \n", - "\n", - " timestamp type \n", - "0 2021-06-26 02:37:56+00:00 comment \n", - "1 2023-04-19 15:56:34+00:00 comment \n", - "2 2018-10-16 01:07:55+00:00 comment \n", - "3 2015-08-10 16:05:54+00:00 comment \n", - "4 2020-10-05 11:20:51+00:00 comment \n", - "5 2020-09-01 15:38:50+00:00 comment \n", - "6 2011-09-27 22:43:27+00:00 story \n", - "7 2011-09-16 19:02:01+00:00 comment \n", - "8 2012-10-15 13:09:32+00:00 story \n", - "9 2023-04-21 16:45:13+00:00 comment \n", - "10 2023-01-19 20:05:54+00:00 comment \n", - "11 2019-06-17 19:49:17+00:00 comment \n", - "12 2018-03-18 18:57:21+00:00 comment \n", - "13 2019-10-22 10:41:01+00:00 comment \n", - "14 2017-05-05 20:45:37+00:00 comment \n", - "15 2024-07-12 18:39:52+00:00 story \n", - "16 2017-09-12 18:58:20+00:00 comment \n", - "17 2021-04-13 01:24:03+00:00 comment \n", - "18 2022-05-13 16:08:38+00:00 comment \n", - "19 2017-09-28 05:16:06+00:00 comment \n", - "20 2018-12-07 11:25:52+00:00 comment \n", - "21 2014-04-08 13:29:50+00:00 story \n", - "22 2017-02-16 23:26:34+00:00 story \n", - "23 2022-01-31 17:08:57+00:00 comment \n", - "24 2011-04-19 14:25:17+00:00 story \n", - "...\n", - "\n", - "[3000 rows x 6 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hacker_news = bpd.read_gbq(\"bigquery-public-data.hacker_news.full\")[['title', 'text', 'by', 'score', 'timestamp', 'type']].head(3000)\n", - "hacker_news" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 880 - }, - "id": "9dzU8SNziouZ", - "outputId": "da8815c1-c411-4afc-d1ca-5e44c75b5b48" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titletextbyscoretimestamptype
24Working Best at Coffee Shops<NA>GiraffeNecktie2492011-04-19 14:25:17+00:00story
98<NA>i resisted switching to chrome for months beca...catshirt<NA>2011-04-06 08:02:24+00:00comment
137FDA reverses marketing ban on Juul e-cigarettes<NA>anigbrowl22024-06-06 16:42:40+00:00story
188<NA>I think it&#x27;s more than hazing. It may be ...bayesianhorse<NA>2015-06-18 16:42:53+00:00comment
209<NA>I like the idea of moving that arrow the way h...rattray<NA>2015-06-08 02:15:30+00:00comment
228<NA>I don&#x27;t understand why a beginner would s...wolco<NA>2019-02-03 14:35:43+00:00comment
290<NA>I leaerned more with one minute of this than a...agumonkey<NA>2016-07-16 06:19:39+00:00comment
303<NA>I've suggested a <i>rationale</i> for the tabo...mechanical_fish<NA>2008-12-17 04:42:02+00:00comment
312<NA>Do you have any reference for this?<p>I&#x27;m...banashark<NA>2023-11-13 19:57:00+00:00comment
322<NA>Default search scope is an option in the Finde...kitsunesoba<NA>2017-08-13 17:15:19+00:00comment
391<NA>Orthogonality and biology aren&#x27;t friends.agumonkey<NA>2016-04-24 16:33:41+00:00comment
396<NA>I chose some random physics book that was good...prawn<NA>2011-03-27 22:29:51+00:00comment
424<NA>Seeing this get huge on Twitter. It&#x27;s the...shenanigoat<NA>2016-01-09 03:04:22+00:00comment
428<NA>Looking through the comments there are a numbe...moomin<NA>2024-10-01 14:37:04+00:00comment
429<NA>Legacy media is a tough business. GBTC is payi...arcticbull<NA>2021-04-16 16:30:33+00:00comment
436<NA>Same thing if you sell unsafe food, yet we hav...jabradoodle<NA>2023-08-03 20:47:52+00:00comment
438<NA>There was briefly a thing called HSCSD (&quot;...LeoPanthera<NA>2019-02-11 19:49:29+00:00comment
446<NA>&gt; This article is a bit comical to read and...lapcat<NA>2023-01-02 16:00:49+00:00comment
453<NA>Large positions are most likely sold off in sm...meowkit<NA>2021-01-27 23:22:48+00:00comment
507<NA>A US-based VPN (or really any VPN) is only goi...RandomBacon<NA>2019-04-05 00:58:58+00:00comment
543<NA><a href=\"https:&#x2F;&#x2F;codeberg.org&#x2F;A...ElectronBadger<NA>2023-12-13 08:13:15+00:00comment
565<NA>It’s much harder for people without hands to w...Aeolun<NA>2024-05-03 11:58:13+00:00comment
612<NA>So by using ADMIN_SL0T instead was it just set...minitoar<NA>2021-03-05 16:07:56+00:00comment
660<NA>Outstanding!cafard<NA>2022-06-09 09:51:54+00:00comment
673<NA>On the other hand, something can be said for &...babby<NA>2013-08-12 00:31:02+00:00comment
\n", - "

25 rows × 6 columns

\n", - "
[123 rows x 6 columns in total]" - ], - "text/plain": [ - " title \\\n", - "24 Working Best at Coffee Shops \n", - "98 \n", - "137 FDA reverses marketing ban on Juul e-cigarettes \n", - "188 \n", - "209 \n", - "228 \n", - "290 \n", - "303 \n", - "312 \n", - "322 \n", - "391 \n", - "396 \n", - "424 \n", - "428 \n", - "429 \n", - "436 \n", - "438 \n", - "446 \n", - "453 \n", - "507 \n", - "543 \n", - "565 \n", - "612 \n", - "660 \n", - "673 \n", - "\n", - " text by \\\n", - "24 GiraffeNecktie \n", - "98 i resisted switching to chrome for months beca... catshirt \n", - "137 anigbrowl \n", - "188 I think it's more than hazing. It may be ... bayesianhorse \n", - "209 I like the idea of moving that arrow the way h... rattray \n", - "228 I don't understand why a beginner would s... wolco \n", - "290 I leaerned more with one minute of this than a... agumonkey \n", - "303 I've suggested a rationale for the tabo... mechanical_fish \n", - "312 Do you have any reference for this?

334 rows × 7 columns

\n", + "

25 rows × 7 columns

\n", "[334 rows x 7 columns in total]" ], "text/plain": [ - " species island culmen_length_mm \\\n", - "0 Gentoo penguin (Pygoscelis papua) Biscoe 50.5 \n", - "1 Gentoo penguin (Pygoscelis papua) Biscoe 45.1 \n", - "2 Adelie Penguin (Pygoscelis adeliae) Torgersen 41.4 \n", - "3 Adelie Penguin (Pygoscelis adeliae) Torgersen 38.6 \n", - "4 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 \n", - ".. ... ... ... \n", - "339 Adelie Penguin (Pygoscelis adeliae) Dream 38.1 \n", - "340 Adelie Penguin (Pygoscelis adeliae) Biscoe 36.4 \n", - "341 Chinstrap penguin (Pygoscelis antarctica) Dream 40.9 \n", - "342 Adelie Penguin (Pygoscelis adeliae) Biscoe 41.3 \n", - "343 Chinstrap penguin (Pygoscelis antarctica) Dream 45.2 \n", + " species island culmen_length_mm \\\n", + "0 Gentoo penguin (Pygoscelis papua) Biscoe 45.2 \n", + "1 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 \n", + "2 Adelie Penguin (Pygoscelis adeliae) Biscoe 37.7 \n", + "3 Gentoo penguin (Pygoscelis papua) Biscoe 46.4 \n", + "4 Gentoo penguin (Pygoscelis papua) Biscoe 46.1 \n", + "5 Adelie Penguin (Pygoscelis adeliae) Torgersen 43.1 \n", + "6 Gentoo penguin (Pygoscelis papua) Biscoe 45.2 \n", + "7 Adelie Penguin (Pygoscelis adeliae) Dream 36.2 \n", + "8 Chinstrap penguin (Pygoscelis antarctica) Dream 46.0 \n", + "9 Gentoo penguin (Pygoscelis papua) Biscoe 54.3 \n", + "11 Adelie Penguin (Pygoscelis adeliae) Torgersen 39.5 \n", + "12 Gentoo penguin (Pygoscelis papua) Biscoe 42.7 \n", + "13 Adelie Penguin (Pygoscelis adeliae) Biscoe 41.0 \n", + "14 Gentoo penguin (Pygoscelis papua) Biscoe 48.5 \n", + "15 Chinstrap penguin (Pygoscelis antarctica) Dream 49.6 \n", + "16 Gentoo penguin (Pygoscelis papua) Biscoe 50.8 \n", + "17 Gentoo penguin (Pygoscelis papua) Biscoe 46.2 \n", + "18 Adelie Penguin (Pygoscelis adeliae) Biscoe 38.8 \n", + "19 Chinstrap penguin (Pygoscelis antarctica) Dream 51.0 \n", + "20 Gentoo penguin (Pygoscelis papua) Biscoe 42.9 \n", + "21 Gentoo penguin (Pygoscelis papua) Biscoe 50.4 \n", + "22 Gentoo penguin (Pygoscelis papua) Biscoe 49.0 \n", + "23 Gentoo penguin (Pygoscelis papua) Biscoe 43.4 \n", + "24 Gentoo penguin (Pygoscelis papua) Biscoe 45.0 \n", + "25 Gentoo penguin (Pygoscelis papua) Biscoe 47.5 \n", "\n", - " culmen_depth_mm flipper_length_mm body_mass_g sex \n", - "0 15.9 225.0 5400.0 MALE \n", - "1 14.5 215.0 5000.0 FEMALE \n", - "2 18.5 202.0 3875.0 MALE \n", - "3 17.0 188.0 2900.0 FEMALE \n", - "4 14.8 217.0 5200.0 FEMALE \n", - ".. ... ... ... ... 
\n", - "339 17.6 187.0 3425.0 FEMALE \n", - "340 17.1 184.0 2850.0 FEMALE \n", - "341 16.6 187.0 3200.0 FEMALE \n", - "342 21.1 195.0 4400.0 MALE \n", - "343 16.6 191.0 3250.0 FEMALE \n", + " culmen_depth_mm flipper_length_mm body_mass_g sex \n", + "0 16.4 223.0 5950.0 MALE \n", + "1 14.5 213.0 4400.0 FEMALE \n", + "2 16.0 183.0 3075.0 FEMALE \n", + "3 15.6 221.0 5000.0 MALE \n", + "4 13.2 211.0 4500.0 FEMALE \n", + "5 19.2 197.0 3500.0 MALE \n", + "6 15.8 215.0 5300.0 MALE \n", + "7 17.3 187.0 3300.0 FEMALE \n", + "8 18.9 195.0 4150.0 FEMALE \n", + "9 15.7 231.0 5650.0 MALE \n", + "11 17.4 186.0 3800.0 FEMALE \n", + "12 13.7 208.0 3950.0 FEMALE \n", + "13 20.0 203.0 4725.0 MALE \n", + "14 15.0 219.0 4850.0 FEMALE \n", + "15 18.2 193.0 3775.0 MALE \n", + "16 17.3 228.0 5600.0 MALE \n", + "17 14.1 217.0 4375.0 FEMALE \n", + "18 17.2 180.0 3800.0 MALE \n", + "19 18.8 203.0 4100.0 MALE \n", + "20 13.1 215.0 5000.0 FEMALE \n", + "21 15.3 224.0 5550.0 MALE \n", + "22 16.1 216.0 5550.0 MALE \n", + "23 14.4 218.0 4600.0 FEMALE \n", + "24 15.4 220.0 5050.0 MALE \n", + "25 14.0 212.0 4875.0 FEMALE \n", "...\n", "\n", "[334 rows x 7 columns]" ] }, - "execution_count": 4, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -253,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -277,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -286,7 +458,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -297,37 +469,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job 582e7c02-bcc6-412a-a513-46ee5dba7ad8 is DONE. 2.7 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 917ff09b-072b-4c55-b26f-1780e2e97519 is DONE. 25.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 2f4e102d-48bc-401f-a781-39830e2c6c9b is DONE. 16.4 kB processed. Open Job" + "Query job 9ce9fb43-306d-46e9-bbe5-d98ee55143bd is DONE. 37.0 kB processed. Open Job" ], "text/plain": [ "" @@ -339,7 +487,7 @@ { "data": { "text/html": [ - "Query job aabe8a28-8dce-4e00-8a8c-18e9e090e6e7 is DONE. 26.3 kB processed. Open Job" + "Query job 8c86156d-ee97-4f66-9dc1-db15ff3d8e8e is DONE. 16.4 kB processed. Open Job" ], "text/plain": [ "" @@ -351,19 +499,7 @@ { "data": { "text/html": [ - "Query job ec9d8798-e28e-44bc-aa8e-44ab28f0214f is DONE. 48 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 8aa0fa94-e43e-41c6-9de3-f0a67392c47f is DONE. 48 Bytes processed. Open Job" + "Query job b8f2b382-b938-4dff-8bdb-129703ade285 is DONE. 37.3 kB processed. 
Open Job" ], "text/plain": [ "" @@ -377,10 +513,10 @@ "output_type": "stream", "text": [ " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - "0 318.358226 151689.571141 0.009814 \n", + "0 297.36838 148892.914876 0.009057 \n", "\n", " median_absolute_error r2_score explained_variance \n", - "0 255.095561 0.780659 0.783304 \n", + "0 238.424052 0.814613 0.816053 \n", "\n", "[1 rows x 6 columns]\n" ] @@ -388,7 +524,7 @@ { "data": { "text/html": [ - "Query job bf6ef937-9583-4aa8-8313-563638465d5f is DONE. 25.9 kB processed. Open Job" + "Query job ec2968f3-1713-4617-8a26-6fe4267f8061 is DONE. 37.0 kB processed. Open Job" ], "text/plain": [ "" @@ -400,7 +536,7 @@ { "data": { "text/html": [ - "Query job 4c8b564c-5bbd-4447-babf-e307524962e5 is DONE. 16.4 kB processed. Open Job" + "Query job c7a1b80f-26f5-41b1-bcdc-b276af141671 is DONE. 16.4 kB processed. Open Job" ], "text/plain": [ "" @@ -412,31 +548,7 @@ { "data": { "text/html": [ - "Query job cd5e337f-6d44-473d-a90b-be8a79bba6bf is DONE. 26.3 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job ad80012d-7c6c-4dbf-9271-2ff7f899f174 is DONE. 48 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 8fc20587-d8ba-4c0f-bed9-3e1cf3c6ae52 is DONE. 48 Bytes processed. Open Job" + "Query job 82054991-c22f-41b3-9802-f16919949e26 is DONE. 37.3 kB processed. Open Job" ], "text/plain": [ "" @@ -450,10 +562,10 @@ "output_type": "stream", "text": [ " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - "0 306.435423 151573.84019 0.008539 \n", + "0 307.6149 139013.303482 0.007907 \n", "\n", " median_absolute_error r2_score explained_variance \n", - "0 244.2899 0.737623 0.742859 \n", + "0 266.589811 0.782835 0.794297 \n", "\n", "[1 rows x 6 columns]\n" ] @@ -461,7 +573,7 @@ { "data": { "text/html": [ - "Query job 90286d2b-e805-4b19-8876-c9973579e9ff is DONE. 25.9 kB processed. Open Job" + "Query job 3e5ae019-7c5b-44ea-8392-85145fdb6802 is DONE. 37.0 kB processed. Open Job" ], "text/plain": [ "" @@ -473,7 +585,7 @@ { "data": { "text/html": [ - "Query job ceb6c8f2-16cc-4758-bde8-3e4975ba1452 is DONE. 16.4 kB processed. Open Job" + "Query job c35dfd28-504d-4d12-b039-da890b9cb51d is DONE. 16.5 kB processed. Open Job" ], "text/plain": [ "" @@ -485,31 +597,7 @@ { "data": { "text/html": [ - "Query job f49434fa-a7e0-406a-bbe2-5651595e3418 is DONE. 26.3 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 5dd7a277-10fe-4117-a354-ef8668a8b913 is DONE. 48 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 4b58b016-9a50-4a66-b86c-8431faad43bf is DONE. 48 Bytes processed. Open Job" + "Query job 29ac1bb3-f864-400e-8cac-0b4c7f78ebcd is DONE. 37.3 kB processed. 
Open Job" ], "text/plain": [ "" @@ -523,10 +611,10 @@ "output_type": "stream", "text": [ " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - "0 253.349578 112039.741164 0.007153 \n", + "0 348.412701 180661.063512 0.01125 \n", "\n", " median_absolute_error r2_score explained_variance \n", - "0 185.916761 0.823381 0.823456 \n", + "0 313.29406 0.744053 0.74537 \n", "\n", "[1 rows x 6 columns]\n" ] @@ -534,7 +622,7 @@ { "data": { "text/html": [ - "Query job ca700ecf-0c08-4286-b979-2bc7a0bee89c is DONE. 25.9 kB processed. Open Job" + "Query job d90f5938-2894-4c93-8691-21162a2fca4c is DONE. 37.0 kB processed. Open Job" ], "text/plain": [ "" @@ -546,7 +634,7 @@ { "data": { "text/html": [ - "Query job f0731e71-7754-47a2-a553-93a61e712533 is DONE. 16.4 kB processed. Open Job" + "Query job 4c6328b3-2d3f-42bb-9f83-4f8c84773c95 is DONE. 16.4 kB processed. Open Job" ], "text/plain": [ "" @@ -558,31 +646,7 @@ { "data": { "text/html": [ - "Query job ae66d34d-5f0a-4297-9d41-57067ae54a9b is DONE. 26.3 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 7655a649-ceca-4792-b764-fb371f5872ec is DONE. 48 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 8b0634c8-73a9-422c-9644-842142dbb059 is DONE. 48 Bytes processed. Open Job" + "Query job 8a885a6a-d3ad-4569-80ce-4f57d9b86105 is DONE. 37.3 kB processed. Open Job" ], "text/plain": [ "" @@ -596,10 +660,10 @@ "output_type": "stream", "text": [ " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - "0 320.381386 155234.800349 0.008638 \n", + "0 309.991882 151820.705254 0.008898 \n", "\n", " median_absolute_error r2_score explained_variance \n", - "0 306.281263 0.793405 0.794504 \n", + "0 212.758708 0.694001 0.694287 \n", "\n", "[1 rows x 6 columns]\n" ] @@ -607,19 +671,7 @@ { "data": { "text/html": [ - "Query job bb26cde9-1991-4e0a-8492-b19d15b1b7aa is DONE. 25.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 7ddd0883-492d-46bc-a588-f3cbab2474bb is DONE. 16.5 kB processed. Open Job" + "Query job d1e60370-11c8-4f49-a8d5-85417662aa51 is DONE. 37.0 kB processed. Open Job" ], "text/plain": [ "" @@ -631,7 +683,7 @@ { "data": { "text/html": [ - "Query job 5de571e4-d2f9-43c7-b014-3d65a3731b64 is DONE. 26.3 kB processed. Open Job" + "Query job d8e8712a-6347-4725-a27d-49810d4acc1c is DONE. 16.5 kB processed. Open Job" ], "text/plain": [ "" @@ -643,19 +695,7 @@ { "data": { "text/html": [ - "Query job d20ac7d8-cd21-4a1f-a200-2dfa6373bcdb is DONE. 48 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 235e8a80-33ea-4a95-a7d0-34e40a8ca396 is DONE. 48 Bytes processed. Open Job" + "Query job 6a0ebaa6-5572-404f-a41d-b90e2c65d948 is DONE. 37.3 kB processed. 
Open Job" ], "text/plain": [ "" @@ -669,10 +709,10 @@ "output_type": "stream", "text": [ " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - "0 303.855563 141869.030392 0.008989 \n", + "0 256.569216 103495.042886 0.006605 \n", "\n", " median_absolute_error r2_score explained_variance \n", - "0 245.102301 0.731737 0.732793 \n", + "0 222.940815 0.818589 0.832344 \n", "\n", "[1 rows x 6 columns]\n" ] @@ -696,145 +736,13 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job 9274ae2e-e9a7-4701-ac64-56632323d02a is DONE. 0 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 22f9477b-de02-4c07-b480-c3270a69d7e0 is DONE. 25.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job ebb192b7-4a9e-4238-b4e6-b630e2f94988 is DONE. 16.5 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 44441e8c-8753-41b0-b1b7-9a6c4eab8c74 is DONE. 26.3 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 239fed9a-b488-47da-a0df-a3b7c6ec40f4 is DONE. 25.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job f4248b2d-3430-426c-872d-8590f2878366 is DONE. 16.4 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job d9f6b034-c300-4dd7-91dd-48fa912f2456 is DONE. 26.3 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job e2f39f5b-2f4c-402a-a8d5-a7cff918508d is DONE. 25.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 54cf3710-b5f4-4aec-b11f-0281126a151a is DONE. 16.4 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 833d13cd-ec59-499b-98f6-95ec18766698 is DONE. 26.3 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 0120e332-0691-44a4-9198-f5c131b8f59c is DONE. 25.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job f4ba7a4c-5fd9-4f97-ab34-a8f139e7472a is DONE. 16.4 kB processed. Open Job" + "Query job 5bdcd65d-7d72-4094-be3a-cf67a1787cf4 is DONE. 37.0 kB processed. Open Job" ], "text/plain": [ "" @@ -846,7 +754,7 @@ { "data": { "text/html": [ - "Query job 857aadfc-2ade-429c-bef8-428e44d48c55 is DONE. 26.3 kB processed. Open Job" + "Query job bb0504b2-b656-4a08-9bf8-dcab0d188022 is DONE. 16.4 kB processed. Open Job" ], "text/plain": [ "" @@ -858,7 +766,7 @@ { "data": { "text/html": [ - "Query job 906d6d34-a506-4957-b07f-7e5ed2e0634b is DONE. 25.9 kB processed. 
Open Job" + "Query job 8c5c4b66-9a14-455a-a3f5-99f0f522713f is DONE. 37.3 kB processed. Open Job" ], "text/plain": [ "" @@ -870,7 +778,7 @@ { "data": { "text/html": [ - "Query job 498563db-3e68-4df7-a2d5-83da6adb49ed is DONE. 16.5 kB processed. Open Job" + "Query job 9c9b81de-35b6-4561-8881-57da8b73cc7f is DONE. 37.0 kB processed. Open Job" ], "text/plain": [ "" @@ -882,7 +790,7 @@ { "data": { "text/html": [ - "Query job 01af95ca-6288-4253-b379-7327e1c9de88 is DONE. 26.3 kB processed. Open Job" + "Query job b781f1aa-6572-49e5-ab8d-f1908b497a1c is DONE. 16.4 kB processed. Open Job" ], "text/plain": [ "" @@ -894,7 +802,7 @@ { "data": { "text/html": [ - "Query job 5ce36d32-6db1-42e5-a8cf-84bb8244a57e is DONE. 48 Bytes processed. Open Job" + "Query job 41a2a58e-0289-4d58-8e39-de286f2a91fb is DONE. 37.3 kB processed. Open Job" ], "text/plain": [ "" @@ -906,7 +814,7 @@ { "data": { "text/html": [ - "Query job e05ec77d-6025-4edd-b5e3-9c4e7a124e71 is DONE. 48 Bytes processed. Open Job" + "Query job 7ee839a9-f77c-49b0-844e-8eecc1647b97 is DONE. 37.0 kB processed. Open Job" ], "text/plain": [ "" @@ -918,7 +826,7 @@ { "data": { "text/html": [ - "Query job 418a4a5d-2bb3-41e5-9e7c-9852389a491b is DONE. 48 Bytes processed. Open Job" + "Query job a317d488-8589-4faa-940b-e59af91caf4d is DONE. 16.5 kB processed. Open Job" ], "text/plain": [ "" @@ -930,7 +838,7 @@ { "data": { "text/html": [ - "Query job b33e30da-cfed-4d6f-b227-f433d97879cb is DONE. 48 Bytes processed. Open Job" + "Query job 2de96ea8-519a-4976-a641-eb26a4bd38fb is DONE. 37.3 kB processed. Open Job" ], "text/plain": [ "" @@ -942,7 +850,7 @@ { "data": { "text/html": [ - "Query job 7ad7f0c8-ecae-4ef2-bc91-0ebeb5f88e7b is DONE. 48 Bytes processed. Open Job" + "Query job 41a7d5a0-c76b-4ef3-a3da-d4d5a2ebbb0e is DONE. 37.0 kB processed. Open Job" ], "text/plain": [ "" @@ -954,7 +862,7 @@ { "data": { "text/html": [ - "Query job a6e8bd12-1122-4c26-b0e1-58342238016c is DONE. 48 Bytes processed. Open Job" + "Query job 9e82ddc9-8461-4644-ba34-957a7426ff8e is DONE. 16.4 kB processed. Open Job" ], "text/plain": [ "" @@ -966,7 +874,7 @@ { "data": { "text/html": [ - "Query job c553439c-9586-479c-92c5-01a0d333125b is DONE. 48 Bytes processed. Open Job" + "Query job 0fa84d07-fdfa-41c9-b601-9326a94f3a09 is DONE. 37.3 kB processed. Open Job" ], "text/plain": [ "" @@ -978,7 +886,7 @@ { "data": { "text/html": [ - "Query job c598d64c-26b9-49fc-afad-a6544b38cfa2 is DONE. 48 Bytes processed. Open Job" + "Query job d4495568-f1b5-431b-b892-4fc7dcbccfd5 is DONE. 37.0 kB processed. Open Job" ], "text/plain": [ "" @@ -990,7 +898,7 @@ { "data": { "text/html": [ - "Query job ebcb73e8-1294-4f10-b826-c495046fd714 is DONE. 48 Bytes processed. Open Job" + "Query job af1e6460-3078-4a8b-8992-9e7df9dcfbb3 is DONE. 16.5 kB processed. Open Job" ], "text/plain": [ "" @@ -1002,7 +910,7 @@ { "data": { "text/html": [ - "Query job d73f57ba-a25d-4b90-b474-13d81a3e22ab is DONE. 48 Bytes processed. Open Job" + "Query job f14401bf-fd80-401a-a61d-52614fba1ca7 is DONE. 37.3 kB processed. 
Open Job" ], "text/plain": [ "" @@ -1015,53 +923,53 @@ "data": { "text/plain": [ "{'test_score': [ mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - " 0 237.154735 97636.17064 0.005571 \n", + " 0 322.341485 157616.627179 0.009137 \n", " \n", " median_absolute_error r2_score explained_variance \n", - " 0 187.883888 0.842018 0.846816 \n", + " 0 269.412639 0.705594 0.724882 \n", " \n", " [1 rows x 6 columns],\n", " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - " 0 304.281635 141966.045867 0.008064 \n", + " 0 289.682121 136550.318797 0.00878 \n", " \n", " median_absolute_error r2_score explained_variance \n", - " 0 236.096453 0.762979 0.764008 \n", + " 0 212.874686 0.799363 0.81416 \n", " \n", " [1 rows x 6 columns],\n", " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - " 0 316.380322 157332.146085 0.009699 \n", + " 0 325.358522 155218.752974 0.009606 \n", " \n", " median_absolute_error r2_score explained_variance \n", - " 0 222.824496 0.764607 0.765369 \n", + " 0 267.301671 0.777174 0.7782 \n", " \n", " [1 rows x 6 columns],\n", " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - " 0 309.609657 152421.826588 0.009772 \n", + " 0 286.874056 120586.575364 0.007484 \n", " \n", " median_absolute_error r2_score explained_variance \n", - " 0 254.163976 0.772954 0.773119 \n", + " 0 247.656578 0.79281 0.796001 \n", " \n", " [1 rows x 6 columns],\n", " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - " 0 339.339345 169760.629993 0.010597 \n", + " 0 287.989397 145947.465344 0.008447 \n", " \n", " median_absolute_error r2_score explained_variance \n", - " 0 312.335706 0.741167 0.74118 \n", + " 0 186.777549 0.791452 0.798825 \n", " \n", " [1 rows x 6 columns]],\n", - " 'fit_time': [18.200648623984307,\n", - " 17.565149880945683,\n", - " 18.202434757025912,\n", - " 18.04062689607963,\n", - " 19.370970834977925],\n", - " 'score_time': [4.76077218609862,\n", - " 4.577479084953666,\n", - " 4.581933492794633,\n", - " 4.741644307971001,\n", - " 5.1031754210125655]}" + " 'fit_time': [18.79181448201416,\n", + " 19.092008439009078,\n", + " 75.7446747609647,\n", + " 17.520530884969048,\n", + " 21.157033596013207],\n", + " 'score_time': [4.247669544012751,\n", + " 6.792615927988663,\n", + " 4.502274781989399,\n", + " 4.484583999030292,\n", + " 4.224339194013737]}" ] }, - "execution_count": 10, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -1097,7 +1005,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.15" } }, "nbformat": 4, diff --git a/notebooks/ml/easy_linear_regression.ipynb b/notebooks/ml/easy_linear_regression.ipynb index fdabd82a4b..5a7258a182 100644 --- a/notebooks/ml/easy_linear_regression.ipynb +++ b/notebooks/ml/easy_linear_regression.ipynb @@ -52,20 +52,9 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Dataset(DatasetReference('shobs-test', 'bqml_tutorial'))" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "dataset = f\"{session.bqclient.project}.bqml_tutorial\"\n", "session.bqclient.create_dataset(dataset, exists_ok=True)" @@ -96,383 +85,9 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Query job 
525fc879-1f59-45e8-96b4-f9c67d244d06 is DONE. 0 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 91aa1b30-2b0e-41eb-9bfb-4f6232913b31 is DONE. 28.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
speciesislandculmen_length_mmculmen_depth_mmflipper_length_mmbody_mass_gsex
0Adelie Penguin (Pygoscelis adeliae)Biscoe40.118.9188.04300.0MALE
1Adelie Penguin (Pygoscelis adeliae)Torgersen39.118.7181.03750.0MALE
2Gentoo penguin (Pygoscelis papua)Biscoe47.414.6212.04725.0FEMALE
3Chinstrap penguin (Pygoscelis antarctica)Dream42.516.7187.03350.0FEMALE
4Adelie Penguin (Pygoscelis adeliae)Biscoe43.219.0197.04775.0MALE
5Gentoo penguin (Pygoscelis papua)Biscoe46.715.3219.05200.0MALE
6Adelie Penguin (Pygoscelis adeliae)Biscoe41.321.1195.04400.0MALE
7Gentoo penguin (Pygoscelis papua)Biscoe45.213.8215.04750.0FEMALE
8Gentoo penguin (Pygoscelis papua)Biscoe46.513.5210.04550.0FEMALE
9Gentoo penguin (Pygoscelis papua)Biscoe50.515.2216.05000.0FEMALE
10Gentoo penguin (Pygoscelis papua)Biscoe48.215.6221.05100.0MALE
11Adelie Penguin (Pygoscelis adeliae)Dream38.118.6190.03700.0FEMALE
12Gentoo penguin (Pygoscelis papua)Biscoe50.715.0223.05550.0MALE
13Adelie Penguin (Pygoscelis adeliae)Biscoe37.820.0190.04250.0MALE
14Adelie Penguin (Pygoscelis adeliae)Biscoe35.017.9190.03450.0FEMALE
15Gentoo penguin (Pygoscelis papua)Biscoe48.715.7208.05350.0MALE
16Adelie Penguin (Pygoscelis adeliae)Torgersen34.621.1198.04400.0MALE
17Gentoo penguin (Pygoscelis papua)Biscoe46.815.4215.05150.0MALE
18Chinstrap penguin (Pygoscelis antarctica)Dream50.320.0197.03300.0MALE
19Adelie Penguin (Pygoscelis adeliae)Dream37.218.1178.03900.0MALE
20Chinstrap penguin (Pygoscelis antarctica)Dream51.018.8203.04100.0MALE
21Adelie Penguin (Pygoscelis adeliae)Biscoe40.517.9187.03200.0FEMALE
22Gentoo penguin (Pygoscelis papua)Biscoe45.513.9210.04200.0FEMALE
23Adelie Penguin (Pygoscelis adeliae)Dream42.218.5180.03550.0FEMALE
24Chinstrap penguin (Pygoscelis antarctica)Dream51.720.3194.03775.0MALE
\n", - "

25 rows × 7 columns

\n", - "
[344 rows x 7 columns in total]" - ], - "text/plain": [ - " species island culmen_length_mm \\\n", - "0 Adelie Penguin (Pygoscelis adeliae) Biscoe 40.1 \n", - "1 Adelie Penguin (Pygoscelis adeliae) Torgersen 39.1 \n", - "2 Gentoo penguin (Pygoscelis papua) Biscoe 47.4 \n", - "3 Chinstrap penguin (Pygoscelis antarctica) Dream 42.5 \n", - "4 Adelie Penguin (Pygoscelis adeliae) Biscoe 43.2 \n", - "5 Gentoo penguin (Pygoscelis papua) Biscoe 46.7 \n", - "6 Adelie Penguin (Pygoscelis adeliae) Biscoe 41.3 \n", - "7 Gentoo penguin (Pygoscelis papua) Biscoe 45.2 \n", - "8 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 \n", - "9 Gentoo penguin (Pygoscelis papua) Biscoe 50.5 \n", - "10 Gentoo penguin (Pygoscelis papua) Biscoe 48.2 \n", - "11 Adelie Penguin (Pygoscelis adeliae) Dream 38.1 \n", - "12 Gentoo penguin (Pygoscelis papua) Biscoe 50.7 \n", - "13 Adelie Penguin (Pygoscelis adeliae) Biscoe 37.8 \n", - "14 Adelie Penguin (Pygoscelis adeliae) Biscoe 35.0 \n", - "15 Gentoo penguin (Pygoscelis papua) Biscoe 48.7 \n", - "16 Adelie Penguin (Pygoscelis adeliae) Torgersen 34.6 \n", - "17 Gentoo penguin (Pygoscelis papua) Biscoe 46.8 \n", - "18 Chinstrap penguin (Pygoscelis antarctica) Dream 50.3 \n", - "19 Adelie Penguin (Pygoscelis adeliae) Dream 37.2 \n", - "20 Chinstrap penguin (Pygoscelis antarctica) Dream 51.0 \n", - "21 Adelie Penguin (Pygoscelis adeliae) Biscoe 40.5 \n", - "22 Gentoo penguin (Pygoscelis papua) Biscoe 45.5 \n", - "23 Adelie Penguin (Pygoscelis adeliae) Dream 42.2 \n", - "24 Chinstrap penguin (Pygoscelis antarctica) Dream 51.7 \n", - "\n", - " culmen_depth_mm flipper_length_mm body_mass_g sex \n", - "0 18.9 188.0 4300.0 MALE \n", - "1 18.7 181.0 3750.0 MALE \n", - "2 14.6 212.0 4725.0 FEMALE \n", - "3 16.7 187.0 3350.0 FEMALE \n", - "4 19.0 197.0 4775.0 MALE \n", - "5 15.3 219.0 5200.0 MALE \n", - "6 21.1 195.0 4400.0 MALE \n", - "7 13.8 215.0 4750.0 FEMALE \n", - "8 13.5 210.0 4550.0 FEMALE \n", - "9 15.2 216.0 5000.0 FEMALE \n", - "10 15.6 221.0 5100.0 MALE \n", - "11 18.6 190.0 3700.0 FEMALE \n", - "12 15.0 223.0 5550.0 MALE \n", - "13 20.0 190.0 4250.0 MALE \n", - "14 17.9 190.0 3450.0 FEMALE \n", - "15 15.7 208.0 5350.0 MALE \n", - "16 21.1 198.0 4400.0 MALE \n", - "17 15.4 215.0 5150.0 MALE \n", - "18 20.0 197.0 3300.0 MALE \n", - "19 18.1 178.0 3900.0 MALE \n", - "20 18.8 203.0 4100.0 MALE \n", - "21 17.9 187.0 3200.0 FEMALE \n", - "22 13.9 210.0 4200.0 FEMALE \n", - "23 18.5 180.0 3550.0 FEMALE \n", - "24 20.3 194.0 3775.0 MALE \n", - "...\n", - "\n", - "[344 rows x 7 columns]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# read a BigQuery table to a BigQuery DataFrame\n", "df = bigframes.pandas.read_gbq(f\"bigquery-public-data.ml_datasets.penguins\")\n", @@ -491,357 +106,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Query job d2bd7c5e-2652-4c0d-8495-8ef65e89031b is DONE. 28.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 92f0a5e5-bc61-426f-a9ef-213a1c376851 is DONE. 28.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
islandculmen_length_mmculmen_depth_mmflipper_length_mmbody_mass_gsex
0Biscoe40.118.9188.04300.0MALE
1Torgersen39.118.7181.03750.0MALE
4Biscoe43.219.0197.04775.0MALE
6Biscoe41.321.1195.04400.0MALE
11Dream38.118.6190.03700.0FEMALE
13Biscoe37.820.0190.04250.0MALE
14Biscoe35.017.9190.03450.0FEMALE
16Torgersen34.621.1198.04400.0MALE
19Dream37.218.1178.03900.0MALE
21Biscoe40.517.9187.03200.0FEMALE
23Dream42.218.5180.03550.0FEMALE
30Dream39.221.1196.04150.0MALE
32Torgersen42.917.6196.04700.0MALE
38Dream41.117.5190.03900.0MALE
40Torgersen38.621.2191.03800.0MALE
42Biscoe35.516.2195.03350.0FEMALE
44Dream39.218.6190.04250.0MALE
45Torgersen35.215.9186.03050.0FEMALE
46Dream43.218.5192.04100.0MALE
49Biscoe39.617.7186.03500.0FEMALE
53Biscoe45.620.3191.04600.0MALE
58Torgersen40.916.8191.03700.0FEMALE
60Torgersen40.318.0195.03250.0FEMALE
62Dream36.018.5186.03100.0FEMALE
63Torgersen39.320.6190.03650.0MALE
\n", - "

25 rows × 6 columns

\n", - "
[146 rows x 6 columns in total]" - ], - "text/plain": [ - " island culmen_length_mm culmen_depth_mm flipper_length_mm \\\n", - "0 Biscoe 40.1 18.9 188.0 \n", - "1 Torgersen 39.1 18.7 181.0 \n", - "4 Biscoe 43.2 19.0 197.0 \n", - "6 Biscoe 41.3 21.1 195.0 \n", - "11 Dream 38.1 18.6 190.0 \n", - "13 Biscoe 37.8 20.0 190.0 \n", - "14 Biscoe 35.0 17.9 190.0 \n", - "16 Torgersen 34.6 21.1 198.0 \n", - "19 Dream 37.2 18.1 178.0 \n", - "21 Biscoe 40.5 17.9 187.0 \n", - "23 Dream 42.2 18.5 180.0 \n", - "30 Dream 39.2 21.1 196.0 \n", - "32 Torgersen 42.9 17.6 196.0 \n", - "38 Dream 41.1 17.5 190.0 \n", - "40 Torgersen 38.6 21.2 191.0 \n", - "42 Biscoe 35.5 16.2 195.0 \n", - "44 Dream 39.2 18.6 190.0 \n", - "45 Torgersen 35.2 15.9 186.0 \n", - "46 Dream 43.2 18.5 192.0 \n", - "49 Biscoe 39.6 17.7 186.0 \n", - "53 Biscoe 45.6 20.3 191.0 \n", - "58 Torgersen 40.9 16.8 191.0 \n", - "60 Torgersen 40.3 18.0 195.0 \n", - "62 Dream 36.0 18.5 186.0 \n", - "63 Torgersen 39.3 20.6 190.0 \n", - "\n", - " body_mass_g sex \n", - "0 4300.0 MALE \n", - "1 3750.0 MALE \n", - "4 4775.0 MALE \n", - "6 4400.0 MALE \n", - "11 3700.0 FEMALE \n", - "13 4250.0 MALE \n", - "14 3450.0 FEMALE \n", - "16 4400.0 MALE \n", - "19 3900.0 MALE \n", - "21 3200.0 FEMALE \n", - "23 3550.0 FEMALE \n", - "30 4150.0 MALE \n", - "32 4700.0 MALE \n", - "38 3900.0 MALE \n", - "40 3800.0 MALE \n", - "42 3350.0 FEMALE \n", - "44 4250.0 MALE \n", - "45 3050.0 FEMALE \n", - "46 4100.0 MALE \n", - "49 3500.0 FEMALE \n", - "53 4600.0 MALE \n", - "58 3700.0 FEMALE \n", - "60 3250.0 FEMALE \n", - "62 3100.0 FEMALE \n", - "63 3650.0 MALE \n", - "...\n", - "\n", - "[146 rows x 6 columns]" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# filter down to the data we want to analyze\n", "adelie_data = df[df.species == \"Adelie Penguin (Pygoscelis adeliae)\"]\n", @@ -880,56 +147,9 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Query job 43c8fdc2-0bc3-4607-a36d-5bee87c894d8 is DONE. 28.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 97e0c84d-aa6a-4197-9377-740d973ea44d is DONE. 28.9 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 726b9a5e-48a1-4ced-ac34-fa028dcb2bf4 is DONE. 0 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "LinearRegression()" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from bigframes.ml.linear_model import LinearRegression\n", "\n", @@ -942,104 +162,9 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Query job 28975567-2526-40f7-a7be-9dee6f782b4e is DONE. 9.5 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 5c71d3d9-0e1c-45bd-866f-1f98f056260d is DONE. 0 Bytes processed. 
Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 890767f7-a83b-469a-9f3e-abd5667f8202 is DONE. 48 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mean_absolute_errormean_squared_errormean_squared_log_errormedian_absolute_errorr2_scoreexplained_variance
0223.87876378553.6016340.005614181.3309110.6239510.623951
\n", - "

1 rows × 6 columns

\n", - "
[1 rows x 6 columns in total]" - ], - "text/plain": [ - " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - "0 223.878763 78553.601634 0.005614 \n", - "\n", - " median_absolute_error r2_score explained_variance \n", - "0 181.330911 0.623951 0.623951 \n", - "\n", - "[1 rows x 6 columns]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# check how the model performed\n", "model.score(feature_columns, label_columns)" @@ -1047,103 +172,9 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Query job d59df3e8-cf87-4340-a4c7-a27c3abfcc50 is DONE. 29.1 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 5af493aa-96f9-434f-a101-ec855f4de694 is DONE. 8 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job e2076bc3-3966-4c45-8265-c461756a7782 is DONE. 0 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job e9cdfca7-30f6-4e93-95fb-244896e7c2ab is DONE. 16 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
predicted_body_mass_g
3345891.735118
\n", - "

1 rows × 1 columns

\n", - "
[1 rows x 1 columns in total]" - ], - "text/plain": [ - " predicted_body_mass_g\n", - "334 5891.735118\n", - "\n", - "[1 rows x 1 columns]" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# use the model to predict the missing labels\n", "model.predict(missing_body_mass)" @@ -1159,32 +190,9 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Copy job cb4ef454-10df-4325-b9cb-6084df3ac9d5 is DONE. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "LinearRegression(optimize_strategy='NORMAL_EQUATION')" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# save the model to a permanent location in BigQuery, so we can use it in future sessions (and elsewhere in BQ)\n", "model.to_gbq(penguins_model, replace=True)" @@ -1199,20 +207,9 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LinearRegression(optimize_strategy='NORMAL_EQUATION')" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# WARNING - until b/281709360 is fixed & pipeline is updated, pipelines will load as models,\n", "# and details of their transform steps will be lost (the loaded model will behave the same)\n", diff --git a/notebooks/remote_functions/remote_function.ipynb b/notebooks/remote_functions/remote_function.ipynb index 2114311e10..e2bc88ecae 100644 --- a/notebooks/remote_functions/remote_function.ipynb +++ b/notebooks/remote_functions/remote_function.ipynb @@ -174,7 +174,7 @@ "source": [ "# User defined function\n", "# https://www.codespeedy.com/find-nth-prime-number-in-python/\n", - "def nth_prime(n):\n", + "def nth_prime(n: int) -> int:\n", " prime_numbers = [2,3]\n", " i=3\n", " if(0 int:\n", " prime_numbers = [2,3]\n", " i=3\n", " if(0 str:\n", " if duration_minutes < 90:\n", " return \"short\"\n", @@ -466,7 +466,7 @@ } ], "source": [ - "@bpd.remote_function(reuse=False)\n", + "@bpd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n", "def duration_category(duration_minutes: int) -> str:\n", " if duration_minutes < 90:\n", " return DURATION_CATEGORY_SHORT\n", @@ -675,7 +675,7 @@ } ], "source": [ - "@bpd.remote_function(reuse=False)\n", + "@bpd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n", "def duration_category(duration_minutes: int) -> str:\n", " duration_hours = mymath.ceil(duration_minutes / 60)\n", " return f\"{duration_hours}h\"\n", @@ -886,7 +886,7 @@ } ], "source": [ - "@bpd.remote_function(reuse=False)\n", + "@bpd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n", "def duration_category(duration_minutes: int) -> str:\n", " duration_hours = get_hour_ceiling(duration_minutes)\n", " return f\"{duration_hours} hrs\"\n", @@ -1068,7 +1068,7 @@ } ], "source": [ - "@bpd.remote_function(reuse=False, packages=[\"cryptography\"])\n", + "@bpd.remote_function(reuse=False, packages=[\"cryptography\"], cloud_function_service_account=\"default\")\n", "def get_hash(input: str) -> str:\n", " from cryptography.fernet import Fernet\n", "\n", @@ -1271,7 +1271,7 @@ } ], "source": [ - "@bpd.remote_function(reuse=False, packages=[\"humanize\"])\n", + 
"@bpd.remote_function(reuse=False, packages=[\"humanize\"], cloud_function_service_account=\"default\")\n", "def duration_category(duration_minutes: int) -> str:\n", " timedelta = dt.timedelta(minutes=duration_minutes)\n", " return humanize.naturaldelta(timedelta)\n", @@ -1442,7 +1442,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb index 78f0d27474..605f879bc7 100644 --- a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb +++ b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb @@ -286,7 +286,9 @@ "source": [ "@bpd.remote_function(packages=[\"anthropic[vertex]\", \"google-auth[requests]\"],\n", " max_batching_rows=1, \n", - " bigquery_connection=\"bigframes-dev.us-east5.bigframes-rf-conn\") # replace with your connection\n", + " bigquery_connection=\"bigframes-dev.us-east5.bigframes-rf-conn\", # replace with your connection\n", + " cloud_function_service_account=\"default\",\n", + ")\n", "def anthropic_transformer(message: str) -> str:\n", " from anthropic import AnthropicVertex\n", " client = AnthropicVertex(region=LOCATION, project_id=PROJECT)\n", diff --git a/noxfile.py b/noxfile.py index bcab34d0c0..bf9a435b0f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -29,7 +29,9 @@ import nox.sessions BLACK_VERSION = "black==22.3.0" +FLAKE8_VERSION = "flake8==7.1.2" ISORT_VERSION = "isort==5.12.0" +MYPY_VERSION = "mypy==1.15.0" # TODO: switch to 3.13 once remote functions / cloud run adds a runtime for it (internal issue 333742751) LATEST_FULLY_SUPPORTED_PYTHON = "3.12" @@ -67,7 +69,6 @@ UNIT_TEST_STANDARD_DEPENDENCIES = [ "mock", "asyncmock", - "freezegun", PYTEST_VERSION, "pytest-cov", "pytest-asyncio", @@ -135,7 +136,7 @@ def lint(session): Returns a failure if the linters find linting errors or sufficiently serious code quality issues. """ - session.install("flake8", BLACK_VERSION, ISORT_VERSION) + session.install(FLAKE8_VERSION, BLACK_VERSION, ISORT_VERSION) session.run( "isort", "--check", @@ -184,6 +185,14 @@ def lint_setup_py(session): session.install("docutils", "pygments") session.run("python", "setup.py", "check", "--restructuredtext", "--strict") + session.install("twine", "wheel") + shutil.rmtree("build", ignore_errors=True) + shutil.rmtree("dist", ignore_errors=True) + session.run("python", "setup.py", "sdist") + session.run( + "python", "-m", "twine", "check", *pathlib.Path("dist").glob("*.tar.gz") + ) + def install_unittest_dependencies(session, install_test_extra, *constraints): standard_deps = UNIT_TEST_STANDARD_DEPENDENCIES + UNIT_TEST_DEPENDENCIES @@ -256,7 +265,7 @@ def mypy(session): deps = ( set( [ - "mypy", + MYPY_VERSION, # TODO: update to latest pandas-stubs once we resolve bigframes issues. "pandas-stubs<=2.2.3.241126", "types-protobuf", @@ -773,7 +782,8 @@ def notebook(session: nox.Session): "notebooks/vertex_sdk/sdk2_bigframes_tensorflow.ipynb", # Needs BUCKET_URI. # The experimental notebooks imagine features that don't yet # exist or only exist as temporary prototypes. 
- "notebooks/experimental/longer_ml_demo.ipynb", + "notebooks/experimental/ai_operators.ipynb", + "notebooks/experimental/multimodal_dataframe.ipynb", "notebooks/experimental/semantic_operators.ipynb", # The notebooks that are added for more use cases, such as backing a # blog post, which may take longer to execute and need not be diff --git a/owlbot.py b/owlbot.py index 159df04abd..fa5491ee20 100644 --- a/owlbot.py +++ b/owlbot.py @@ -42,6 +42,8 @@ s.move( templated_files, excludes=[ + # Need a combined LICENSE for all vendored packages. + "LICENSE", # Multi-processing note isn't relevant, as bigframes is responsible for # creating clients, not the end user. "docs/multiprocessing.rst", diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index 9171ac78a4..5cba045ce4 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -76,6 +76,11 @@ def dataset_id_eu(bigquery_client: bigquery.Client, project_id: str) -> Iterator bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) +@pytest.fixture(scope="session") +def gcs_dst_bucket() -> str: + return "gs://bigframes_blob_test" + + @pytest.fixture def random_model_id( bigquery_client: bigquery.Client, project_id: str, dataset_id: str diff --git a/samples/snippets/create_multiple_timeseries_forecasting_model_test.py b/samples/snippets/create_multiple_timeseries_forecasting_model_test.py index b749c37d50..0ce38e1a85 100644 --- a/samples/snippets/create_multiple_timeseries_forecasting_model_test.py +++ b/samples/snippets/create_multiple_timeseries_forecasting_model_test.py @@ -73,26 +73,103 @@ def test_multiple_timeseries_forecasting_model(random_model_id: str) -> None: from bigframes.ml import forecasting import bigframes.pandas as bpd + model = forecasting.ARIMAPlus( + # To reduce the query runtime with the compromise of a potential slight + # drop in model quality, you could decrease the value of the + # auto_arima_max_order. This shrinks the search space of hyperparameter + # tuning in the auto.ARIMA algorithm. + auto_arima_max_order=5, + ) + df = bpd.read_gbq("bigquery-public-data.new_york.citibike_trips") + # This query creates twelve time series models, one for each of the twelve + # Citi Bike start stations in the input data. If you remove this row + # filter, there would be 600+ time series to forecast. + df = df[df["start_station_name"].str.contains("Central Park")] + features = bpd.DataFrame( { - "num_trips": df.starttime, + "start_station_name": df["start_station_name"], + "num_trips": df["starttime"], "date": df["starttime"].dt.date, } ) - num_trips = features.groupby(["date"], as_index=False).count() - model = forecasting.ARIMAPlus() + num_trips = features.groupby( + ["start_station_name", "date"], + as_index=False, + ).count() X = num_trips["date"].to_frame() y = num_trips["num_trips"].to_frame() - model.fit(X, y) + model.fit( + X, + y, + # The input data that you want to get forecasts for, + # in this case the Citi Bike station, as represented by the + # start_station_name column. + id_col=num_trips["start_station_name"].to_frame(), + ) + # The model.fit() call above created a temporary model. # Use the to_gbq() method to write to a permanent location. - model.to_gbq( your_model_id, # For example: "bqml_tutorial.nyc_citibike_arima_model", replace=True, ) # [END bigquery_dataframes_bqml_arima_multiple_step_3_fit] + + # [START bigquery_dataframes_bqml_arima_multiple_step_4_evaluate] + # Evaluate the time series models by using the summary() function. 
The summary() + # function shows you the evaluation metrics of all the candidate models evaluated + # during the process of automatic hyperparameter tuning. + summary = model.summary() + print(summary.peek()) + + # Expected output: + # start_station_name non_seasonal_p non_seasonal_d non_seasonal_q has_drift log_likelihood AIC variance ... + # 1 Central Park West & W 72 St 0 1 5 False -1966.449243 3944.898487 1215.689281 ... + # 8 Central Park W & W 96 St 0 0 5 False -274.459923 562.919847 655.776577 ... + # 9 Central Park West & W 102 St 0 0 0 False -226.639918 457.279835 258.83582 ... + # 11 Central Park West & W 76 St 1 1 2 False -1700.456924 3408.913848 383.254161 ... + # 4 Grand Army Plaza & Central Park S 0 1 5 False -5507.553498 11027.106996 624.138741 ... + # [END bigquery_dataframes_bqml_arima_multiple_step_4_evaluate] + + # [START bigquery_dataframes_bqml_arima_multiple_step_5_coefficients] + coef = model.coef_ + print(coef.peek()) + + # Expected output: + # start_station_name ar_coefficients ma_coefficients intercept_or_drift + # 5 Central Park West & W 68 St [] [-0.41014089 0.21979212 -0.59854213 -0.251438... 0.0 + # 6 Central Park S & 6 Ave [] [-0.71488957 -0.36835772 0.61008532 0.183290... 0.0 + # 0 Central Park West & W 85 St [] [-0.39270166 -0.74494638 0.76432596 0.489146... 0.0 + # 3 W 82 St & Central Park West [-0.50219511 -0.64820817] [-0.20665325 0.67683137 -0.68108631] 0.0 + # 11 W 106 St & Central Park West [-0.70442887 -0.66885553 -0.25030325 -0.34160669] [] 0.0 + # [END bigquery_dataframes_bqml_arima_multiple_step_5_coefficients] + + # [START bigquery_dataframes_bqml_arima_multiple_step_6_forecast] + prediction = model.predict(horizon=3, confidence_level=0.9) + + print(prediction.peek()) + # Expected output: + # forecast_timestamp start_station_name forecast_value standard_error confidence_level ... + # 4 2016-10-01 00:00:00+00:00 Central Park S & 6 Ave 302.377201 32.572948 0.9 ... + # 14 2016-10-02 00:00:00+00:00 Central Park North & Adam Clayton Powell Blvd 263.917567 45.284082 0.9 ... + # 1 2016-09-25 00:00:00+00:00 Central Park West & W 85 St 189.574706 39.874856 0.9 ... + # 20 2016-10-02 00:00:00+00:00 Central Park West & W 72 St 175.474862 40.940794 0.9 ... + # 12 2016-10-01 00:00:00+00:00 W 106 St & Central Park West 63.88163 18.088868 0.9 ... 
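+    # With confidence_level=0.9, each forecast row also carries a prediction
+    # interval of roughly forecast_value +/- 1.645 * standard_error (a normal
+    # approximation): for the first row above, about 302.4 +/- 53.6.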
+ # [END bigquery_dataframes_bqml_arima_multiple_step_6_forecast] + # [START bigquery_dataframes_bqml_arima_multiple_step_7_explain] + explain = model.predict_explain(horizon=3, confidence_level=0.9) + + print(explain.peek(5)) + # Expected output: + # time_series_timestamp start_station_name time_series_type time_series_data time_series_adjusted_data standard_error confidence_level prediction_interval_lower_bound prediction_interval_upper_bound trend seasonal_period_yearly seasonal_period_quarterly seasonal_period_monthly seasonal_period_weekly seasonal_period_daily holiday_effect spikes_and_dips step_changes residual + # 0 2013-07-01 00:00:00+00:00 Central Park S & 6 Ave history 69.0 154.168527 32.572948 0.0 35.477484 -28.402102 0.0 -85.168527 147.093145 + # 1 2013-07-01 00:00:00+00:00 Grand Army Plaza & Central Park S history 79.0 79.0 24.982769 0.0 43.46428 -30.01599 0.0 0.0 65.55171 + # 2 2013-07-02 00:00:00+00:00 Central Park S & 6 Ave history 180.0 204.045651 32.572948 147.093045 72.498327 -15.545721 0.0 -85.168527 61.122876 + # 3 2013-07-02 00:00:00+00:00 Grand Army Plaza & Central Park S history 129.0 99.556269 24.982769 65.551665 45.836432 -11.831828 0.0 0.0 29.443731 + # 4 2013-07-03 00:00:00+00:00 Central Park S & 6 Ave history 115.0 205.968236 32.572948 191.32754 59.220766 -44.580071 0.0 -85.168527 -5.799709 + # [END bigquery_dataframes_bqml_arima_multiple_step_7_explain] diff --git a/samples/snippets/gen_ai_model_test.py b/samples/snippets/gen_ai_model_test.py deleted file mode 100644 index 5cdcd6d3a7..0000000000 --- a/samples/snippets/gen_ai_model_test.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -def test_llm_model() -> None: - # Determine project id, in this case prefer the one set in the environment - # variable GOOGLE_CLOUD_PROJECT (if any) - import os - - PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev") - REGION = "us" - CONN_NAME = "bigframes-default-connection" - - # [START bigquery_dataframes_gen_ai_model] - from bigframes.ml.llm import PaLM2TextGenerator - import bigframes.pandas as bpd - - # Create the LLM model - session = bpd.get_global_session() - connection = f"{PROJECT_ID}.{REGION}.{CONN_NAME}" - model = PaLM2TextGenerator(session=session, connection_name=connection) - - df_api = bpd.read_csv("gs://cloud-samples-data/vertex-ai/bigframe/df.csv") - - # Prepare the prompts and send them to the LLM model for prediction - df_prompt_prefix = "Generate Pandas sample code for DataFrame." 
- df_prompt = df_prompt_prefix + df_api["API"] - - # Predict using the model - df_pred = model.predict(df_prompt.to_frame(), max_output_tokens=1024) - # [END bigquery_dataframes_gen_ai_model] - assert df_pred["ml_generate_text_llm_result"] is not None - assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None diff --git a/samples/snippets/multimodal_test.py b/samples/snippets/multimodal_test.py new file mode 100644 index 0000000000..e5236317e2 --- /dev/null +++ b/samples/snippets/multimodal_test.py @@ -0,0 +1,118 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def test_multimodal_dataframe(gcs_dst_bucket: str) -> None: + # destination folder must be in a GCS bucket that the BQ connection service account (default or user provided) has write access to. + dst_bucket = gcs_dst_bucket + # [START bigquery_dataframes_multimodal_dataframe_create] + import bigframes + + # Flag to enable the feature + bigframes.options.experiments.blob = True + + import bigframes.pandas as bpd + + # Create blob columns from wildcard path. + df_image = bpd.from_glob_path( + "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*", name="image" + ) + # Other ways are: from string uri column + # df = bpd.DataFrame({"uri": ["gs:///", "gs:///"]}) + # df["blob_col"] = df["uri"].str.to_blob() + + # From an existing object table + # df = bpd.read_gbq_object_table("", name="blob_col") + + # Take only the 5 images to deal with. 
Preview the content of the Multimodal DataFrame
+    df_image = df_image.head(5)
+    df_image
+    # [END bigquery_dataframes_multimodal_dataframe_create]
+
+    # [START bigquery_dataframes_multimodal_dataframe_merge]
+    # Combine unstructured data with structured data
+    df_image["author"] = ["alice", "bob", "bob", "alice", "bob"]  # type: ignore
+    df_image["content_type"] = df_image["image"].blob.content_type()
+    df_image["size"] = df_image["image"].blob.size()
+    df_image["updated"] = df_image["image"].blob.updated()
+    df_image
+
+    # Filter images and display them; audio and video types can be displayed too
+    df_image[df_image["author"] == "alice"]["image"].blob.display()
+    # [END bigquery_dataframes_multimodal_dataframe_merge]
+
+    # [START bigquery_dataframes_multimodal_dataframe_image_transform]
+    df_image["blurred"] = df_image["image"].blob.image_blur(
+        (20, 20), dst=f"{dst_bucket}/image_blur_transformed/"
+    )
+    df_image["resized"] = df_image["image"].blob.image_resize(
+        (300, 200), dst=f"{dst_bucket}/image_resize_transformed/"
+    )
+    df_image["normalized"] = df_image["image"].blob.image_normalize(
+        alpha=50.0,
+        beta=150.0,
+        norm_type="minmax",
+        dst=f"{dst_bucket}/image_normalize_transformed/",
+    )
+
+    # You can also chain functions together
+    df_image["blur_resized"] = df_image["blurred"].blob.image_resize(
+        (300, 200), dst=f"{dst_bucket}/image_blur_resize_transformed/"
+    )
+    df_image
+    # [END bigquery_dataframes_multimodal_dataframe_image_transform]
+
+    # [START bigquery_dataframes_multimodal_dataframe_ai]
+    from bigframes.ml import llm
+
+    gemini = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002")
+
+    # Work with the first 2 images as an example
+    df_image = df_image.head(2)
+
+    # Ask the same question about each image
+    answer = gemini.predict(df_image, prompt=["what item is it?", df_image["image"]])
+    answer[["ml_generate_text_llm_result", "image"]]
+
+    # Ask different questions
+    df_image["question"] = [  # type: ignore
+        "what item is it?",
+        "what color is the picture?",
+    ]
+    answer_alt = gemini.predict(
+        df_image, prompt=[df_image["question"], df_image["image"]]
+    )
+    answer_alt[["ml_generate_text_llm_result", "image"]]
+
+    # Generate embeddings on images
+    embed_model = llm.MultimodalEmbeddingGenerator()
+    embeddings = embed_model.predict(df_image["image"])
+    embeddings
+    # [END bigquery_dataframes_multimodal_dataframe_ai]
+
+    # [START bigquery_dataframes_multimodal_dataframe_pdf_chunk]
+    # PDF chunking
+    df_pdf = bpd.from_glob_path(
+        "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*", name="pdf"
+    )
+    df_pdf["chunked"] = df_pdf["pdf"].blob.pdf_chunk()
+    chunked = df_pdf["chunked"].explode()
+    chunked
+    # [END bigquery_dataframes_multimodal_dataframe_pdf_chunk]
+    assert df_image is not None
+    assert answer is not None
+    assert answer_alt is not None
+    assert embeddings is not None
+    assert chunked is not None
diff --git a/samples/snippets/remote_function.py b/samples/snippets/remote_function.py
index c35daf35fc..3a7031ef89 100644
--- a/samples/snippets/remote_function.py
+++ b/samples/snippets/remote_function.py
@@ -47,9 +47,8 @@ def run_remote_function_and_read_gbq_function(project_id: str) -> None:
     # of the penguins, which is a real number, into a category, which is a
     # string.
     @bpd.remote_function(
-        float,
-        str,
         reuse=False,
+        cloud_function_service_account="default",
     )
     def get_bucket(num: float) -> str:
         if not num:
@@ -91,10 +90,9 @@ def get_bucket(num: float) -> str:
     # as a remote function.
The custom function in this example has external # package dependency, which can be specified via `packages` parameter. @bpd.remote_function( - str, - str, reuse=False, packages=["cryptography"], + cloud_function_service_account="default", ) def get_hash(input: str) -> str: from cryptography.fernet import Fernet diff --git a/samples/snippets/text_generation_test.py b/samples/snippets/text_generation_test.py deleted file mode 100644 index c4df1dde3b..0000000000 --- a/samples/snippets/text_generation_test.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -def test_llm_text_generation() -> None: - # Determine project id, in this case prefer the one set in the environment - # variable GOOGLE_CLOUD_PROJECT (if any) - import os - - PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev") - LOCATION = "US" - - # [START bigquery_dataframes_generate_text_tutorial_create_remote_model] - import bigframes - from bigframes.ml.llm import PaLM2TextGenerator - - bigframes.options.bigquery.project = PROJECT_ID - bigframes.options.bigquery.location = LOCATION - - model = PaLM2TextGenerator() - # [END bigquery_dataframes_generate_text_tutorial_create_remote_model] - assert model is not None - - # [START bigquery_dataframes_generate_text_tutorial_perform_keyword_extraction] - import bigframes.pandas as bpd - - df = bpd.read_gbq("bigquery-public-data.imdb.reviews", max_results=5) - df_prompt_prefix = "Extract the key words from the text below: " - df_prompt = df_prompt_prefix + df["review"] - - # Predict using the model - df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100) - df_pred.peek(5) - # [END bigquery_dataframes_generate_text_tutorial_perform_keyword_extraction] - # peek() is used to show a preview of the results. If the output - # of this sample changes, also update the screenshot for the associated - # tutorial on cloud.google.com. - assert df_pred["ml_generate_text_llm_result"] is not None - assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None - - # [START bigquery_dataframes_generate_text_tutorial_perform_sentiment_analysis] - import bigframes.pandas as bpd - - df = bpd.read_gbq("bigquery-public-data.imdb.reviews", max_results=5) - df_prompt_prefix = "perform sentiment analysis on the following text, return one the following categories: positive, negative: " - df_prompt = df_prompt_prefix + df["review"] - - # Predict using the model - df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100) - df_pred.peek(5) - # [END bigquery_dataframes_generate_text_tutorial_perform_sentiment_analysis] - # peek() is used to show a preview of the results. If the output - # of this sample changes, also update the screenshot for the associated - # tutorial on cloud.google.com. 
- - assert df_pred["ml_generate_text_llm_result"] is not None - assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None diff --git a/scratch/.gitignore b/scratch/.gitignore new file mode 100644 index 0000000000..b813ccd98e --- /dev/null +++ b/scratch/.gitignore @@ -0,0 +1,2 @@ +# Ignore all files in this directory. +* diff --git a/setup.py b/setup.py index 34e013c9a3..edc77e11b6 100644 --- a/setup.py +++ b/setup.py @@ -41,20 +41,19 @@ "google-auth >=2.15.0,<3.0dev", "google-cloud-bigtable >=2.24.0", "google-cloud-pubsub >=2.21.4", - "google-cloud-bigquery[bqstorage,pandas] >=3.18.0", + "google-cloud-bigquery[bqstorage,pandas] >=3.31.0", "google-cloud-functions >=1.12.0", "google-cloud-bigquery-connection >=1.12.0", "google-cloud-iam >=2.12.1", "google-cloud-resource-manager >=1.10.3", "google-cloud-storage >=2.0.0", - # Upper bound due to no windows build for 1.1.2 - "jellyfish >=0.8.9,<1.1.2", "numpy >=1.24.0", "pandas >=1.5.3", - "pandas-gbq >=0.26.0", + "pandas-gbq >=0.26.1", "pyarrow >=15.0.2", "pydata-google-auth >=1.8.2", "requests >=2.27.1", + "shapely >=1.8.5", "sqlglot >=23.6.3", "tabulate >=0.9", "ipywidgets >=7.7.1", @@ -63,7 +62,6 @@ "db-dtypes >=1.4.2", # For vendored ibis-framework. "atpublic>=2.3,<6", - "parsy>=2,<3", "python-dateutil>=2.8.2,<3", "pytz>=2022.7", "toolz>=0.11,<2", @@ -79,10 +77,10 @@ # Packages required for basic development flow. "dev": [ "pytest", - "pytest-mock", "pre-commit", "nox", "google-cloud-testutils", + "freezegun", ], } extras["all"] = list(sorted(frozenset(itertools.chain.from_iterable(extras.values())))) @@ -117,6 +115,7 @@ version=version_id, description=description, long_description=readme, + long_description_content_type="text/x-rst", author="Google LLC", author_email="bigframes-feedback@google.com", license="Apache 2.0", diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 8c7c69efa7..dff245d176 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -6,20 +6,20 @@ geopandas==0.12.2 google-auth==2.15.0 google-cloud-bigtable==2.24.0 google-cloud-pubsub==2.21.4 -google-cloud-bigquery==3.18.0 +google-cloud-bigquery==3.31.0 google-cloud-functions==1.12.0 google-cloud-bigquery-connection==1.12.0 google-cloud-iam==2.12.1 google-cloud-resource-manager==1.10.3 google-cloud-storage==2.0.0 -jellyfish==0.8.9 numpy==1.24.0 pandas==1.5.3 -pandas-gbq==0.26.0 +pandas-gbq==0.26.1 pyarrow==15.0.2 pydata-google-auth==1.8.2 requests==2.27.1 scikit-learn==1.2.2 +shapely==1.8.5 sqlglot==23.6.3 tabulate==0.9 ipywidgets==7.7.1 @@ -28,7 +28,6 @@ matplotlib==3.7.1 db-dtypes==1.4.2 # For vendored ibis-framework. 
atpublic==2.3 -parsy==2.0 python-dateutil==2.8.2 pytz==2022.7 toolz==0.11 diff --git a/tests/data/ratings.jsonl b/tests/data/ratings.jsonl new file mode 100644 index 0000000000..b7cd350d08 --- /dev/null +++ b/tests/data/ratings.jsonl @@ -0,0 +1,20 @@ +{"user_id": 1, "item_id": 2, "rating": 4.0} +{"user_id": 1, "item_id": 5, "rating": 3.0} +{"user_id": 2, "item_id": 1, "rating": 5.0} +{"user_id": 2, "item_id": 3, "rating": 2.0} +{"user_id": 3, "item_id": 4, "rating": 4.5} +{"user_id": 3, "item_id": 7, "rating": 3.5} +{"user_id": 4, "item_id": 2, "rating": 1.0} +{"user_id": 4, "item_id": 8, "rating": 5.0} +{"user_id": 5, "item_id": 3, "rating": 4.0} +{"user_id": 5, "item_id": 9, "rating": 2.5} +{"user_id": 6, "item_id": 1, "rating": 3.0} +{"user_id": 6, "item_id": 6, "rating": 4.5} +{"user_id": 7, "item_id": 5, "rating": 5.0} +{"user_id": 7, "item_id": 10, "rating": 1.5} +{"user_id": 8, "item_id": 4, "rating": 2.0} +{"user_id": 8, "item_id": 7, "rating": 4.0} +{"user_id": 9, "item_id": 2, "rating": 3.5} +{"user_id": 9, "item_id": 9, "rating": 5.0} +{"user_id": 10, "item_id": 3, "rating": 4.5} +{"user_id": 10, "item_id": 8, "rating": 2.5} diff --git a/tests/data/ratings_schema.json b/tests/data/ratings_schema.json new file mode 100644 index 0000000000..9fd0101ec8 --- /dev/null +++ b/tests/data/ratings_schema.json @@ -0,0 +1,17 @@ +[ + { + "mode": "NULLABLE", + "name": "user_id", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "item_id", + "type": "INT64" + }, + { + "mode": "NULLABLE", + "name": "rating", + "type": "FLOAT" + } +] diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 398ee8a6b2..19f2a79b65 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -185,8 +185,27 @@ def session_tokyo(tokyo_location: str) -> Generator[bigframes.Session, None, Non @pytest.fixture(scope="session") -def bq_connection(bigquery_client: bigquery.Client) -> str: - return f"{bigquery_client.project}.{bigquery_client.location}.bigframes-rf-conn" +def test_session() -> Generator[bigframes.Session, None, None]: + context = bigframes.BigQueryOptions( + client_endpoints_override={ + "bqclient": "https://test-bigquery.sandbox.google.com", + "bqconnectionclient": "test-bigqueryconnection.sandbox.googleapis.com", + "bqstoragereadclient": "test-bigquerystorage-grpc.sandbox.googleapis.com", + }, + ) + session = bigframes.Session(context=context) + yield session + session.close() + + +@pytest.fixture(scope="session") +def bq_connection_name() -> str: + return "bigframes-rf-conn" + + +@pytest.fixture(scope="session") +def bq_connection(bigquery_client: bigquery.Client, bq_connection_name: str) -> str: + return f"{bigquery_client.project}.{bigquery_client.location}.{bq_connection_name}" @pytest.fixture(scope="session", autouse=True) @@ -315,6 +334,7 @@ def load_test_data_tables( ("repeated", "repeated_schema.json", "repeated.jsonl"), ("json", "json_schema.json", "json.jsonl"), ("penguins", "penguins_schema.json", "penguins.jsonl"), + ("ratings", "ratings_schema.json", "ratings.jsonl"), ("time_series", "time_series_schema.json", "time_series.jsonl"), ("hockey_players", "hockey_players.json", "hockey_players.jsonl"), ("matrix_2by3", "matrix_2by3.json", "matrix_2by3.jsonl"), @@ -411,6 +431,11 @@ def penguins_table_id(test_data_tables) -> str: return test_data_tables["penguins"] +@pytest.fixture(scope="session") +def ratings_table_id(test_data_tables) -> str: + return test_data_tables["ratings"] + + @pytest.fixture(scope="session") def urban_areas_table_id(test_data_tables) -> 
str: return test_data_tables["urban_areas"] @@ -460,7 +485,7 @@ def nested_structs_df( @pytest.fixture(scope="session") -def nested_structs_pandas_df() -> pd.DataFrame: +def nested_structs_pandas_df(nested_structs_pandas_type: pd.ArrowDtype) -> pd.DataFrame: """pd.DataFrame pointing at test data.""" df = pd.read_json( @@ -468,6 +493,7 @@ def nested_structs_pandas_df() -> pd.DataFrame: lines=True, ) df = df.set_index("id") + df["person"] = df["person"].astype(nested_structs_pandas_type) return df @@ -763,6 +789,14 @@ def penguins_df_null_index( return unordered_session.read_gbq(penguins_table_id) +@pytest.fixture(scope="session") +def ratings_df_default_index( + ratings_table_id: str, session: bigframes.Session +) -> bigframes.dataframe.DataFrame: + """DataFrame pointing at test data.""" + return session.read_gbq(ratings_table_id) + + @pytest.fixture(scope="session") def time_series_df_default_index( time_series_table_id: str, session: bigframes.Session @@ -890,8 +924,8 @@ def llm_text_pandas_df(): @pytest.fixture(scope="session") -def llm_text_df(session, llm_text_pandas_df): - return session.read_pandas(llm_text_pandas_df) +def llm_text_df(test_session, llm_text_pandas_df): + return test_session.read_pandas(llm_text_pandas_df) @pytest.fixture(scope="session") @@ -1464,13 +1498,14 @@ def images_uris() -> list[str]: @pytest.fixture(scope="session") def images_mm_df( - images_gcs_path, session: bigframes.Session, bq_connection: str + images_uris, test_session: bigframes.Session, bq_connection: str ) -> bpd.DataFrame: bigframes.options.experiments.blob = True - return session.from_glob_path( - images_gcs_path, name="blob_col", connection=bq_connection + blob_series = bpd.Series(images_uris, session=test_session).str.to_blob( + connection=bq_connection ) + return blob_series.rename("blob_col").to_frame() @pytest.fixture() @@ -1489,8 +1524,10 @@ def pdf_gcs_path() -> str: @pytest.fixture(scope="session") def pdf_mm_df( - pdf_gcs_path, session: bigframes.Session, bq_connection: str + pdf_gcs_path, test_session: bigframes.Session, bq_connection: str ) -> bpd.DataFrame: bigframes.options.experiments.blob = True - return session.from_glob_path(pdf_gcs_path, name="pdf", connection=bq_connection) + return test_session.from_glob_path( + pdf_gcs_path, name="pdf", connection=bq_connection + ) diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py index d428299a96..a2c3f2b85f 100644 --- a/tests/system/large/blob/test_function.py +++ b/tests/system/large/blob/test_function.py @@ -25,12 +25,6 @@ from bigframes import dtypes import bigframes.pandas as bpd -# TODO(shobs): restore these tests after the managed udf cleanup issue is -# resolved in the test project -pytestmark = pytest.mark.skip( - reason="temporarily disable to debug managed udf cleanup in the test project" -) - @pytest.fixture(scope="function") def images_output_folder() -> Generator[str, None, None]: @@ -61,11 +55,11 @@ def test_blob_image_blur_to_series( images_mm_df: bpd.DataFrame, bq_connection: str, images_output_uris: list[str], - session: bigframes.Session, + test_session: bigframes.Session, ): bigframes.options.experiments.blob = True - series = bpd.Series(images_output_uris, session=session).str.to_blob( + series = bpd.Series(images_output_uris, session=test_session).str.to_blob( connection=bq_connection ) @@ -135,11 +129,11 @@ def test_blob_image_resize_to_series( images_mm_df: bpd.DataFrame, bq_connection: str, images_output_uris: list[str], - session: bigframes.Session, + test_session: 
bigframes.Session, ): bigframes.options.experiments.blob = True - series = bpd.Series(images_output_uris, session=session).str.to_blob( + series = bpd.Series(images_output_uris, session=test_session).str.to_blob( connection=bq_connection ) @@ -211,11 +205,11 @@ def test_blob_image_normalize_to_series( images_mm_df: bpd.DataFrame, bq_connection: str, images_output_uris: list[str], - session: bigframes.Session, + test_session: bigframes.Session, ): bigframes.options.experiments.blob = True - series = bpd.Series(images_output_uris, session=session).str.to_blob( + series = bpd.Series(images_output_uris, session=test_session).str.to_blob( connection=bq_connection ) diff --git a/tests/system/large/functions/test_managed_function.py b/tests/system/large/functions/test_managed_function.py index eabafd96fb..a15bce83ad 100644 --- a/tests/system/large/functions/test_managed_function.py +++ b/tests/system/large/functions/test_managed_function.py @@ -16,17 +16,14 @@ import pandas import pyarrow import pytest +import test_utils.prefixer import bigframes import bigframes.exceptions as bfe import bigframes.pandas as bpd from tests.system.utils import cleanup_function_assets -# TODO(shobs): restore these tests after the managed udf cleanup issue is -# resolved in the test project -pytestmark = pytest.mark.skip( - reason="temporarily disable to debug managed udf cleanup in the test project" -) +prefixer = test_utils.prefixer.Prefixer("bigframes", "") def test_managed_function_multiply_with_ibis( @@ -43,6 +40,7 @@ def test_managed_function_multiply_with_ibis( input_types=[int, int], output_type=int, dataset=dataset_id, + name=prefixer.create_prefix(), ) def multiply(x, y): return x * y @@ -93,6 +91,7 @@ def test_managed_function_stringify_with_ibis( input_types=[int], output_type=str, dataset=dataset_id, + name=prefixer.create_prefix(), ) def stringify(x): return f"I got {x}" @@ -129,7 +128,10 @@ def stringify(x): def test_managed_function_array_output(session, scalars_dfs, dataset_id): try: - @session.udf(dataset=dataset_id) + @session.udf( + dataset=dataset_id, + name=prefixer.create_prefix(), + ) def featurize(x: int) -> list[float]: return [float(i) for i in [x, x + 1, x + 2]] @@ -166,13 +168,10 @@ def featurize(x: int) -> list[float]: cleanup_function_assets(featurize, session.bqclient, ignore_failures=False) -def test_managed_function_series_apply( - session, - scalars_dfs, -): +def test_managed_function_series_apply(session, dataset_id, scalars_dfs): try: - @session.udf() + @session.udf(dataset=dataset_id, name=prefixer.create_prefix()) def foo(x: int) -> bytes: return bytes(abs(x)) @@ -223,13 +222,14 @@ def foo(x: int) -> bytes: def test_managed_function_series_apply_array_output( session, + dataset_id, scalars_dfs, ): try: with pytest.warns(bfe.PreviewWarning, match="udf is in preview."): - @session.udf() + @session.udf(dataset=dataset_id, name=prefixer.create_prefix()) def foo_list(x: int) -> list[float]: return [float(abs(x)), float(abs(x) + 1)] @@ -252,7 +252,7 @@ def foo_list(x: int) -> list[float]: cleanup_function_assets(foo_list, session.bqclient, ignore_failures=False) -def test_managed_function_series_combine(session, scalars_dfs): +def test_managed_function_series_combine(session, dataset_id, scalars_dfs): try: # This function is deliberately written to not work with NA input. def add(x: int, y: int) -> int: @@ -267,7 +267,9 @@ def add(x: int, y: int) -> int: # make sure there are NA values in the test column. 
assert any([pandas.isna(val) for val in bf_df[int_col_name_with_nulls]]) - add_managed_func = session.udf()(add) + add_managed_func = session.udf( + dataset=dataset_id, name=prefixer.create_prefix() + )(add) # with nulls in the series the managed function application would fail. with pytest.raises( @@ -310,7 +312,7 @@ def add(x: int, y: int) -> int: ) -def test_managed_function_series_combine_array_output(session, scalars_dfs): +def test_managed_function_series_combine_array_output(session, dataset_id, scalars_dfs): try: def add_list(x: int, y: int) -> list[int]: @@ -325,7 +327,9 @@ def add_list(x: int, y: int) -> list[int]: # Make sure there are NA values in the test column. assert any([pandas.isna(val) for val in bf_df[int_col_name_with_nulls]]) - add_list_managed_func = session.udf()(add_list) + add_list_managed_func = session.udf( + dataset=dataset_id, name=prefixer.create_prefix() + )(add_list) # After filtering out nulls the managed function application should work # similar to pandas. @@ -373,7 +377,7 @@ def add_list(x: int, y: int) -> list[int]: ) -def test_managed_function_dataframe_map(session, scalars_dfs): +def test_managed_function_dataframe_map(session, dataset_id, scalars_dfs): try: def add_one(x): @@ -382,6 +386,8 @@ def add_one(x): mf_add_one = session.udf( input_types=[int], output_type=int, + dataset=dataset_id, + name=prefixer.create_prefix(), )(add_one) scalars_df, scalars_pandas_df = scalars_dfs @@ -407,9 +413,7 @@ def add_one(x): cleanup_function_assets(mf_add_one, session.bqclient, ignore_failures=False) -def test_managed_function_dataframe_map_array_output( - session, scalars_dfs, dataset_id_permanent -): +def test_managed_function_dataframe_map_array_output(session, scalars_dfs, dataset_id): try: def add_one_list(x): @@ -418,6 +422,8 @@ def add_one_list(x): mf_add_one_list = session.udf( input_types=[int], output_type=list[int], + dataset=dataset_id, + name=prefixer.create_prefix(), )(add_one_list) scalars_df, scalars_pandas_df = scalars_dfs @@ -448,7 +454,7 @@ def add_one_list(x): ) -def test_managed_function_dataframe_apply_axis_1(session, scalars_dfs): +def test_managed_function_dataframe_apply_axis_1(session, dataset_id, scalars_dfs): try: scalars_df, scalars_pandas_df = scalars_dfs series = scalars_df["int64_too"] @@ -460,6 +466,8 @@ def add_ints(x, y): add_ints_mf = session.udf( input_types=[int, int], output_type=int, + dataset=dataset_id, + name=prefixer.create_prefix(), )(add_ints) assert add_ints_mf.bigframes_bigquery_function # type: ignore @@ -484,7 +492,7 @@ def add_ints(x, y): cleanup_function_assets(add_ints_mf, session.bqclient, ignore_failures=False) -def test_managed_function_dataframe_apply_axis_1_array_output(session): +def test_managed_function_dataframe_apply_axis_1_array_output(session, dataset_id): bf_df = bigframes.dataframe.DataFrame( { "Id": [1, 2, 3], @@ -504,7 +512,12 @@ def test_managed_function_dataframe_apply_axis_1_array_output(session): try: - @session.udf(input_types=[int, float, str], output_type=list[str]) + @session.udf( + input_types=[int, float, str], + output_type=list[str], + dataset=dataset_id, + name=prefixer.create_prefix(), + ) def foo(x, y, z): return [str(x), str(y), z] @@ -587,3 +600,45 @@ def foo(x, y, z): finally: # Clean up the gcp assets created for the managed function. 
cleanup_function_assets(foo, session.bqclient, ignore_failures=False) + + +@pytest.mark.parametrize( + "connection_fixture", + [ + "bq_connection_name", + "bq_connection", + ], +) +def test_managed_function_with_connection( + session, scalars_dfs, dataset_id, request, connection_fixture +): + try: + bigquery_connection = request.getfixturevalue(connection_fixture) + + @session.udf( + bigquery_connection=bigquery_connection, + dataset=dataset_id, + name=prefixer.create_prefix(), + ) + def foo(x: int) -> int: + return x + 10 + + # Function should still work normally. + assert foo(-2) == 8 + + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result_col = scalars_df["int64_too"].apply(foo) + bf_result = ( + scalars_df["int64_too"].to_frame().assign(result=bf_result_col).to_pandas() + ) + + pd_result_col = scalars_pandas_df["int64_too"].apply(foo) + pd_result = ( + scalars_pandas_df["int64_too"].to_frame().assign(result=pd_result_col) + ) + + pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + finally: + # Clean up the gcp assets created for the managed function. + cleanup_function_assets(foo, session.bqclient, ignore_failures=False) diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 1e5e7ede26..426813b0ff 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -17,11 +17,9 @@ import inspect import math # must keep this at top level to test udf referring global import import os.path -import re import shutil import tempfile import textwrap -import typing import warnings import google.api_core.exceptions @@ -111,11 +109,14 @@ def test_remote_function_multiply_with_ibis( try: @session.remote_function( + # Make sure that the input/output types can be used positionally. + # This avoids the worst of the breaking change from 1.x to 2.x. [int, int], int, dataset_id, - bq_cf_connection, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", ) def multiply(x, y): return x * y @@ -165,11 +166,14 @@ def test_remote_function_stringify_with_ibis( try: @session.remote_function( + # Make sure that the input/output types can be used positionally. + # This avoids the worst of the breaking change from 1.x to 2.x. [int], str, dataset_id, - bq_cf_connection, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", ) def stringify(x): return f"I got {x}" @@ -213,11 +217,14 @@ def func(x, y): return x * abs(y % 4) remote_func = session.remote_function( + # Make sure that the input/output types can be used positionally. + # This avoids the worst of the breaking change from 1.x to 2.x. [str, int], str, dataset_id, - bq_cf_connection, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", )(func) scalars_df, scalars_pandas_df = scalars_dfs @@ -250,11 +257,14 @@ def func(x, y): return [len(x), abs(y % 4)] remote_func = session.remote_function( + # Make sure that the input/output types can be used positionally. + # This avoids the worst of the breaking change from 1.x to 2.x. 
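# (A hedged sketch of the two call styles this comment is guarding; `conn` is a
# placeholder connection id. Under the 2.x signature only input_types,
# output_type, and dataset may still be passed positionally; everything else,
# including bigquery_connection and the now-required
# cloud_function_service_account, is keyword-only:
#
#     session.remote_function(
#         [str, int], str, dataset_id,
#         bigquery_connection=conn,
#         reuse=False,
#         cloud_function_service_account="default",
#     )
#
# is equivalent to spelling all of the parameters out as keywords.)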
[str, int], list[int], dataset_id, - bq_cf_connection, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", )(func) scalars_df, scalars_pandas_df = scalars_dfs @@ -284,11 +294,14 @@ def test_remote_function_decorator_with_bigframes_series( try: @session.remote_function( + # Make sure that the input/output types can be used positionally. + # This avoids the worst of the breaking change from 1.x to 2.x. [int], int, dataset_id, - bq_cf_connection, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", ) def square(x): return x * x @@ -330,11 +343,14 @@ def add_one(x): return x + 1 remote_add_one = session.remote_function( + # Make sure that the input/output types can be used positionally. + # This avoids the worst of the breaking change from 1.x to 2.x. [int], int, dataset_id, - bq_cf_connection, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", )(add_one) scalars_df, scalars_pandas_df = scalars_dfs @@ -380,7 +396,14 @@ def test_remote_function_input_types(session, scalars_dfs, input_types): def add_one(x): return x + 1 - remote_add_one = session.remote_function(input_types, int, reuse=False)(add_one) + remote_add_one = session.remote_function( + # Make sure that the input/output types can be used positionally. + # This avoids the worst of the breaking change from 1.x to 2.x. + input_types, + int, + reuse=False, + cloud_function_service_account="default", + )(add_one) assert remote_add_one.input_dtypes == (bigframes.dtypes.INT_DTYPE,) scalars_df, scalars_pandas_df = scalars_dfs @@ -406,11 +429,14 @@ def test_remote_function_explicit_dataset_not_created( try: @session.remote_function( + # Make sure that the input/output types can be used positionally. + # This avoids the worst of the breaking change from 1.x to 2.x. [int], int, - dataset_id_not_created, - bq_cf_connection, + dataset=dataset_id_not_created, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", ) def square(x): return x * x @@ -459,11 +485,14 @@ def sign(num): return NO_SIGN remote_sign = session.remote_function( + # Make sure that the input/output types can be used positionally. + # This avoids the worst of the breaking change from 1.x to 2.x. [int], int, dataset_id, - bq_cf_connection, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", )(sign) scalars_df, scalars_pandas_df = scalars_dfs @@ -506,11 +535,14 @@ def circumference(radius): return 2 * mymath.pi * radius remote_circumference = session.remote_function( + # Make sure that the input/output types can be used positionally. + # This avoids the worst of the breaking change from 1.x to 2.x. 
[float], float, dataset_id, - bq_cf_connection, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", )(circumference) scalars_df, scalars_pandas_df = scalars_dfs @@ -555,11 +587,12 @@ def find_team(num): return _team_pi remote_find_team = session.remote_function( - [float], - str, - dataset_id, - bq_cf_connection, + input_types=[float], + output_type=str, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", )(find_team) scalars_df, scalars_pandas_df = scalars_dfs @@ -627,11 +660,12 @@ def add_one(x): # The first time both the cloud function and the bq remote function don't # exist and would be created remote_add_one = session.remote_function( - [int], - int, - dataset_id, - bq_cf_connection, + input_types=[int], + output_type=int, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, reuse=True, + cloud_function_service_account="default", )(add_one_uniq) # There should have been exactly one cloud function created at this point @@ -697,11 +731,12 @@ def inner_test(): # exist even though the remote function exists, and goes ahead and recreates # the cloud function remote_add_one = session.remote_function( - [int], - int, - dataset_id, - bq_cf_connection, + input_types=[int], + output_type=int, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, reuse=True, + cloud_function_service_account="default", )(add_one_uniq) # There should be exactly one cloud function again @@ -743,11 +778,12 @@ def is_odd(num): return flag is_odd_remote = session.remote_function( - [int], - bool, - dataset_id, - bq_cf_connection, + input_types=[int], + output_type=bool, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", )(is_odd) scalars_df, scalars_pandas_df = scalars_dfs @@ -783,11 +819,12 @@ def is_odd(num): return flag is_odd_remote = session.remote_function( - [int], - bool, - dataset_id, - bq_cf_connection, + input_types=[int], + output_type=bool, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", )(is_odd) scalars_df, scalars_pandas_df = scalars_dfs @@ -817,11 +854,12 @@ def test_remote_udf_lambda(session, scalars_dfs, dataset_id, bq_cf_connection): add_one_lambda = lambda x: x + 1 # noqa: E731 add_one_lambda_remote = session.remote_function( - [int], - int, - dataset_id, - bq_cf_connection, + input_types=[int], + output_type=int, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", )(add_one_lambda) scalars_df, scalars_pandas_df = scalars_dfs @@ -872,12 +910,13 @@ def square(x): # Create the remote function with the name provided explicitly square_remote = session.remote_function( - [int], - int, - dataset_id, - bq_cf_connection, + input_types=[int], + output_type=int, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, reuse=False, name=rf_name, + cloud_function_service_account="default", )(square) # The remote function should reflect the explicitly provided name @@ -925,12 +964,13 @@ def pd_np_foo(x): # Create the remote function with the name provided explicitly pd_np_foo_remote = session.remote_function( - [int], - float, - dataset_id, - bq_cf_connection, + input_types=[int], + output_type=float, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, reuse=False, packages=["numpy", "pandas >= 2.0.0"], + cloud_function_service_account="default", )(pd_np_foo) #
The behavior of the created remote function should be as expected @@ -1005,11 +1045,12 @@ def test_internal(rf, udf): # Create a new remote function with the name provided explicitly square_remote1 = session.remote_function( - [int], - int, - dataset_id, - bq_cf_connection, + input_types=[int], + output_type=int, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, name=rf_name, + cloud_function_service_account="default", )(square_uniq) # The remote function should reflect the explicitly provided name @@ -1030,11 +1071,12 @@ def test_internal(rf, udf): # explicitly. Since reuse is True by default, the previously created # remote function with the same name will be reused. square_remote2 = session.remote_function( - [int], - int, - dataset_id, - bq_cf_connection, + input_types=[int], + output_type=int, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, name=rf_name, + cloud_function_service_account="default", )(square_uniq) # The new remote function should still reflect the explicitly provided name @@ -1074,11 +1116,12 @@ def plusone(x): # created remote function with the same name should not be reused since # this time it is a different user code. plusone_remote = session.remote_function( - [int], - int, - dataset_id, - bq_cf_connection, + input_types=[int], + output_type=int, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, name=rf_name, + cloud_function_service_account="default", )(plusone_uniq) # The new remote function should still reflect the explicitly provided name @@ -1139,7 +1182,13 @@ def test_remote_function_via_session_context_connection_setter( # unique dataset_id, even though the cloud function would be reused, the bq # remote function would still be created, making use of the bq connection # set in the BigQueryOptions above. - @session.remote_function([int], int, dataset=dataset_id, reuse=False) + @session.remote_function( + input_types=[int], + output_type=int, + dataset=dataset_id, + reuse=False, + cloud_function_service_account="default", + ) def square(x): return x * x @@ -1174,7 +1223,13 @@ def square(x): def test_remote_function_default_connection(session, scalars_dfs, dataset_id): try: - @session.remote_function([int], int, dataset=dataset_id, reuse=False) + @session.remote_function( + input_types=[int], + output_type=int, + dataset=dataset_id, + reuse=False, + cloud_function_service_account="default", + ) def square(x): return x * x @@ -1209,7 +1264,13 @@ def square(x): def test_remote_function_runtime_error(session, scalars_dfs, dataset_id): try: - @session.remote_function([int], int, dataset=dataset_id, reuse=False) + @session.remote_function( + input_types=[int], + output_type=int, + dataset=dataset_id, + reuse=False, + cloud_function_service_account="default", + ) def square(x): return x * x @@ -1233,7 +1294,12 @@ def test_remote_function_anonymous_dataset(session, scalars_dfs): # function in the bigframes session's anonymous dataset. Use reuse=False # param to make sure parallel instances of the test don't step over each # other due to the common anonymous dataset. 
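# (A hedged aside: with no explicit dataset, the routine is created in the
# session's anonymous dataset and is dropped when the session is cleaned up,
# as the unnamed/named cleanup tests later in this file verify. A minimal 2.x
# invocation, including the service account the new API requires, might look
# like:
#
#     @session.remote_function(
#         reuse=False, cloud_function_service_account="default"
#     )
#     def square(x: int) -> int:
#         return x * x
# )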
- @session.remote_function([int], int, reuse=False) + @session.remote_function( + input_types=[int], + output_type=int, + reuse=False, + cloud_function_service_account="default", + ) def square(x): return x * x @@ -1290,14 +1356,27 @@ def test_remote_function_via_session_custom_sa(scalars_dfs): try: + # TODO(shobs): Figure out why the default ingress setting + # (internal-only) does not work here @rf_session.remote_function( - [int], int, reuse=False, cloud_function_service_account=gcf_service_account + input_types=[int], + output_type=int, + reuse=False, + cloud_function_service_account=gcf_service_account, + cloud_function_ingress_settings="all", ) def square_num(x): if x is None: return x return x * x + # assert that the GCF is created with the intended SA + gcf = rf_session.cloudfunctionsclient.get_function( + name=square_num.bigframes_cloud_function + ) + assert gcf.service_config.service_account_email == gcf_service_account + + # assert that the function works as expected on data scalars_df, scalars_pandas_df = scalars_dfs bf_int64_col = scalars_df["int64_col"] @@ -1309,12 +1388,6 @@ def square_num(x): pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) - - # Assert that the GCF is created with the intended SA - gcf = rf_session.cloudfunctionsclient.get_function( - name=square_num.bigframes_cloud_function - ) - assert gcf.service_config.service_account_email == gcf_service_account finally: # clean up the gcp assets created for the remote function cleanup_function_assets( @@ -1322,38 +1395,12 @@ def square_num(x): ) -@pytest.mark.parametrize( - ("remote_function_args"), - [ - pytest.param( - {}, - id="no-set", - ), - pytest.param( - {"cloud_function_service_account": None}, - id="set-none", - ), - ], -) -def test_remote_function_warns_default_cloud_function_service_account( - session, remote_function_args -): - with pytest.warns(FutureWarning) as record: - session.remote_function(**remote_function_args) - - len( - [ - warn - for warn in record - if re.search( - ( - "You have not explicitly set a user-managed.*Using the default Compute Engine.*service account" - ), - typing.cast(FutureWarning, warn.message).args[0], - re.DOTALL, - ) - ] - ) == 1 +def test_remote_function_throws_none_cloud_function_service_account(session): + with pytest.raises( + ValueError, + match='^You must provide a user managed cloud_function_service_account, or "default" if you would like to let the default service account be used.$', + ): + session.remote_function(cloud_function_service_account=None) @pytest.mark.flaky(retries=2, delay=120) @@ -1378,9 +1425,10 @@ def test_remote_function_with_gcf_cmek(): try: @session.remote_function( - [int], - int, + input_types=[int], + output_type=int, reuse=False, + cloud_function_service_account="default", cloud_function_kms_key_name=cmek, cloud_function_docker_repository=docker_repository, ) @@ -1452,10 +1500,24 @@ def square_num(x): return x return x * x + # TODO(shobs): See if the test vpc can be configured to make this flow + # work with the default ingress setting (internal-only) square_num_remote = rf_session.remote_function( - [int], int, reuse=False, cloud_function_vpc_connector=gcf_vpc_connector + input_types=[int], + output_type=int, + reuse=False, + cloud_function_service_account="default", + cloud_function_vpc_connector=gcf_vpc_connector, + cloud_function_ingress_settings="all", )(square_num) + # assert that the GCF is created with the intended vpc connector + gcf = 
rf_session.cloudfunctionsclient.get_function( + name=square_num_remote.bigframes_cloud_function + ) + assert gcf.service_config.vpc_connector == gcf_vpc_connector + + # assert that the function works as expected on data scalars_df, scalars_pandas_df = scalars_dfs bf_int64_col = scalars_df["int64_col"] @@ -1467,12 +1529,6 @@ def square_num(x): pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) - - # Assert that the GCF is created with the intended vpc connector - gcf = rf_session.cloudfunctionsclient.get_function( - name=square_num_remote.bigframes_cloud_function - ) - assert gcf.service_config.vpc_connector == gcf_vpc_connector finally: # clean up the gcp assets created for the remote function cleanup_function_assets( @@ -1495,7 +1551,11 @@ def square(x): return x * x square_remote = session.remote_function( - [int], int, reuse=False, max_batching_rows=max_batching_rows + input_types=[int], + output_type=int, + reuse=False, + max_batching_rows=max_batching_rows, + cloud_function_service_account="default", )(square) bq_routine = session.bqclient.get_routine( @@ -1534,7 +1594,11 @@ def square(x): return x * x square_remote = session.remote_function( - [int], int, reuse=False, **timeout_args + input_types=[int], + output_type=int, + reuse=False, + cloud_function_service_account="default", + **timeout_args, )(square) # Assert that the GCF is created with the intended maximum timeout @@ -1560,7 +1624,13 @@ def square(x): def test_remote_function_gcf_timeout_max_supported_exceeded(session): with pytest.raises(ValueError): - @session.remote_function([int], int, reuse=False, cloud_function_timeout=1201) + @session.remote_function( + input_types=[int], + output_type=int, + reuse=False, + cloud_function_service_account="default", + cloud_function_timeout=1201, + ) def square(x): return x * x @@ -1583,7 +1653,11 @@ def square(x): return x * x square_remote = session.remote_function( - [int], int, reuse=False, **max_instances_args + input_types=[int], + output_type=int, + reuse=False, + cloud_function_service_account="default", + **max_instances_args, )(square) # Assert that the GCF is created with the intended max instance count @@ -1632,7 +1706,10 @@ def serialize_row(row): ) serialize_row_remote = session.remote_function( - bigframes.series.Series, str, reuse=False + input_types=bigframes.series.Series, + output_type=str, + reuse=False, + cloud_function_service_account="default", )(serialize_row) assert getattr(serialize_row_remote, "is_row_processor") @@ -1678,7 +1755,10 @@ def analyze(row): ) analyze_remote = session.remote_function( - bigframes.series.Series, str, reuse=False + input_types=bigframes.series.Series, + output_type=str, + reuse=False, + cloud_function_service_account="default", )(analyze) assert getattr(analyze_remote, "is_row_processor") @@ -1799,7 +1879,10 @@ def serialize_row(row): ) serialize_row_remote = session.remote_function( - bigframes.series.Series, str, reuse=False + input_types=bigframes.series.Series, + output_type=str, + reuse=False, + cloud_function_service_account="default", )(serialize_row) assert getattr(serialize_row_remote, "is_row_processor") @@ -1856,7 +1939,10 @@ def float_parser(row): return float(row["text"]) float_parser_remote = session.remote_function( - bigframes.series.Series, float, reuse=False + input_types=bigframes.series.Series, + output_type=float, + reuse=False, + cloud_function_service_account="default", )(float_parser) assert getattr(float_parser_remote, 
"is_row_processor") @@ -1901,7 +1987,9 @@ def test_remote_function_gcf_memory( def square(x: int) -> int: return x * x - square_remote = session.remote_function(reuse=False, **memory_mib_args)(square) + square_remote = session.remote_function( + reuse=False, cloud_function_service_account="default", **memory_mib_args + )(square) # Assert that the GCF is created with the intended memory gcf = session.cloudfunctionsclient.get_function( @@ -1936,7 +2024,11 @@ def test_remote_function_gcf_memory_unsupported(session, memory_mib): match="Invalid value specified for container memory", ): - @session.remote_function(reuse=False, cloud_function_memory_mib=memory_mib) + @session.remote_function( + reuse=False, + cloud_function_service_account="default", + cloud_function_memory_mib=memory_mib, + ) def square(x: int) -> int: return x * x @@ -1947,7 +2039,7 @@ def test_remote_function_unnamed_removed_w_session_cleanup(): session = bigframes.connect() # create an unnamed remote function in the session - @session.remote_function(reuse=False) + @session.remote_function(reuse=False, cloud_function_service_account="default") def foo(x: int) -> int: return x + 1 @@ -1989,7 +2081,9 @@ def test_remote_function_named_perists_w_session_cleanup(): name = test_utils.prefixer.Prefixer("bigframes", "").create_prefix() # create an unnamed remote function in the session - @session.remote_function(reuse=False, name=name) + @session.remote_function( + reuse=False, name=name, cloud_function_service_account="default" + ) def foo(x: int) -> int: return x + 1 @@ -2030,14 +2124,16 @@ def test_remote_function_clean_up_by_session_id(): # without it, and later confirm that the former is deleted when the session # is cleaned up by session id, but the latter remains ## unnamed - @session.remote_function(reuse=False) + @session.remote_function(reuse=False, cloud_function_service_account="default") def foo_unnamed(x: int) -> int: return x + 1 ## named rf_name = test_utils.prefixer.Prefixer("bigframes", "").create_prefix() - @session.remote_function(reuse=False, name=rf_name) + @session.remote_function( + reuse=False, name=rf_name, cloud_function_service_account="default" + ) def foo_named(x: int) -> int: return x + 2 @@ -2104,7 +2200,12 @@ def test_df_apply_axis_1_multiple_params(session): try: - @session.remote_function([int, float, str], str, reuse=False) + @session.remote_function( + input_types=[int, float, str], + output_type=str, + reuse=False, + cloud_function_service_account="default", + ) def foo(x, y, z): return f"I got {x}, {y} and {z}" @@ -2179,7 +2280,12 @@ def test_df_apply_axis_1_multiple_params_array_output(session): try: - @session.remote_function([int, float, str], list[str], reuse=False) + @session.remote_function( + input_types=[int, float, str], + output_type=list[str], + reuse=False, + cloud_function_service_account="default", + ) def foo(x, y, z): return [str(x), str(y), z] @@ -2259,7 +2365,12 @@ def test_df_apply_axis_1_single_param_non_series(session): try: - @session.remote_function([int], str, reuse=False) + @session.remote_function( + input_types=[int], + output_type=str, + reuse=False, + cloud_function_service_account="default", + ) def foo(x): return f"I got {x}" @@ -2313,7 +2424,7 @@ def test_df_apply_axis_1_array_output(session, scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs try: - @session.remote_function(reuse=False) + @session.remote_function(reuse=False, cloud_function_service_account="default") def generate_stats(row: pandas.Series) -> list[int]: import pandas as pd @@ -2356,13 
+2467,13 @@ generate_stats(row: pandas.Series) -> list[int]: [ pytest.param( {}, - functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, - True, + functions_v2.ServiceConfig.IngressSettings.ALLOW_INTERNAL_ONLY, + False, id="no-set", ), pytest.param( {"cloud_function_ingress_settings": None}, - functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, + functions_v2.ServiceConfig.IngressSettings.ALLOW_INTERNAL_ONLY, True, id="set-none", ), @@ -2402,17 +2513,16 @@ def square(x: int) -> int: return x * x square_remote = session.remote_function( - reuse=False, **ingress_settings_args + reuse=False, + cloud_function_service_account="default", + **ingress_settings_args, )(square) default_ingress_setting_warnings = [ warn for warn in record - if isinstance(warn.message, FutureWarning) - and "`cloud_function_ingress_settings` are set to 'all' by default" - in warn.message.args[0] - and "will change to 'internal-only' for enhanced security in future" - in warn.message.args[0] + if isinstance(warn.message, UserWarning) + and "The `cloud_function_ingress_settings` is being set to 'internal-only' by default." + in warn.message.args[0] ] assert len(default_ingress_setting_warnings) == ( 1 if expect_default_ingress_setting_warning else 0 @@ -2443,7 +2553,11 @@ def test_remote_function_ingress_settings_unsupported(session): ValueError, match="'unknown' not one of the supported ingress settings values" ): - @session.remote_function(reuse=False, cloud_function_ingress_settings="unknown") + @session.remote_function( + reuse=False, + cloud_function_service_account="default", + cloud_function_ingress_settings="unknown", + ) def square(x: int) -> int: return x * x @@ -2475,6 +2589,7 @@ def add_one(x: int) -> int: dataset=dataset_id, bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", )(add_one) temporary_bigquery_remote_function = ( @@ -2552,6 +2667,7 @@ def add_one(x: int) -> int: bigquery_connection=bq_cf_connection, reuse=False, name=name, + cloud_function_service_account="default", )(add_one) persistent_bigquery_remote_function = ( @@ -2619,6 +2735,7 @@ def test_remote_function_array_output( dataset=dataset_id, bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", ) def featurize(x: int) -> list[array_dtype]: # type: ignore return [array_dtype(i) for i in [x, x + 1, x + 2]] @@ -2657,6 +2774,7 @@ def test_remote_function_array_output_partial_ordering_mode( dataset=dataset_id, bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", ) def featurize(x: float) -> list[float]: # type: ignore return [x, x + 1, x + 2] @@ -2698,6 +2816,7 @@ def test_remote_function_array_output_multiindex( dataset=dataset_id, bigquery_connection=bq_cf_connection, reuse=False, + cloud_function_service_account="default", ) def featurize(x: int) -> list[float]: return [x, x + 0.5, x + 0.33] @@ -2720,3 +2839,33 @@ def featurize(x: int) -> list[float]: cleanup_function_assets( featurize, session.bqclient, session.cloudfunctionsclient ) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_connection_path_format( + session, scalars_dfs, dataset_id, bq_cf_connection +): + try: + + @session.remote_function( + dataset=dataset_id, + bigquery_connection=f"projects/{session.bqclient.project}/locations/{session._location}/connections/{bq_cf_connection}", + reuse=False, + cloud_function_service_account="default", + ) + def foo(x: int) -> int: + return x + 1 + + scalars_df, scalars_pandas_df = scalars_dfs + + bf_int64_col =
scalars_df["int64_too"] + bf_result = bf_int64_col.apply(foo).to_pandas() + + pd_int64_col = scalars_pandas_df["int64_too"] + pd_result = pd_int64_col.apply(foo) + + # ignore any dtype disparity + pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + finally: + # clean up the gcp assets created for the remote function + cleanup_function_assets(foo, session.bqclient, session.cloudfunctionsclient) diff --git a/tests/system/large/ml/test_decomposition.py b/tests/system/large/ml/test_decomposition.py index 49aa985189..d1a5f9f2aa 100644 --- a/tests/system/large/ml/test_decomposition.py +++ b/tests/system/large/ml/test_decomposition.py @@ -163,3 +163,49 @@ def test_decomposition_configure_fit_load_none_component( in reloaded_model._bqml_model.model_name ) assert reloaded_model.n_components == 7 + + +def test_decomposition_mf_configure_fit_load( + session, ratings_df_default_index, dataset_id +): + model = decomposition.MatrixFactorization( + num_factors=6, + feedback_type="explicit", + user_col="user_id", + item_col="item_id", + rating_col="rating", + l2_reg=9.83, + ) + + model.fit(ratings_df_default_index) + + reloaded_model = model.to_gbq( + f"{dataset_id}.temp_configured_mf_model", replace=True + ) + + new_ratings = session.read_pandas( + pd.DataFrame( + { + "user_id": ["11", "12", "13"], + "item_id": [1, 2, 3], + "rating": [1.0, 2.0, 3.0], + } + ) + ) + + reloaded_model.score(new_ratings) + + result = reloaded_model.predict(new_ratings).to_pandas() + + assert reloaded_model._bqml_model is not None + assert ( + f"{dataset_id}.temp_configured_mf_model" + in reloaded_model._bqml_model.model_name + ) + assert result is not None + assert reloaded_model.feedback_type == "explicit" + assert reloaded_model.num_factors == 6 + assert reloaded_model.user_col == "user_id" + assert reloaded_model.item_col == "item_id" + assert reloaded_model.rating_col == "rating" + assert reloaded_model.l2_reg == 9.83 diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py index 96215c5e47..be98902007 100644 --- a/tests/system/large/ml/test_linear_model.py +++ b/tests/system/large/ml/test_linear_model.py @@ -222,8 +222,8 @@ def test_unordered_mode_linear_regression_configure_fit_score_predict( start_execution_count = end_execution_count result = model.score(X_train, y_train).to_pandas() end_execution_count = df._block._expr.session._metrics.execution_count - # The score function and to_pandas each initiate one query. - assert end_execution_count - start_execution_count == 2 + # The score function and to_pandas reuse same result. + assert end_execution_count - start_execution_count == 1 utils.check_pandas_df_schema_and_index( result, columns=utils.ML_REGRESSION_METRICS, index=1 diff --git a/tests/system/large/test_dataframe_io.py b/tests/system/large/test_dataframe_io.py index 76a7001fe3..ee9daa4e31 100644 --- a/tests/system/large/test_dataframe_io.py +++ b/tests/system/large/test_dataframe_io.py @@ -44,7 +44,7 @@ def test_to_pandas_batches_override_global_option( page_size=500, max_results=1500, allow_large_results=True ) ) - assert len(w) == 2 + assert len(w) == 1 assert issubclass(w[0].category, FutureWarning) assert "The query result size has exceeded 10 GB." in str(w[0].message) diff --git a/tests/system/large/test_location.py b/tests/system/large/test_location.py index 7801f5dada..d4428c1f95 100644 --- a/tests/system/large/test_location.py +++ b/tests/system/large/test_location.py @@ -13,9 +13,11 @@ # limitations under the License. 
import typing -import warnings from google.cloud import bigquery +from google.cloud.bigquery_storage import types as bqstorage_types +import pandas +import pandas.testing import pytest import bigframes @@ -41,6 +43,7 @@ def _assert_bq_execution_location( assert typing.cast(bigquery.QueryJob, df.query_job).location == expected_location + # Ensure operation involving BQ client succeeds result = ( df[["name", "number"]] .groupby("name") .sum(numeric_only=True) @@ -53,6 +56,35 @@ def _assert_bq_execution_location( typing.cast(bigquery.QueryJob, result.query_job).location == expected_location ) + expected_result = pandas.DataFrame( + {"number": [444, 222]}, index=pandas.Index(["aaa", "bbb"], name="name") + ) + pandas.testing.assert_frame_equal( + expected_result, result.to_pandas(), check_dtype=False, check_index_type=False + ) + + # Ensure BQ Storage Read client operation succeeds + table = result.query_job.destination + requested_session = bqstorage_types.ReadSession( # type: ignore[attr-defined] + table=f"projects/{table.project}/datasets/{table.dataset_id}/tables/{table.table_id}", + data_format=bqstorage_types.DataFormat.ARROW, # type: ignore[attr-defined] + ) + read_session = session.bqstoragereadclient.create_read_session( + parent=f"projects/{table.project}", + read_session=requested_session, + max_stream_count=1, + ) + reader = session.bqstoragereadclient.read_rows(read_session.streams[0].name) + frames = [] + for message in reader.rows().pages: + frames.append(message.to_dataframe()) + read_dataframe = pandas.concat(frames) + # normalize before comparing since we lost some of the bigframes column + # naming abstractions in the direct read of the destination table + read_dataframe = read_dataframe.set_index("name") + read_dataframe.columns = result.columns + pandas.testing.assert_frame_equal(expected_result, read_dataframe) + def test_bq_location_default(): session = bigframes.Session() @@ -119,22 +151,14 @@ def test_bq_location_non_canonical(set_location, resolved_location): sorted(bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS), ) def test_bq_rep_endpoints(bigquery_location): - with warnings.catch_warnings(record=True) as record: - warnings.simplefilter("always") - session = bigframes.Session( - context=bigframes.BigQueryOptions( - location=bigquery_location, use_regional_endpoints=True - ) - ) - assert ( - len([warn for warn in record if isinstance(warn.message, FutureWarning)]) - == 0 + session = bigframes.Session( + context=bigframes.BigQueryOptions( + location=bigquery_location, use_regional_endpoints=True ) + ) - # Verify that location and endpoints are correctly set for the BigQuery API + # Verify that location and endpoint are correctly set for the BigQuery API # client - # TODO(shobs): Figure out if the same can be verified for the other API - # clients. assert session.bqclient.location == bigquery_location assert ( session.bqclient._connection.API_BASE_URL == "https://bigquery.{location}.rep.googleapis.com".format( location=bigquery_location ) ) + # Verify that endpoint is correctly set for the BigQuery Storage API client + # TODO(shobs): Figure out if we can verify that location is set in the + # BigQuery Storage API client.
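# (For reference, a hedged sketch of opting in to regional endpoints; the
# location is a placeholder and must be one of
# bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS, since Session
# construction for other locations now raises ValueError, as tested below:
#
#     session = bigframes.Session(
#         context=bigframes.BigQueryOptions(
#             location="some-rep-location", use_regional_endpoints=True
#         )
#     )
# )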
+ assert ( + session.bqstoragereadclient.api_endpoint + == f"bigquerystorage.{bigquery_location}.rep.googleapis.com" + ) + # assert that bigframes session honors the location _assert_bq_execution_location(session) +def test_clients_provider_no_location(): + with pytest.raises(ValueError, match="Must set location to use regional endpoints"): + bigframes.session.clients.ClientsProvider(use_regional_endpoints=True) + + @pytest.mark.parametrize( "bigquery_location", # Sort the set to avoid nondeterminism. - sorted(bigframes.constants.LEP_ENABLED_BIGQUERY_LOCATIONS), + sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), ) -def test_bq_lep_endpoints(bigquery_location): - # We are not testing BigFrames Session for LEP endpoints because it involves - # query execution using the endpoint, which requires the project to be - # allowlisted for LEP access. We could hardcode one project which is - # allowlisted but then not every open source developer will have access to - # that. Let's rely on just creating the clients for LEP. - with pytest.warns(FutureWarning) as record: - clients_provider = bigframes.session.clients.ClientsProvider( +def test_clients_provider_use_regional_endpoints_non_rep_locations(bigquery_location): + with pytest.raises( + ValueError, + match=f"not .*available in the location {bigquery_location}", + ): + bigframes.session.clients.ClientsProvider( location=bigquery_location, use_regional_endpoints=True ) - assert len(record) == 1 - assert bigquery_location in typing.cast(Warning, record[0].message).args[0] - # Verify that location and endpoints are correctly set for the BigQuery API - # client - # TODO(shobs): Figure out if the same can be verified for the other API - # clients. - assert clients_provider.bqclient.location == bigquery_location - assert ( - clients_provider.bqclient._connection.API_BASE_URL - == "https://{location}-bigquery.googleapis.com".format( - location=bigquery_location + +@pytest.mark.parametrize( + "bigquery_location", + # Sort the set to avoid nondeterminism. 
+ sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), +) +def test_session_init_fails_to_use_regional_endpoints_non_rep_endpoints( + bigquery_location, +): + with pytest.raises( + ValueError, + match=f"not .*available in the location {bigquery_location}", + ): + bigframes.Session( + context=bigframes.BigQueryOptions( + location=bigquery_location, use_regional_endpoints=True + ) ) - ) diff --git a/tests/system/large/test_session.py b/tests/system/large/test_session.py index e117cf0327..1dac8c851e 100644 --- a/tests/system/large/test_session.py +++ b/tests/system/large/test_session.py @@ -72,12 +72,12 @@ def test_close(session: bigframes.Session): ) full_id_1 = bigframes.session._io.bigquery.create_temp_table( session.bqclient, - session._temp_storage_manager.allocate_temp_table(), + session._anon_dataset_manager.allocate_temp_table(), expiration, ) full_id_2 = bigframes.session._io.bigquery.create_temp_table( session.bqclient, - session._temp_storage_manager.allocate_temp_table(), + session._anon_dataset_manager.allocate_temp_table(), expiration, ) @@ -112,12 +112,12 @@ def test_clean_up_by_session_id(): ) bigframes.session._io.bigquery.create_temp_table( session.bqclient, - session._temp_storage_manager.allocate_temp_table(), + session._anon_dataset_manager.allocate_temp_table(), expiration, ) bigframes.session._io.bigquery.create_temp_table( session.bqclient, - session._temp_storage_manager.allocate_temp_table(), + session._anon_dataset_manager.allocate_temp_table(), expiration, ) @@ -157,10 +157,11 @@ def test_clean_up_via_context_manager(session_creator): with session_creator() as session: bqclient = session.bqclient - full_id_1 = session._temp_storage_manager.allocate_and_create_temp_table( + full_id_1 = session._anon_dataset_manager.create_temp_table( [bigquery.SchemaField("a", "INT64")], cluster_cols=[] ) - full_id_2 = session._temp_storage_manager.allocate_and_create_temp_table( + assert session._session_resource_manager is not None + full_id_2 = session._session_resource_manager.create_temp_table( [bigquery.SchemaField("b", "STRING")], cluster_cols=["b"] ) diff --git a/tests/system/load/test_large_tables.py b/tests/system/load/test_large_tables.py index 472be3d2ad..ee49c2703e 100644 --- a/tests/system/load/test_large_tables.py +++ b/tests/system/load/test_large_tables.py @@ -75,17 +75,19 @@ def test_index_repr_large_table(): def test_to_pandas_batches_large_table(): - df = bpd.read_gbq("load_testing.scalars_1tb") + df = bpd.read_gbq("load_testing.scalars_100gb") _, expected_column_count = df.shape # download only a few batches, since 100gb would be too much - iterable = df.to_pandas_batches(page_size=500, max_results=1500) + iterable = df.to_pandas_batches( + page_size=500, max_results=1500, allow_large_results=True + ) # use page size since client library doesn't support # streaming only part of the dataframe via bqstorage for pdf in iterable: batch_row_count, batch_column_count = pdf.shape assert batch_column_count == expected_column_count - assert batch_row_count > 0 + assert 0 < batch_row_count <= 500 @pytest.mark.skip(reason="See if it caused kokoro build aborted.") diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py index 57fc878643..00f690ed54 100644 --- a/tests/system/small/bigquery/test_json.py +++ b/tests/system/small/bigquery/test_json.py @@ -36,11 +36,7 @@ def test_json_set_at_json_path(json_path, expected_json): actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)]) expected = bpd.Series(expected_json,
dtype=dtypes.JSON_DTYPE) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) @pytest.mark.parametrize( @@ -60,11 +56,7 @@ def test_json_set_at_json_value_type(json_value, expected_json): actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)]) expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) def test_json_set_w_more_pairs(): @@ -77,11 +69,7 @@ def test_json_set_w_more_pairs(): expected_json = ['{"a": 3, "b": 2}', '{"a": 4, "b": 2}', '{"a": 5, "b": 2, "c": 1}'] expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) def test_json_set_w_invalid_value_type(): @@ -114,11 +102,7 @@ def test_json_extract_from_json(): actual = bbq.json_extract(s, "$.a.b") expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) def test_json_extract_from_string(): @@ -129,11 +113,7 @@ def test_json_extract_from_string(): actual = bbq.json_extract(s, "$.a.b") expected = bpd.Series(["[1,2]", None, "0"], dtype=pd.StringDtype(storage="pyarrow")) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) def test_json_extract_w_invalid_series_type(): @@ -165,11 +145,7 @@ def test_json_extract_array_from_json(): expected.index.name = None expected.name = None - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) def test_json_extract_array_from_json_strings(): @@ -183,11 +159,7 @@ def test_json_extract_array_from_json_strings(): dtype=pd.ArrowDtype(pa.list_(pa.string())), ) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) def test_json_extract_array_from_json_array_strings(): @@ -201,11 +173,7 @@ def test_json_extract_array_from_json_array_strings(): dtype=pd.ArrowDtype(pa.list_(pa.string())), ) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - 
actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) def test_json_extract_array_w_invalid_series_type(): @@ -219,11 +187,7 @@ def test_json_extract_string_array_from_json_strings(): actual = bbq.json_extract_string_array(s, "$.a") expected = bpd.Series([["ab", "2", "3 xy"], [], ["4", "5"]]) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) def test_json_extract_string_array_from_array_strings(): @@ -231,11 +195,7 @@ def test_json_extract_string_array_from_array_strings(): actual = bbq.json_extract_string_array(s) expected = bpd.Series([["1", "2", "3"], [], ["4", "5"]]) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) def test_json_extract_string_array_as_float_array_from_array_strings(): @@ -243,11 +203,7 @@ def test_json_extract_string_array_as_float_array_from_array_strings(): actual = bbq.json_extract_string_array(s, value_dtype=dtypes.FLOAT_DTYPE) expected = bpd.Series([[1, 2.5, 3], [], [4, 5]]) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd.testing.assert_series_equal( - actual.to_pandas(allow_large_results=True), - expected.to_pandas(allow_large_results=True), - ) + pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) def test_json_extract_string_array_w_invalid_series_type(): diff --git a/tests/system/small/bigquery/test_struct.py b/tests/system/small/bigquery/test_struct.py index 4970964edd..58c822f642 100644 --- a/tests/system/small/bigquery/test_struct.py +++ b/tests/system/small/bigquery/test_struct.py @@ -53,10 +53,9 @@ def test_struct_from_dataframe(columns_arg): srs = series.Series( columns_arg, ) - # Use allow_large_results=True, due to b/403028465 pd.testing.assert_series_equal( - srs.to_pandas(allow_large_results=True), - bbq.struct(srs.struct.explode()).to_pandas(allow_large_results=True), + srs.to_pandas(), + bbq.struct(srs.struct.explode()).to_pandas(), check_index_type=False, check_dtype=False, ) diff --git a/tests/system/small/blob/test_io.py b/tests/system/small/blob/test_io.py index c30f7674af..c496e5d631 100644 --- a/tests/system/small/blob/test_io.py +++ b/tests/system/small/blob/test_io.py @@ -19,11 +19,11 @@ def test_blob_create_from_uri_str( - bq_connection: str, session: bigframes.Session, images_uris + bq_connection: str, test_session: bigframes.Session, images_uris ): bigframes.options.experiments.blob = True - uri_series = bpd.Series(images_uris, session=session) + uri_series = bpd.Series(images_uris, session=test_session) blob_series = uri_series.str.to_blob(connection=bq_connection) pd_blob_df = blob_series.struct.explode().to_pandas() @@ -42,14 +42,21 @@ def test_blob_create_from_uri_str( def test_blob_create_from_glob_path( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris + bq_connection: str, test_session: bigframes.Session, images_gcs_path, images_uris ): bigframes.options.experiments.blob = True - blob_df = session.from_glob_path( + blob_df = test_session.from_glob_path( images_gcs_path, 
connection=bq_connection, name="blob_col" ) - pd_blob_df = blob_df["blob_col"].struct.explode().to_pandas() + pd_blob_df = ( + blob_df["blob_col"] + .struct.explode() + .to_pandas() + .sort_values("uri") + .reset_index(drop=True) + ) + expected_df = pd.DataFrame( { "uri": images_uris, @@ -65,14 +72,20 @@ def test_blob_create_from_glob_path( def test_blob_create_read_gbq_object_table( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris + bq_connection: str, test_session: bigframes.Session, images_gcs_path, images_uris ): bigframes.options.experiments.blob = True - obj_table = session._create_object_table(images_gcs_path, bq_connection) + obj_table = test_session._create_object_table(images_gcs_path, bq_connection) - blob_df = session.read_gbq_object_table(obj_table, name="blob_col") - pd_blob_df = blob_df["blob_col"].struct.explode().to_pandas() + blob_df = test_session.read_gbq_object_table(obj_table, name="blob_col") + pd_blob_df = ( + blob_df["blob_col"] + .struct.explode() + .to_pandas() + .sort_values("uri") + .reset_index(drop=True) + ) expected_df = pd.DataFrame( { "uri": images_uris, diff --git a/tests/system/small/blob/test_properties.py b/tests/system/small/blob/test_properties.py index c7704ec86d..767dbe37b7 100644 --- a/tests/system/small/blob/test_properties.py +++ b/tests/system/small/blob/test_properties.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import db_dtypes # type: ignore import pandas as pd import bigframes +import bigframes.dtypes as dtypes import bigframes.pandas as bpd @@ -55,33 +55,28 @@ def test_blob_version(images_mm_df: bpd.DataFrame): def test_blob_metadata(images_mm_df: bpd.DataFrame): - # allow_large_result=False incompatible with json b/401630655 - with bigframes.option_context( - "bigquery.allow_large_results", True, "experiments.blob", True - ): + with bigframes.option_context("experiments.blob", True): actual = images_mm_df["blob_col"].blob.metadata().to_pandas() expected = pd.Series( [ - { - "content_type": "image/jpeg", - "md5_hash": "e130ad042261a1883cd2cc06831cf748", - "size": 338390, - "updated": 1739574332000000, - }, - { - "content_type": "image/jpeg", - "md5_hash": "e2ae3191ff2b809fd0935f01a537c650", - "size": 43333, - "updated": 1739574332000000, - }, + ( + '{"content_type":"image/jpeg",' + '"md5_hash":"e130ad042261a1883cd2cc06831cf748",' + '"size":338390,' + '"updated":1739574332000000}' + ), + ( + '{"content_type":"image/jpeg",' + '"md5_hash":"e2ae3191ff2b809fd0935f01a537c650",' + '"size":43333,' + '"updated":1739574332000000}' + ), ], name="metadata", - dtype=db_dtypes.JSONDtype(), - ) - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False + dtype=dtypes.JSON_DTYPE, ) + expected.index = expected.index.astype(dtypes.INT_DTYPE) + pd.testing.assert_series_equal(actual, expected) def test_blob_content_type(images_mm_df: bpd.DataFrame): diff --git a/tests/system/small/functions/test_remote_function.py b/tests/system/small/functions/test_remote_function.py index 0af7f4e42e..51e0459014 100644 --- a/tests/system/small/functions/test_remote_function.py +++ b/tests/system/small/functions/test_remote_function.py @@ -25,6 +25,7 @@ import test_utils.prefixer import bigframes +import bigframes.clients import bigframes.dtypes import bigframes.exceptions from bigframes.functions import _utils as bff_utils @@ -93,6 +94,11 @@ def session_with_bq_connection(bq_cf_connection) -> bigframes.Session: return session 
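Stepping back to the blob changes above, both construction paths for a multimodal column reduce to a short sketch. The bucket, URIs, and connection id are placeholders, and blob support remains experimental:

    import bigframes
    import bigframes.pandas as bpd

    bigframes.options.experiments.blob = True  # blob columns are experimental

    conn = "my-project.us.my-connection"  # placeholder BQ connection
    uris = ["gs://my-bucket/images/a.jpg", "gs://my-bucket/images/b.jpg"]

    # Path 1: turn a string Series of GCS URIs into a blob column.
    blob_df = bpd.Series(uris).str.to_blob(connection=conn).rename("blob_col").to_frame()

    # Path 2: build the same frame directly from a glob pattern.
    session = bigframes.connect()
    blob_df2 = session.from_glob_path(
        "gs://my-bucket/images/*", name="blob_col", connection=conn
    )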
+def get_bq_connection_id_path_format(connection_id_dot_format): + fields = connection_id_dot_format.split(".") + return f"projects/{fields[0]}/locations/{fields[1]}/connections/{fields[2]}" + + @pytest.mark.flaky(retries=2, delay=120) def test_remote_function_direct_no_session_param( bigquery_client, @@ -107,8 +113,8 @@ def square(x): return x * x square = bff.remote_function( - int, - int, + input_types=int, + output_type=int, bigquery_client=bigquery_client, bigquery_connection_client=bigqueryconnection_client, cloud_functions_client=cloudfunctions_client, @@ -118,6 +124,7 @@ def square(x): # See e2e tests for tests that actually deploy the Cloud Function. reuse=True, name=get_function_name(square), + cloud_function_service_account="default", )(square) # Function should still work normally. @@ -154,11 +161,8 @@ def square(x): @pytest.mark.flaky(retries=2, delay=120) -def test_remote_function_direct_no_session_param_location_specified( - bigquery_client, - bigqueryconnection_client, - cloudfunctions_client, - resourcemanager_client, +def test_remote_function_connection_w_location( + session, scalars_dfs, dataset_id_permanent, bq_cf_connection_location, @@ -167,17 +171,15 @@ def square(x): return x * x square = bff.remote_function( - int, - int, - bigquery_client=bigquery_client, - bigquery_connection_client=bigqueryconnection_client, - cloud_functions_client=cloudfunctions_client, - resource_manager_client=resourcemanager_client, + input_types=int, + output_type=int, + session=session, dataset=dataset_id_permanent, bigquery_connection=bq_cf_connection_location, # See e2e tests for tests that actually deploy the Cloud Function. reuse=True, name=get_function_name(square), + cloud_function_service_account="default", )(square) # Function should still work normally. @@ -208,11 +210,8 @@ def square(x): @pytest.mark.flaky(retries=2, delay=120) -def test_remote_function_direct_no_session_param_location_mismatched( - bigquery_client, - bigqueryconnection_client, - cloudfunctions_client, - resourcemanager_client, +def test_remote_function_connection_w_location_mismatched( + session, dataset_id_permanent, bq_cf_connection_location_mismatched, ): @@ -221,31 +220,41 @@ def square(x): # connection doesn't match the location of the dataset. return x * x # pragma: NO COVER - with pytest.raises( - ValueError, - match=re.escape("The location does not match BigQuery connection location:"), - ): - bff.remote_function( - int, - int, - bigquery_client=bigquery_client, - bigquery_connection_client=bigqueryconnection_client, - cloud_functions_client=cloudfunctions_client, - resource_manager_client=resourcemanager_client, - dataset=dataset_id_permanent, - bigquery_connection=bq_cf_connection_location_mismatched, - # See e2e tests for tests that actually deploy the Cloud Function. 
- reuse=True, - name=get_function_name(square), - )(square) + bq_cf_connection_location_mismatched_path_fmt = get_bq_connection_id_path_format( + bigframes.clients.get_canonical_bq_connection_id( + bq_cf_connection_location_mismatched, + session.bqclient.project, + session._location, + ) + ) + connection_ids = [ + bq_cf_connection_location_mismatched, + bq_cf_connection_location_mismatched_path_fmt, + ] + + for connection_id in connection_ids: + with pytest.raises( + ValueError, + match=re.escape( + "The location does not match BigQuery connection location:" + ), + ): + bff.remote_function( + input_types=int, + output_type=int, + session=session, + dataset=dataset_id_permanent, + bigquery_connection=connection_id, + # See e2e tests for tests that actually deploy the Cloud Function. + reuse=True, + name=get_function_name(square), + cloud_function_service_account="default", + )(square) @pytest.mark.flaky(retries=2, delay=120) -def test_remote_function_direct_no_session_param_location_project_specified( - bigquery_client, - bigqueryconnection_client, - cloudfunctions_client, - resourcemanager_client, +def test_remote_function_connection_w_location_project( + session, scalars_dfs, dataset_id_permanent, bq_cf_connection_location_project, @@ -254,17 +263,15 @@ def square(x): return x * x square = bff.remote_function( - int, - int, - bigquery_client=bigquery_client, - bigquery_connection_client=bigqueryconnection_client, - cloud_functions_client=cloudfunctions_client, - resource_manager_client=resourcemanager_client, + input_types=int, + output_type=int, + session=session, dataset=dataset_id_permanent, bigquery_connection=bq_cf_connection_location_project, # See e2e tests for tests that actually deploy the Cloud Function. reuse=True, name=get_function_name(square), + cloud_function_service_account="default", )(square) # Function should still work normally. @@ -295,11 +302,8 @@ def square(x): @pytest.mark.flaky(retries=2, delay=120) -def test_remote_function_direct_no_session_param_project_mismatched( - bigquery_client, - bigqueryconnection_client, - cloudfunctions_client, - resourcemanager_client, +def test_remote_function_connection_w_project_mismatched( + session, dataset_id_permanent, bq_cf_connection_location_project_mismatched, ): @@ -308,25 +312,38 @@ def square(x): # connection doesn't match the project of the dataset. return x * x # pragma: NO COVER - with pytest.raises( - ValueError, - match=re.escape( - "The project_id does not match BigQuery connection gcp_project_id:" - ), - ): - bff.remote_function( - int, - int, - bigquery_client=bigquery_client, - bigquery_connection_client=bigqueryconnection_client, - cloud_functions_client=cloudfunctions_client, - resource_manager_client=resourcemanager_client, - dataset=dataset_id_permanent, - bigquery_connection=bq_cf_connection_location_project_mismatched, - # See e2e tests for tests that actually deploy the Cloud Function. 
- reuse=True, - name=get_function_name(square), - )(square) + bq_cf_connection_location_project_mismatched_path_fmt = ( + get_bq_connection_id_path_format( + bigframes.clients.get_canonical_bq_connection_id( + bq_cf_connection_location_project_mismatched, + session.bqclient.project, + session._location, + ) + ) + ) + connection_ids = [ + bq_cf_connection_location_project_mismatched, + bq_cf_connection_location_project_mismatched_path_fmt, + ] + + for connection_id in connection_ids: + with pytest.raises( + ValueError, + match=re.escape( + "The project_id does not match BigQuery connection gcp_project_id:" + ), + ): + bff.remote_function( + input_types=int, + output_type=int, + session=session, + dataset=dataset_id_permanent, + bigquery_connection=connection_id, + # See e2e tests for tests that actually deploy the Cloud Function. + reuse=True, + name=get_function_name(square), + cloud_function_service_account="default", + )(square) @pytest.mark.flaky(retries=2, delay=120) @@ -337,11 +354,12 @@ def square(x): return x * x square = bff.remote_function( - int, - int, + input_types=int, + output_type=int, session=session_with_bq_connection, dataset=dataset_id_permanent, name=get_function_name(square), + cloud_function_service_account="default", )(square) # Function should still work normally. @@ -386,7 +404,11 @@ def square(x): # udf is same as the one used in other tests in this file so the underlying # cloud function would be common and quickly reused. square = session_with_bq_connection.remote_function( - int, int, dataset_id_permanent, name=get_function_name(square) + input_types=int, + output_type=int, + dataset=dataset_id_permanent, + name=get_function_name(square), + cloud_function_service_account="default", )(square) # Function should still work normally. @@ -424,13 +446,14 @@ def square(x): return x * x square = session.remote_function( - int, - int, - dataset_id_permanent, - bq_cf_connection, + input_types=int, + output_type=int, + dataset=dataset_id_permanent, + bigquery_connection=bq_cf_connection, # See e2e tests for tests that actually deploy the Cloud Function. reuse=True, name=get_function_name(square), + cloud_function_service_account="default", )(square) # Function should still work normally. 
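The hunks above all perform the same migration: input_types, output_type, dataset, and bigquery_connection become keyword-only arguments, and cloud_function_service_account must now be passed explicitly. A minimal sketch of the new call shape (the dataset ID and function name are placeholders, not taken from this diff):

    def square(x):
        return x * x

    square = session.remote_function(
        input_types=int,
        output_type=int,
        dataset="my_dataset",  # placeholder dataset ID
        name="square",  # placeholder; the tests derive this via get_function_name
        cloud_function_service_account="default",
    )(square)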
@@ -468,7 +491,11 @@ def add_one(x): return x + 1 remote_add_one = session_with_bq_connection.remote_function( - [int], int, dataset_id_permanent, name=get_function_name(add_one) + input_types=[int], + output_type=int, + dataset=dataset_id_permanent, + name=get_function_name(add_one), + cloud_function_service_account="default", )(add_one) scalars_df, scalars_pandas_df = scalars_dfs @@ -499,7 +526,11 @@ def add_one(x): return x + 1 remote_add_one = session_with_bq_connection.remote_function( - [int], int, dataset_id_permanent, name=get_function_name(add_one) + input_types=[int], + output_type=int, + dataset=dataset_id_permanent, + name=get_function_name(add_one), + cloud_function_service_account="default", )(add_one) scalars_df, scalars_pandas_df = scalars_dfs @@ -530,7 +561,11 @@ def add_one(x): return x + 1 remote_add_one = session_with_bq_connection.remote_function( - [int], int, dataset_id_permanent, name=get_function_name(add_one) + input_types=[int], + output_type=int, + dataset=dataset_id_permanent, + name=get_function_name(add_one), + cloud_function_service_account="default", )(add_one) scalars_df, scalars_pandas_df = scalars_dfs @@ -576,6 +611,7 @@ def bytes_to_hex(mybytes: bytes) -> bytes: dataset=dataset_id_permanent, name=get_function_name(bytes_to_hex, package_requirements=packages), packages=packages, + cloud_function_service_account="default", )(bytes_to_hex) bf_result = scalars_df.bytes_col.map(remote_bytes_to_hex).to_pandas() @@ -618,10 +654,11 @@ def add_one(x): return x + 1 # pragma: NO COVER session.remote_function( - [int], - int, + input_types=[int], + output_type=int, dataset=dataset_id_permanent, name=get_function_name(add_one), + cloud_function_service_account="default", )(add_one) @@ -651,8 +688,8 @@ def square1(x): return x * x square1 = bff.remote_function( - [int], - int, + input_types=[int], + output_type=int, bigquery_client=bigquery_client, bigquery_connection_client=bigqueryconnection_client, dataset=dataset_id_permanent, @@ -661,6 +698,7 @@ def square1(x): bigquery_connection=bq_cf_connection, reuse=True, name=get_function_name(square1), + cloud_function_service_account="default", )(square1) # Function should still work normally. 
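The row-processor tests in the hunks that follow use the same keyword-only shape, but take a whole row as a bigframes Series and are applied with apply(axis=1); sketched under the same placeholder assumptions:

    import bigframes.series

    def add_ints(row):
        return row["int64_col"] + row["int64_too"]

    add_ints_remote = session.remote_function(
        input_types=bigframes.series.Series,
        output_type=int,
        dataset="my_dataset",  # placeholder dataset ID
        name="add_ints",  # placeholder
        cloud_function_service_account="default",
    )(add_ints)

    # Applied row-wise rather than element-wise:
    # bf_df[["int64_col", "int64_too"]].apply(add_ints_remote, axis=1)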
@@ -1135,10 +1173,11 @@ def add_ints(row): match="input_types=Series is in preview.", ): add_ints_remote = session.remote_function( - bigframes.series.Series, - int, - dataset_id_permanent, + input_types=bigframes.series.Series, + output_type=int, + dataset=dataset_id_permanent, name=get_function_name(add_ints, is_row_processor=True), + cloud_function_service_account="default", )(add_ints) assert add_ints_remote.bigframes_remote_function # type: ignore assert add_ints_remote.bigframes_bigquery_function # type: ignore @@ -1187,10 +1226,11 @@ def add_ints(row): return row["int64_col"] + row["int64_too"] add_ints_remote = session.remote_function( - bigframes.series.Series, - int, - dataset_id_permanent, + input_types=bigframes.series.Series, + output_type=int, + dataset=dataset_id_permanent, name=get_function_name(add_ints, is_row_processor=True), + cloud_function_service_account="default", )(add_ints) bf_result = ( @@ -1226,10 +1266,11 @@ def add_numbers(row): return row["x"] + row["y"] add_numbers_remote = session.remote_function( - bigframes.series.Series, - float, - dataset_id_permanent, + input_types=bigframes.series.Series, + output_type=float, + dataset=dataset_id_permanent, name=get_function_name(add_numbers, is_row_processor=True), + cloud_function_service_account="default", )(add_numbers) bf_result = bf_df.apply(add_numbers_remote, axis=1).to_pandas() @@ -1279,10 +1320,11 @@ def echo_len(row): return len(row) echo_len_remote = session.remote_function( - bigframes.series.Series, - float, - dataset_id_permanent, + input_types=bigframes.series.Series, + output_type=float, + dataset=dataset_id_permanent, name=get_function_name(echo_len, is_row_processor=True), + cloud_function_service_account="default", )(echo_len) for column in columns_with_not_supported_dtypes: @@ -1315,7 +1357,9 @@ def should_mask(name: str) -> bool: assert "name" in inspect.signature(should_mask).parameters should_mask = session.remote_function( - dataset=dataset_id_permanent, name=get_function_name(should_mask) + dataset=dataset_id_permanent, + name=get_function_name(should_mask), + cloud_function_service_account="default", )(should_mask) s = bigframes.series.Series(["Alice", "Bob", "Caroline"]) @@ -1374,7 +1418,9 @@ def is_odd(x: int) -> bool: # create a remote function is_odd_remote = session.remote_function( - dataset=dataset_id_permanent, name=get_function_name(is_odd) + dataset=dataset_id_permanent, + name=get_function_name(is_odd), + cloud_function_service_account="default", )(is_odd) # with nulls in the series the remote function application would fail @@ -1424,7 +1470,9 @@ def add(x: int, y: int) -> int: # create a remote function add_remote = session.remote_function( - dataset=dataset_id_permanent, name=get_function_name(add) + dataset=dataset_id_permanent, + name=get_function_name(add), + cloud_function_service_account="default", )(add) # with nulls in the series the remote function application would fail @@ -1477,7 +1525,9 @@ def add(x: int, y: int, z: float) -> float: # create a remote function add_remote = session.remote_function( - dataset=dataset_id_permanent, name=get_function_name(add) + dataset=dataset_id_permanent, + name=get_function_name(add), + cloud_function_service_account="default", )(add) # pandas does not support nary functions, so let's create a proxy function @@ -1533,6 +1583,7 @@ def is_long_duration(minutes: int) -> bool: is_long_duration = unordered_session.remote_function( dataset=dataset_id_permanent, name=get_function_name(is_long_duration), + 
cloud_function_service_account="default", )(is_long_duration) method = getattr(df["duration_minutes"], method) @@ -1551,7 +1602,9 @@ def combiner(x: int, y: int) -> int: return x combiner = unordered_session.remote_function( - dataset=dataset_id_permanent, name=get_function_name(combiner) + dataset=dataset_id_permanent, + name=get_function_name(combiner), + cloud_function_service_account="default", )(combiner) df = scalars_df_index[["int64_col", "int64_too", "float64_col", "string_col"]] @@ -1567,7 +1620,9 @@ def processor(x: int, y: int, z: float, w: str) -> str: return f"I got x={x}, y={y}, z={z} and w={w}" processor = unordered_session.remote_function( - dataset=dataset_id_permanent, name=get_function_name(processor) + dataset=dataset_id_permanent, + name=get_function_name(processor), + cloud_function_service_account="default", )(processor) df = scalars_df_index[["int64_col", "int64_too", "float64_col", "string_col"]] diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index b075817b07..18f3ff2675 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -17,6 +17,7 @@ import bigframes_vendored.constants as constants import geopandas # type: ignore from geopandas.array import GeometryDtype # type:ignore +import geopandas.testing # type:ignore import google.api_core.exceptions import pandas as pd import pytest @@ -188,16 +189,17 @@ def test_geo_boundary(): LineString([(0, 0), (1, 1), (0, 1)]), Point(0, 1), ], + index=pd.Index([0, 1, 2, 3, 4], dtype="Int64"), ) bf_result = bf_s.geo.boundary.to_pandas() pd_result = pd_s.boundary - pd.testing.assert_series_equal( + geopandas.testing.assert_geoseries_equal( bf_result, pd_result, check_series_type=False, - check_index=False, + check_index_type=False, ) diff --git a/tests/system/small/ml/conftest.py b/tests/system/small/ml/conftest.py index 1843da41d7..d56874719e 100644 --- a/tests/system/small/ml/conftest.py +++ b/tests/system/small/ml/conftest.py @@ -202,64 +202,6 @@ def xgboost_iris_df(session, xgboost_iris_pandas_df): return session.read_pandas(xgboost_iris_pandas_df) -@pytest.fixture(scope="session") -def bqml_palm2_text_generator_model(session, bq_connection) -> core.BqmlModel: - options = { - "remote_service_type": "CLOUD_AI_LARGE_LANGUAGE_MODEL_V1", - } - return globals.bqml_model_factory().create_remote_model( - session=session, connection_name=bq_connection, options=options - ) - - -@pytest.fixture(scope="session") -def palm2_text_generator_model(session, bq_connection) -> llm.PaLM2TextGenerator: - return llm.PaLM2TextGenerator(session=session, connection_name=bq_connection) - - -@pytest.fixture(scope="session") -def palm2_text_generator_32k_model(session, bq_connection) -> llm.PaLM2TextGenerator: - return llm.PaLM2TextGenerator( - model_name="text-bison-32k", session=session, connection_name=bq_connection - ) - - -@pytest.fixture(scope="function") -def ephemera_palm2_text_generator_model( - session, bq_connection -) -> llm.PaLM2TextGenerator: - return llm.PaLM2TextGenerator(session=session, connection_name=bq_connection) - - -@pytest.fixture(scope="session") -def palm2_embedding_generator_model( - session, bq_connection -) -> llm.PaLM2TextEmbeddingGenerator: - return llm.PaLM2TextEmbeddingGenerator( - session=session, connection_name=bq_connection - ) - - -@pytest.fixture(scope="session") -def palm2_embedding_generator_model_002( - session, bq_connection -) -> llm.PaLM2TextEmbeddingGenerator: - return 
llm.PaLM2TextEmbeddingGenerator( - version="002", session=session, connection_name=bq_connection - ) - - -@pytest.fixture(scope="session") -def palm2_embedding_generator_multilingual_model( - session, bq_connection -) -> llm.PaLM2TextEmbeddingGenerator: - return llm.PaLM2TextEmbeddingGenerator( - model_name="textembedding-gecko-multilingual", - session=session, - connection_name=bq_connection, - ) - - @pytest.fixture(scope="session") def linear_remote_model_params() -> dict: # Pre-deployed endpoint of linear reg model in Vertex. diff --git a/tests/system/small/ml/test_core.py b/tests/system/small/ml/test_core.py index 1827858353..3c5ba9bb18 100644 --- a/tests/system/small/ml/test_core.py +++ b/tests/system/small/ml/test_core.py @@ -390,27 +390,6 @@ def test_remote_model_predict( ) -@pytest.mark.flaky(retries=2) -def test_model_generate_text( - bqml_palm2_text_generator_model: core.BqmlModel, llm_text_df -): - options = { - "temperature": 0.5, - "max_output_tokens": 100, - "top_k": 20, - "top_p": 0.5, - "flatten_json_output": True, - } - # Until b/401630655 is resolved, json not compatible with allow_large_results=False - df = bqml_palm2_text_generator_model.generate_text( - llm_text_df, options=options - ).to_pandas(allow_large_results=True) - - utils.check_pandas_df_schema_and_index( - df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False - ) - - @pytest.mark.parametrize("id_col_name", [None, "id"]) def test_model_forecast( time_series_bqml_arima_plus_model: core.BqmlModel, diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py index 7e7a532f79..544889bf5a 100644 --- a/tests/system/small/ml/test_llm.py +++ b/tests/system/small/ml/test_llm.py @@ -24,187 +24,6 @@ from tests.system import utils -# Until b/401630655 is resolved, ML apis return json, not compatible with allow_large_results=False -@pytest.fixture(scope="module", autouse=True) -def always_create_table(): - with bigframes.option_context("bigquery.allow_large_results", True): - yield - - -def test_create_load_text_generator_model( - palm2_text_generator_model, dataset_id, bq_connection -): - # Model creation doesn't return error - assert palm2_text_generator_model is not None - assert palm2_text_generator_model._bqml_model is not None - - # save, load to ensure configuration was kept - reloaded_model = palm2_text_generator_model.to_gbq( - f"{dataset_id}.temp_text_model", replace=True - ) - assert f"{dataset_id}.temp_text_model" == reloaded_model._bqml_model.model_name - assert reloaded_model.model_name == "text-bison" - assert reloaded_model.connection_name == bq_connection - - -def test_create_load_text_generator_32k_model( - palm2_text_generator_32k_model, dataset_id, bq_connection -): - # Model creation doesn't return error - assert palm2_text_generator_32k_model is not None - assert palm2_text_generator_32k_model._bqml_model is not None - - # save, load to ensure configuration was kept - reloaded_model = palm2_text_generator_32k_model.to_gbq( - f"{dataset_id}.temp_text_model", replace=True - ) - assert f"{dataset_id}.temp_text_model" == reloaded_model._bqml_model.model_name - assert reloaded_model.model_name == "text-bison-32k" - assert reloaded_model.connection_name == bq_connection - - -@pytest.mark.flaky(retries=2) -def test_create_text_generator_model_default_session( - bq_connection, llm_text_pandas_df, bigquery_client -): - import bigframes.pandas as bpd - - # Note: This starts a thread-local session. 
- with bpd.option_context( - "bigquery.bq_connection", - bq_connection, - "bigquery.location", - "US", - ): - model = llm.PaLM2TextGenerator() - assert model is not None - assert model._bqml_model is not None - assert ( - model.connection_name.casefold() - == f"{bigquery_client.project}.us.bigframes-rf-conn" - ) - - llm_text_df = bpd.read_pandas(llm_text_pandas_df) - - df = model.predict(llm_text_df).to_pandas() - utils.check_pandas_df_schema_and_index( - df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False - ) - - -@pytest.mark.flaky(retries=2) -def test_create_text_generator_32k_model_default_session( - bq_connection, llm_text_pandas_df, bigquery_client -): - import bigframes.pandas as bpd - - # Note: This starts a thread-local session. - with bpd.option_context( - "bigquery.bq_connection", - bq_connection, - "bigquery.location", - "US", - ): - model = llm.PaLM2TextGenerator(model_name="text-bison-32k") - assert model is not None - assert model._bqml_model is not None - assert ( - model.connection_name.casefold() - == f"{bigquery_client.project}.us.bigframes-rf-conn" - ) - - llm_text_df = bpd.read_pandas(llm_text_pandas_df) - - df = model.predict(llm_text_df).to_pandas() - utils.check_pandas_df_schema_and_index( - df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False - ) - - -@pytest.mark.flaky(retries=2) -def test_create_text_generator_model_default_connection( - llm_text_pandas_df, bigquery_client -): - from bigframes import _config - import bigframes.pandas as bpd - - bpd.close_session() - _config.options = _config.Options() # reset configs - - llm_text_df = bpd.read_pandas(llm_text_pandas_df) - - model = llm.PaLM2TextGenerator() - assert model is not None - assert model._bqml_model is not None - assert ( - model.connection_name.casefold() - == f"{bigquery_client.project}.us.bigframes-default-connection" - ) - - df = model.predict(llm_text_df).to_pandas() - utils.check_pandas_df_schema_and_index( - df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False - ) - - -# Marked as flaky only because BQML LLM is in preview, the service only has limited capacity, not stable enough. 
-@pytest.mark.flaky(retries=2) -def test_text_generator_predict_default_params_success( - palm2_text_generator_model, llm_text_df -): - df = palm2_text_generator_model.predict(llm_text_df).to_pandas() - utils.check_pandas_df_schema_and_index( - df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False - ) - - -@pytest.mark.flaky(retries=2) -def test_text_generator_predict_series_default_params_success( - palm2_text_generator_model, llm_text_df -): - df = palm2_text_generator_model.predict(llm_text_df["prompt"]).to_pandas() - utils.check_pandas_df_schema_and_index( - df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False - ) - - -@pytest.mark.flaky(retries=2) -def test_text_generator_predict_arbitrary_col_label_success( - palm2_text_generator_model, llm_text_df -): - llm_text_df = llm_text_df.rename(columns={"prompt": "arbitrary"}) - df = palm2_text_generator_model.predict(llm_text_df).to_pandas() - utils.check_pandas_df_schema_and_index( - df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False - ) - - -@pytest.mark.flaky(retries=2) -def test_text_generator_predict_multiple_cols_success( - palm2_text_generator_model, llm_text_df: bpd.DataFrame -): - df = llm_text_df.assign(additional_col=1) - pd_df = palm2_text_generator_model.predict(df).to_pandas() - utils.check_pandas_df_schema_and_index( - pd_df, - columns=utils.ML_GENERATE_TEXT_OUTPUT + ["additional_col"], - index=3, - col_exact=False, - ) - - -@pytest.mark.flaky(retries=2) -def test_text_generator_predict_with_params_success( - palm2_text_generator_model, llm_text_df -): - df = palm2_text_generator_model.predict( - llm_text_df, temperature=0.5, max_output_tokens=100, top_k=20, top_p=0.5 - ).to_pandas() - utils.check_pandas_df_schema_and_index( - df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False - ) - - @pytest.mark.parametrize( "model_name", ("text-embedding-005", "text-embedding-004", "text-multilingual-embedding-002"), @@ -287,25 +106,6 @@ def test_create_load_multimodal_embedding_generator_model( assert reloaded_model.connection_name == bq_connection -@pytest.mark.flaky(retries=2) -def test_multimodal_embedding_generator_predict_default_params_success( - images_mm_df, session, bq_connection -): - bigframes.options.experiments.blob = True - - text_embedding_model = llm.MultimodalEmbeddingGenerator( - connection_name=bq_connection, session=session - ) - df = text_embedding_model.predict(images_mm_df).to_pandas() - utils.check_pandas_df_schema_and_index( - df, - columns=utils.ML_MULTIMODAL_GENERATE_EMBEDDING_OUTPUT, - index=2, - col_exact=False, - ) - assert len(df["ml_generate_embedding_result"][0]) == 1408 - - @pytest.mark.parametrize( "model_name", ( @@ -316,6 +116,8 @@ def test_multimodal_embedding_generator_predict_default_params_success( "gemini-1.5-flash-001", "gemini-1.5-flash-002", "gemini-2.0-flash-exp", + "gemini-2.0-flash-001", + "gemini-2.0-flash-lite-001", ), ) @pytest.mark.flaky( @@ -420,36 +222,6 @@ def test_gemini_text_generator_multi_cols_predict_success( ) -@pytest.mark.parametrize( - "model_name", - ( - "gemini-1.5-pro-001", - "gemini-1.5-pro-002", - "gemini-1.5-flash-001", - "gemini-1.5-flash-002", - "gemini-2.0-flash-exp", - ), -) -@pytest.mark.flaky(retries=2) -def test_gemini_text_generator_multimodal_input( - images_mm_df: bpd.DataFrame, model_name, session, bq_connection -): - bigframes.options.experiments.blob = True - - gemini_text_generator_model = llm.GeminiTextGenerator( - model_name=model_name, connection_name=bq_connection, session=session - ) - 
pd_df = gemini_text_generator_model.predict( - images_mm_df, prompt=["Describe", images_mm_df["blob_col"]] - ).to_pandas() - utils.check_pandas_df_schema_and_index( - pd_df, - columns=utils.ML_GENERATE_TEXT_OUTPUT + ["blob_col"], - index=2, - col_exact=False, - ) - - # Overrides __eq__ function for comparing as mock.call parameter class EqCmpAllDataFrame(bpd.DataFrame): def __eq__(self, other): @@ -922,50 +694,6 @@ def test_text_embedding_generator_retry_no_progress(session, bq_connection): ) -@pytest.mark.flaky(retries=2) -def test_llm_palm_score(llm_fine_tune_df_default_index): - model = llm.PaLM2TextGenerator(model_name="text-bison") - - # Check score to ensure the model was fitted - score_result = model.score( - X=llm_fine_tune_df_default_index[["prompt"]], - y=llm_fine_tune_df_default_index[["label"]], - ).to_pandas() - utils.check_pandas_df_schema_and_index( - score_result, - columns=[ - "bleu4_score", - "rouge-l_precision", - "rouge-l_recall", - "rouge-l_f1_score", - "evaluation_status", - ], - index=1, - ) - - -@pytest.mark.flaky(retries=2) -def test_llm_palm_score_params(llm_fine_tune_df_default_index): - model = llm.PaLM2TextGenerator(model_name="text-bison", max_iterations=1) - - # Check score to ensure the model was fitted - score_result = model.score( - X=llm_fine_tune_df_default_index["prompt"], - y=llm_fine_tune_df_default_index["label"], - task_type="classification", - ).to_pandas() - utils.check_pandas_df_schema_and_index( - score_result, - columns=[ - "precision", - "recall", - "f1_score", - "label", - "evaluation_status", - ], - ) - - @pytest.mark.flaky(retries=2) @pytest.mark.parametrize( "model_name", @@ -1023,41 +751,6 @@ def test_llm_gemini_pro_score_params(llm_fine_tune_df_default_index, model_name) ) -def test_palm2_text_generator_deprecated(): - with pytest.warns(exceptions.ApiDeprecationWarning): - llm.PaLM2TextGenerator() - - -def test_palm2_text_embedding_deprecated(): - with pytest.warns(exceptions.ApiDeprecationWarning): - try: - llm.PaLM2TextEmbeddingGenerator() - except (Exception): - pass - - -@pytest.mark.parametrize( - "model_name", - ( - "gemini-1.5-pro-001", - "gemini-1.5-pro-002", - "gemini-1.5-flash-001", - "gemini-1.5-flash-002", - ), -) -def test_gemini_text_generator_deprecated(model_name): - with pytest.warns(exceptions.ApiDeprecationWarning): - llm.GeminiTextGenerator(model_name=model_name) - - -def test_gemini_pro_text_generator_deprecated(): - with pytest.warns(exceptions.ApiDeprecationWarning): - try: - llm.GeminiTextGenerator(model_name="gemini-pro") - except (Exception): - pass - - @pytest.mark.parametrize( "model_name", ( @@ -1069,3 +762,19 @@ def test_gemini_pro_text_generator_deprecated(): def test_gemini_preview_model_warnings(model_name): with pytest.warns(exceptions.PreviewWarning): llm.GeminiTextGenerator(model_name=model_name) + + +@pytest.mark.parametrize( + "model_class", + [ + llm.TextEmbeddingGenerator, + llm.MultimodalEmbeddingGenerator, + llm.GeminiTextGenerator, + llm.Claude3TextGenerator, + ], +) +def test_text_embedding_generator_no_default_model_warning(model_class): + message = "Since upgrading the default model can cause unintended breakages, the\ndefault model will be removed in BigFrames 3.0. Please supply an\nexplicit model to avoid this message." 
+ bigframes.options.experiments.blob = True + with pytest.warns(FutureWarning, match=message): + model_class(model_name=None) diff --git a/tests/system/small/ml/test_multimodal_llm.py b/tests/system/small/ml/test_multimodal_llm.py new file mode 100644 index 0000000000..51e6bcb2d5 --- /dev/null +++ b/tests/system/small/ml/test_multimodal_llm.py @@ -0,0 +1,69 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import bigframes +from bigframes.ml import llm +import bigframes.pandas as bpd +from tests.system import utils + + +@pytest.mark.flaky(retries=2) +def test_multimodal_embedding_generator_predict_default_params_success( + images_mm_df, test_session, bq_connection +): + bigframes.options.experiments.blob = True + + text_embedding_model = llm.MultimodalEmbeddingGenerator( + connection_name=bq_connection, session=test_session + ) + df = text_embedding_model.predict(images_mm_df).to_pandas() + utils.check_pandas_df_schema_and_index( + df, + columns=utils.ML_MULTIMODAL_GENERATE_EMBEDDING_OUTPUT, + index=2, + col_exact=False, + ) + assert len(df["ml_generate_embedding_result"][0]) == 1408 + + +@pytest.mark.parametrize( + "model_name", + ( + "gemini-1.5-pro-001", + "gemini-1.5-pro-002", + "gemini-1.5-flash-001", + "gemini-1.5-flash-002", + "gemini-2.0-flash-exp", + ), +) +@pytest.mark.flaky(retries=2) +def test_gemini_text_generator_multimodal_input( + images_mm_df: bpd.DataFrame, model_name, test_session, bq_connection +): + bigframes.options.experiments.blob = True + + gemini_text_generator_model = llm.GeminiTextGenerator( + model_name=model_name, connection_name=bq_connection, session=test_session + ) + pd_df = gemini_text_generator_model.predict( + images_mm_df, prompt=["Describe", images_mm_df["blob_col"]] + ).to_pandas() + utils.check_pandas_df_schema_and_index( + pd_df, + columns=utils.ML_GENERATE_TEXT_OUTPUT + ["blob_col"], + index=2, + col_exact=False, + ) diff --git a/tests/system/small/ml/test_register.py b/tests/system/small/ml/test_register.py index 6d8ff0a712..f21567da63 100644 --- a/tests/system/small/ml/test_register.py +++ b/tests/system/small/ml/test_register.py @@ -14,9 +14,7 @@ from typing import cast -import pytest - -from bigframes.ml import core, imported, linear_model, llm +from bigframes.ml import core, imported, linear_model def test_linear_reg_register( @@ -53,13 +51,6 @@ def test_linear_reg_register_with_params( ) -def test_palm2_text_generator_register( - ephemera_palm2_text_generator_model: llm.PaLM2TextGenerator, -): - with pytest.raises(AttributeError): - ephemera_palm2_text_generator_model.register() # type: ignore - - def test_imported_tensorflow_register( ephemera_imported_tensorflow_model: imported.TensorFlowModel, ): diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index ca83604dd5..0463124309 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -20,7 +20,7 @@ import pytest import 
bigframes.series -from tests.system.utils import assert_series_equal, skip_legacy_pandas +from tests.system.utils import assert_series_equal DATETIME_COL_NAMES = [("datetime_col",), ("timestamp_col",)] DATE_COLUMNS = [ @@ -34,8 +34,9 @@ ("col_name",), DATE_COLUMNS, ) -@skip_legacy_pandas def test_dt_day(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.day.to_pandas() @@ -51,8 +52,9 @@ def test_dt_day(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -@skip_legacy_pandas def test_dt_date(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.date.to_pandas() @@ -68,8 +70,9 @@ def test_dt_date(scalars_dfs, col_name): ("col_name",), DATE_COLUMNS, ) -@skip_legacy_pandas def test_dt_dayofweek(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.dayofweek.to_pandas() @@ -82,8 +85,9 @@ def test_dt_dayofweek(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -@skip_legacy_pandas def test_dt_hour(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.hour.to_pandas() @@ -99,8 +103,9 @@ def test_dt_hour(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -@skip_legacy_pandas def test_dt_minute(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.minute.to_pandas() @@ -116,8 +121,9 @@ def test_dt_minute(scalars_dfs, col_name): ("col_name",), DATE_COLUMNS, ) -@skip_legacy_pandas def test_dt_month(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.month.to_pandas() @@ -133,8 +139,9 @@ def test_dt_month(scalars_dfs, col_name): ("col_name",), DATE_COLUMNS, ) -@skip_legacy_pandas def test_dt_quarter(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.quarter.to_pandas() @@ -150,8 +157,9 @@ def test_dt_quarter(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -@skip_legacy_pandas def test_dt_second(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = 
bf_series.dt.second.to_pandas() @@ -167,8 +175,9 @@ def test_dt_second(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -@skip_legacy_pandas def test_dt_time(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.time.to_pandas() @@ -184,8 +193,9 @@ def test_dt_time(scalars_dfs, col_name): ("col_name",), DATE_COLUMNS, ) -@skip_legacy_pandas def test_dt_year(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.year.to_pandas() @@ -201,8 +211,9 @@ def test_dt_year(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -@skip_legacy_pandas def test_dt_tz(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.tz @@ -215,8 +226,9 @@ def test_dt_tz(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -@skip_legacy_pandas def test_dt_unit(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.unit @@ -234,8 +246,9 @@ def test_dt_unit(scalars_dfs, col_name): ("datetime_col", "%H:%M"), ], ) -@skip_legacy_pandas def test_dt_strftime(scalars_df_index, scalars_pandas_df_index, column, date_format): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") bf_result = scalars_df_index[column].dt.strftime(date_format).to_pandas() pd_result = scalars_pandas_df_index[column].dt.strftime(date_format) pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) @@ -276,8 +289,9 @@ def test_dt_strftime_time(): ("col_name",), DATETIME_COL_NAMES, ) -@skip_legacy_pandas def test_dt_normalize(scalars_dfs, col_name): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df[col_name].dt.normalize().to_pandas() pd_result = scalars_pandas_df[col_name].dt.normalize() @@ -297,8 +311,9 @@ def test_dt_normalize(scalars_dfs, col_name): ("datetime_col", "us"), ], ) -@skip_legacy_pandas def test_dt_floor(scalars_dfs, col_name, freq): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df[col_name].dt.floor(freq).to_pandas() pd_result = scalars_pandas_df[col_name].dt.floor(freq) diff --git a/tests/system/small/test_bq_sessions.py b/tests/system/small/test_bq_sessions.py new file mode 100644 index 0000000000..7aad19bd8f --- /dev/null +++ b/tests/system/small/test_bq_sessions.py @@ -0,0 +1,85 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from concurrent.futures import ThreadPoolExecutor +import time + +import google +import google.api_core.exceptions +import google.cloud +from google.cloud import bigquery +import pytest + +from bigframes.session import bigquery_session + +TEST_SCHEMA = [ + bigquery.SchemaField("bool field", "BOOLEAN"), + bigquery.SchemaField("string field", "STRING"), + bigquery.SchemaField("float array_field", "FLOAT", mode="REPEATED"), + bigquery.SchemaField( + "struct field", + "RECORD", + fields=(bigquery.SchemaField("int subfield", "INTEGER"),), + ), +] + + +@pytest.fixture +def session_resource_manager( + bigquery_client, +) -> bigquery_session.SessionResourceManager: + return bigquery_session.SessionResourceManager(bigquery_client, "US") + + +def test_bq_session_create_temp_table_clustered(bigquery_client: bigquery.Client): + session_resource_manager = bigquery_session.SessionResourceManager( + bigquery_client, "US" + ) + cluster_cols = ["string field", "bool field"] + + session_table_ref = session_resource_manager.create_temp_table( + TEST_SCHEMA, cluster_cols=cluster_cols + ) + session_resource_manager._keep_session_alive() + + result_table = bigquery_client.get_table(session_table_ref) + assert result_table.schema == TEST_SCHEMA + assert result_table.clustering_fields == cluster_cols + + session_resource_manager.close() + with pytest.raises(google.api_core.exceptions.NotFound): + # It may take time for the underlying tables to get cleaned up after + # closing the session, so wait at least 1 minute to check. 
+ for _ in range(6): + bigquery_client.get_table(session_table_ref) + time.sleep(10) + + +def test_bq_session_create_multi_temp_tables(bigquery_client: bigquery.Client): + session_resource_manager = bigquery_session.SessionResourceManager( + bigquery_client, "US" + ) + + def create_table(): + return session_resource_manager.create_temp_table(TEST_SCHEMA) + + with ThreadPoolExecutor() as executor: + results = [executor.submit(create_table) for i in range(10)] + + for future in results: + table = future.result() + result_table = bigquery_client.get_table(table) + assert result_table.schema == TEST_SCHEMA + + session_resource_manager.close() diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 8cc3be1577..e77319b551 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -38,7 +38,6 @@ assert_pandas_df_equal, assert_series_equal, assert_series_equivalent, - skip_legacy_pandas, ) @@ -78,6 +77,23 @@ def test_df_construct_pandas_default(scalars_dfs): pandas.testing.assert_frame_equal(bf_result, pd_result) +@pytest.mark.parametrize( + ("write_engine"), + [ + ("bigquery_inline"), + ("bigquery_load"), + ("bigquery_streaming"), + ], +) +def test_read_pandas_all_nice_types( + session: bigframes.Session, scalars_pandas_df_index: pd.DataFrame, write_engine +): + bf_result = session.read_pandas( + scalars_pandas_df_index, write_engine=write_engine + ).to_pandas() + pandas.testing.assert_frame_equal(bf_result, scalars_pandas_df_index) + + def test_df_construct_large_strings(): data = [["hello", "w" + "o" * 50000 + "rld"]] bf_result = dataframe.DataFrame(data).to_pandas() @@ -614,8 +630,9 @@ def test_drop_bigframes_index_with_na(scalars_dfs): pd.testing.assert_frame_equal(pd_result, bf_result) -@skip_legacy_pandas def test_drop_bigframes_multiindex(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs scalars_df = scalars_df.copy() scalars_pandas_df = scalars_pandas_df.copy() @@ -1146,7 +1163,6 @@ def test_assign_callable_lambda(scalars_dfs): assert_pandas_df_equal(bf_result, pd_result) -@skip_legacy_pandas @pytest.mark.parametrize( ("axis", "how", "ignore_index", "subset"), [ @@ -1160,6 +1176,8 @@ def test_assign_callable_lambda(scalars_dfs): ], ) def test_df_dropna(scalars_dfs, axis, how, ignore_index, subset): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs df = scalars_df.dropna(axis=axis, how=how, ignore_index=ignore_index, subset=subset) bf_result = df.to_pandas() @@ -1172,8 +1190,9 @@ def test_df_dropna(scalars_dfs, axis, how, ignore_index, subset): pandas.testing.assert_frame_equal(bf_result, pd_result) -@skip_legacy_pandas def test_df_dropna_range_columns(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs scalars_df = scalars_df.copy() scalars_pandas_df = scalars_pandas_df.copy() @@ -1372,11 +1391,12 @@ def test_df_iter( assert bf_i == df_i -@skip_legacy_pandas def test_iterrows( scalars_df_index, scalars_pandas_df_index, ): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df_index = scalars_df_index.add_suffix("_suffix", axis=1) scalars_pandas_df_index = 
scalars_pandas_df_index.add_suffix("_suffix", axis=1) for (bf_index, bf_series), (pd_index, pd_series) in zip( @@ -1743,6 +1763,29 @@ def test_len(scalars_dfs): assert bf_result == pd_result +@pytest.mark.parametrize( + ("n_rows",), + [ + (50,), + (10000,), + ], +) +@pytest.mark.parametrize( + "write_engine", + ["bigquery_load", "bigquery_streaming"], +) +def test_df_len_local(session, n_rows, write_engine): + assert ( + len( + session.read_pandas( + pd.DataFrame(np.random.randint(1, 7, n_rows), columns=["one"]), + write_engine=write_engine, + ) + ) + == n_rows + ) + + def test_size(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df.size @@ -2356,8 +2399,9 @@ def test_df_corrwith_df_non_numeric_error(scalars_dfs): scalars_df[l_cols].corrwith(scalars_df[r_cols], numeric_only=False) -@skip_legacy_pandas def test_df_corrwith_series(scalars_dfs_maybe_ordered): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered l_cols = ["int64_col", "float64_col", "int64_too"] @@ -2486,7 +2530,6 @@ def test_series_binop_axis_index( assert_pandas_df_equal(bf_result, pd_result) -@skip_legacy_pandas @pytest.mark.parametrize( ("input"), [ @@ -2501,6 +2544,8 @@ def test_series_binop_axis_index( ], ) def test_listlike_binop_axis_1_in_memory_data(scalars_dfs, input): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs df_columns = ["int64_col", "float64_col", "int64_too"] @@ -2513,8 +2558,9 @@ def test_listlike_binop_axis_1_in_memory_data(scalars_dfs, input): assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) -@skip_legacy_pandas def test_df_reverse_binop_pandas(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs pd_series = pd.Series([100, 200, 300]) @@ -2957,8 +3003,9 @@ def test_dataframe_agg_int_multi_string(scalars_dfs): ) -@skip_legacy_pandas def test_df_describe_non_temporal(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs # excluding temporal columns here because BigFrames cannot perform percentiles operations on them unsupported_columns = ["datetime_col", "timestamp_col", "time_col", "date_col"] @@ -2991,9 +3038,10 @@ def test_df_describe_non_temporal(scalars_dfs): ).all() -@skip_legacy_pandas @pytest.mark.parametrize("include", [None, "all"]) def test_df_describe_non_numeric(scalars_dfs, include): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs # Excluding "date_col" here because in BigFrames it is used as PyArrow[date32()], which is @@ -3021,8 +3069,9 @@ def test_df_describe_non_numeric(scalars_dfs, include): ) -@skip_legacy_pandas def test_df_describe_temporal(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs temporal_columns = ["datetime_col", "timestamp_col", "time_col", "date_col"] @@ -3048,8 +3097,9 @@ def test_df_describe_temporal(scalars_dfs): ) -@skip_legacy_pandas def test_df_describe_mixed_types_include_all(scalars_dfs): + # TODO: 
supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs numeric_columns = [ @@ -4607,13 +4657,12 @@ def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, sub ], ) def test_df_drop_duplicates_w_json(json_df, keep): - bf_df = json_df.drop_duplicates(keep=keep).to_pandas(allow_large_results=True) + bf_df = json_df.drop_duplicates(keep=keep).to_pandas() # drop_duplicates relies on pa.compute.dictionary_encode, which is incompatible # with Arrow string extension types. Temporary conversion to standard Pandas # strings is required. - # allow_large_results=True for b/401630655 - json_pandas_df = json_df.to_pandas(allow_large_results=True) + json_pandas_df = json_df.to_pandas() json_pandas_df["json_col"] = json_pandas_df["json_col"].astype( pd.StringDtype(storage="pyarrow") ) @@ -4731,8 +4780,9 @@ def test_df_to_json_local_str(scalars_df_index, scalars_pandas_df_index): assert bf_result == pd_result -@skip_legacy_pandas def test_df_to_json_local_file(scalars_df_index, scalars_pandas_df_index): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file: scalars_df_index.to_json(bf_result_file, orient="table") # default_handler for arrow types that have no default conversion @@ -4858,7 +4908,6 @@ def test_df_to_orc(scalars_df_index, scalars_pandas_df_index): assert bf_result == pd_result -@skip_legacy_pandas @pytest.mark.parametrize( ("expr",), [ @@ -4868,6 +4917,8 @@ def test_df_to_orc(scalars_df_index, scalars_pandas_df_index): ], ) def test_df_eval(scalars_dfs, expr): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df.eval(expr).to_pandas() @@ -4876,7 +4927,6 @@ def test_df_eval(scalars_dfs, expr): pd.testing.assert_frame_equal(bf_result, pd_result) -@skip_legacy_pandas @pytest.mark.parametrize( ("expr",), [ @@ -4886,6 +4936,8 @@ def test_df_eval(scalars_dfs, expr): ], ) def test_df_query(scalars_dfs, expr): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") # local_var is referenced in expressions local_var = 3 # NOQA scalars_df, scalars_pandas_df = scalars_dfs @@ -5204,9 +5256,7 @@ def test_query_complexity_repeated_subtrees( # See: https://github.com/python/cpython/issues/112282 reason="setrecursionlimit has no effect on the Python C stack since Python 3.12.", ) -def test_query_complexity_repeated_analytic( - scalars_df_index, scalars_pandas_df_index, with_multiquery_execution -): +def test_query_complexity_repeated_analytic(scalars_df_index, scalars_pandas_df_index): bf_df = scalars_df_index[["int64_col", "int64_too"]] pd_df = scalars_pandas_df_index[["int64_col", "int64_too"]] # Uses LAG analytic operator, each in a new SELECT @@ -5218,22 +5268,6 @@ def test_query_complexity_repeated_analytic( assert_pandas_df_equal(bf_result, pd_result) -def test_to_pandas_downsampling_option_override(session): - df = session.read_gbq("bigframes-dev.bigframes_tests_sys.batting") - download_size = 1 - - # limits only apply for allow_large_result=True - df = df.to_pandas( - max_download_size=download_size, - sampling_method="head", - allow_large_results=True, - ) - - total_memory_bytes = df.memory_usage(deep=True).sum() - total_memory_mb = 
total_memory_bytes / (1024 * 1024) - assert total_memory_mb == pytest.approx(download_size, rel=0.5) - - def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_created): dataset_id = dataset_id_not_created destination_table = f"{dataset_id}.scalars_df" @@ -5342,7 +5376,6 @@ def test_dataframe_explode_xfail(col_names): df.explode(col_names) -@skip_legacy_pandas @pytest.mark.parametrize( ("on", "rule", "origin"), [ @@ -5362,6 +5395,8 @@ def test_dataframe_explode_xfail(col_names): def test__resample_with_column( scalars_df_index, scalars_pandas_df_index, on, rule, origin ): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") bf_result = ( scalars_df_index._resample(rule=rule, on=on, origin=origin)[ ["int64_col", "int64_too"] @@ -5377,7 +5412,6 @@ def test__resample_with_column( ) -@skip_legacy_pandas @pytest.mark.parametrize( ("append", "level", "col", "rule"), [ @@ -5389,6 +5423,8 @@ def test__resample_with_column( def test__resample_with_index( scalars_df_index, scalars_pandas_df_index, append, level, col, rule ): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df_index = scalars_df_index.set_index(col, append=append) scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append) bf_result = ( @@ -5405,7 +5441,6 @@ def test__resample_with_index( assert_pandas_df_equal(bf_result, pd_result) -@skip_legacy_pandas @pytest.mark.parametrize( ("rule", "origin", "data"), [ @@ -5445,6 +5480,8 @@ def test__resample_with_index( ], ) def test__resample_start_time(rule, origin, data): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") col = "timestamp_col" scalars_df_index = bpd.DataFrame(data).set_index(col) scalars_pandas_df_index = pd.DataFrame(data).set_index(col) diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index cd21f5094c..a69c26bc54 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -35,6 +35,7 @@ import bigframes import bigframes.dataframe +import bigframes.enums import bigframes.features import bigframes.pandas as bpd @@ -257,7 +258,7 @@ def test_to_pandas_override_global_option(scalars_df_index): scalars_df_index.to_pandas() table_id = scalars_df_index._query_job.destination.table_id - assert table_id.startswith("bqdf") + assert table_id is not None # When allow_large_results=False, a query_job object should not be created. # Therefore, the table_id should remain unchanged. 
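The dataframe_io hunk below pins down the to_pandas_batches() paging contract (regression coverage for b/407521010); in sketch form, reusing the sizes from the test:

    # Each batch arrives as a plain pandas DataFrame of up to page_size rows; here
    # max_results is a multiple of page_size, so every batch has exactly 42 rows.
    for chunk in bf_df.to_pandas_batches(page_size=42, max_results=42 * 3):
        assert chunk.shape[0] == 42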
@@ -265,14 +266,69 @@ assert scalars_df_index._query_job.destination.table_id == table_id +def test_to_pandas_downsampling_option_override(session): + df = session.read_gbq("bigframes-dev.bigframes_tests_sys.batting") + download_size = 1 + + with pytest.warns( + UserWarning, match="The data size .* exceeds the maximum download limit" + ): + # limits only apply for allow_large_results=True + df = df.to_pandas( + max_download_size=download_size, + sampling_method="head", + allow_large_results=True, + ) + + total_memory_bytes = df.memory_usage(deep=True).sum() + total_memory_mb = total_memory_bytes / (1024 * 1024) + assert total_memory_mb == pytest.approx(download_size, rel=0.5) + + +@pytest.mark.parametrize( + ("kwargs", "message"), + [ + pytest.param( + {"sampling_method": "head"}, + r"DEPRECATED[\S\s]*sampling_method[\S\s]*DataFrame.sample", + id="sampling_method", + ), + pytest.param( + {"random_state": 10}, + r"DEPRECATED[\S\s]*random_state[\S\s]*DataFrame.sample", + id="random_state", + ), + pytest.param( + {"max_download_size": 10}, + r"DEPRECATED[\S\s]*max_download_size[\S\s]*DataFrame.to_pandas_batches", + id="max_download_size", + ), + ], +) +def test_to_pandas_warns_deprecated_parameters(scalars_df_index, kwargs, message): + with pytest.warns(FutureWarning, match=message): + scalars_df_index.to_pandas( + # limits only apply for allow_large_results=True + allow_large_results=True, + **kwargs, + ) + + +def test_to_pandas_dry_run(session, scalars_pandas_df_multi_index): + bf_df = session.read_pandas(scalars_pandas_df_multi_index) + + result = bf_df.to_pandas(dry_run=True) + + assert len(result) == 14 + + def test_to_arrow_override_global_option(scalars_df_index): # Direct call to_arrow uses global default setting (allow_large_results=True), - # table has 'bqdf' prefix. with bigframes.option_context("bigquery.allow_large_results", True): scalars_df_index.to_arrow() table_id = scalars_df_index._query_job.destination.table_id - assert table_id.startswith("bqdf") + assert table_id is not None # When allow_large_results=False, a query_job object should not be created. # Therefore, the table_id should remain unchanged. @@ -288,6 +344,30 @@ def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index): pd.testing.assert_series_equal(actual, expected) +@pytest.mark.parametrize("allow_large_results", (True, False)) +def test_to_pandas_batches_w_page_size_and_max_results(session, allow_large_results): + """Verify to_pandas_batches() API returns the expected page size. + + Regression test for b/407521010.
+ """ + bf_df = session.read_gbq( + "bigquery-public-data.usa_names.usa_1910_2013", + index_col=bigframes.enums.DefaultIndexKind.NULL, + ) + expected_column_count = len(bf_df.columns) + + batch_count = 0 + for pd_df in bf_df.to_pandas_batches( + page_size=42, allow_large_results=allow_large_results, max_results=42 * 3 + ): + batch_row_count, batch_column_count = pd_df.shape + batch_count += 1 + assert batch_column_count == expected_column_count + assert batch_row_count == 42 + + assert batch_count == 3 + + @pytest.mark.parametrize( ("index",), [(True,), (False,)], @@ -789,11 +869,3 @@ def test_to_sql_query_named_index_excluded( utils.assert_pandas_df_equal( roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True ) - - -def test_to_pandas_dry_run(session, scalars_pandas_df_multi_index): - bf_df = session.read_pandas(scalars_pandas_df_multi_index) - - result = bf_df.to_pandas(dry_run=True) - - assert len(result) == 14 diff --git a/tests/system/small/test_encryption.py b/tests/system/small/test_encryption.py index 8ce53c218b..97f44694b0 100644 --- a/tests/system/small/test_encryption.py +++ b/tests/system/small/test_encryption.py @@ -84,36 +84,6 @@ def test_session_query_job(bq_cmek, session_with_bq_cmek): assert table.encryption_configuration.kms_key_name == bq_cmek -def test_session_load_job(bq_cmek, session_with_bq_cmek): - if not bq_cmek: # pragma: NO COVER - pytest.skip("no cmek set for testing") # pragma: NO COVER - - # Session should have cmek set in the default query and load job configs - load_table = session_with_bq_cmek._temp_storage_manager.allocate_temp_table() - - df = pandas.DataFrame({"col0": [1, 2, 3]}) - load_job_config = bigquery.LoadJobConfig() - load_job_config.schema = [ - bigquery.SchemaField(df.columns[0], bigquery.enums.SqlTypeNames.INT64) - ] - - load_job = session_with_bq_cmek.bqclient.load_table_from_dataframe( - df, - load_table, - job_config=load_job_config, - ) - load_job.result() - - assert load_job.destination == load_table - assert load_job.destination_encryption_configuration.kms_key_name.startswith( - bq_cmek - ) - - # The load destination table should be created with the intended encryption - table = session_with_bq_cmek.bqclient.get_table(load_job.destination) - assert table.encryption_configuration.kms_key_name == bq_cmek - - def test_read_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id): if not bq_cmek: # pragma: NO COVER pytest.skip("no cmek set for testing") # pragma: NO COVER @@ -194,7 +164,7 @@ def test_to_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id): # Write the result to BQ custom table and assert encryption session_with_bq_cmek.bqclient.get_table(output_table_id) - output_table_ref = session_with_bq_cmek._temp_storage_manager.allocate_temp_table() + output_table_ref = session_with_bq_cmek._anon_dataset_manager.allocate_temp_table() output_table_id = str(output_table_ref) df.to_gbq(output_table_id) output_table = session_with_bq_cmek.bqclient.get_table(output_table_id) @@ -232,7 +202,7 @@ def test_read_pandas_large(bq_cmek, session_with_bq_cmek): _assert_bq_table_is_encrypted(df, bq_cmek, session_with_bq_cmek) -def test_bqml(bq_cmek, session_with_bq_cmek, penguins_table_id): +def test_kms_encryption_bqml(bq_cmek, session_with_bq_cmek, penguins_table_id): if not bq_cmek: # pragma: NO COVER pytest.skip("no cmek set for testing") # pragma: NO COVER diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 2e5cd18158..f1d2bacf08 100644 --- a/tests/system/small/test_groupby.py +++ 
b/tests/system/small/test_groupby.py @@ -16,7 +16,7 @@ import pytest import bigframes.pandas as bpd -from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas +from tests.system.utils import assert_pandas_df_equal # ================= # DataFrame.groupby @@ -94,7 +94,6 @@ def test_dataframe_groupby_quantile(scalars_df_index, scalars_pandas_df_index, q ) -@skip_legacy_pandas @pytest.mark.parametrize( ("na_option", "method", "ascending"), [ @@ -132,6 +131,8 @@ def test_dataframe_groupby_rank( method, ascending, ): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") col_names = ["int64_too", "float64_col", "int64_col", "string_col"] bf_result = ( scalars_df_index[col_names] @@ -599,7 +600,6 @@ def test_series_groupby_agg_list(scalars_df_index, scalars_pandas_df_index): ) -@skip_legacy_pandas @pytest.mark.parametrize( ("na_option", "method", "ascending"), [ @@ -637,6 +637,8 @@ def test_series_groupby_rank( method, ascending, ): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") col_names = ["int64_col", "string_col"] bf_result = ( scalars_df_index[col_names] diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py index 535e4bc9ae..9f45c8465b 100644 --- a/tests/system/small/test_index.py +++ b/tests/system/small/test_index.py @@ -426,11 +426,3 @@ def test_multiindex_repr_includes_all_names(session): ) index = session.read_pandas(df).set_index(["A", "B"]).index assert "names=['A', 'B']" in repr(index) - - -def test_to_pandas_dry_run(scalars_df_index): - index = scalars_df_index.index - - result = index.to_pandas(dry_run=True) - - assert len(result) == 14 diff --git a/tests/system/small/test_index_io.py b/tests/system/small/test_index_io.py index 85001e4ec5..fcb3fa3920 100644 --- a/tests/system/small/test_index_io.py +++ b/tests/system/small/test_index_io.py @@ -20,10 +20,9 @@ def test_to_pandas_override_global_option(scalars_df_index): bf_index = scalars_df_index.index # Direct call to_pandas uses global default setting (allow_large_results=True), - # table has 'bqdf' prefix. bf_index.to_pandas() table_id = bf_index._query_job.destination.table_id - assert table_id.startswith("bqdf") + assert table_id is not None # When allow_large_results=False, a query_job object should not be created. # Therefore, the table_id should remain unchanged. @@ -31,6 +30,14 @@ def test_to_pandas_override_global_option(scalars_df_index): assert bf_index._query_job.destination.table_id == table_id +def test_to_pandas_dry_run(scalars_df_index): + index = scalars_df_index.index + + result = index.to_pandas(dry_run=True) + + assert len(result) == 14 + + def test_to_numpy_override_global_option(scalars_df_index): with bigframes.option_context("bigquery.allow_large_results", True): @@ -40,7 +47,7 @@ def test_to_numpy_override_global_option(scalars_df_index): # table has 'bqdf' prefix. bf_index.to_numpy() table_id = bf_index._query_job.destination.table_id - assert table_id.startswith("bqdf") + assert table_id is not None # When allow_large_results=False, a query_job object should not be created. # Therefore, the table_id should remain unchanged. 
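Note: the recurring replacement of the shared skip_legacy_pandas decorator with an inline guard behaves as in this minimal sketch (the test name and Series contents are illustrative):

    import pytest

    def test_needs_pandas_2x():
        # importorskip returns the module when the installed version meets
        # minversion and skips the test at runtime otherwise, which is why
        # it can stand in for the removed @skip_legacy_pandas decorator.
        pd = pytest.importorskip("pandas", minversion="2.0.0")
        s = pd.Series([1, 2, 3], dtype="int64[pyarrow]")  # pyarrow-backed dtype (needs pyarrow installed)
        assert int(s.sum()) == 6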
diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py index 1c78ac63d9..a01b7aab92 100644 --- a/tests/system/small/test_multiindex.py +++ b/tests/system/small/test_multiindex.py @@ -17,7 +17,7 @@ import pytest import bigframes.pandas as bpd -from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas +from tests.system.utils import assert_pandas_df_equal def test_multi_index_from_arrays(): @@ -45,8 +45,9 @@ def test_multi_index_from_arrays(): pandas.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx) -@skip_legacy_pandas def test_read_pandas_multi_index_axes(): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") index = pandas.MultiIndex.from_arrays( [ pandas.Index([4, 99], dtype=pandas.Int64Dtype()), @@ -759,8 +760,9 @@ def test_column_multi_index_binary_op(scalars_df_index, scalars_pandas_df_index) pandas.testing.assert_series_equal(bf_result, pd_result) -@skip_legacy_pandas def test_column_multi_index_any(): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") columns = pandas.MultiIndex.from_tuples( [("col0", "col00"), ("col0", "col00"), ("col1", "col11")] ) diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py index 6da4c6ff9c..cf41daeb51 100644 --- a/tests/system/small/test_null_index.py +++ b/tests/system/small/test_null_index.py @@ -18,7 +18,6 @@ import bigframes.exceptions import bigframes.pandas as bpd -from tests.system.utils import skip_legacy_pandas def test_null_index_to_gbq(session, scalars_df_null_index, dataset_id_not_created): @@ -126,8 +125,9 @@ def test_null_index_groupby_aggregate( pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) -@skip_legacy_pandas def test_null_index_analytic(scalars_df_null_index, scalars_pandas_df_default_index): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") bf_result = scalars_df_null_index["int64_col"].cumsum().to_pandas() pd_result = scalars_pandas_df_default_index["int64_col"].cumsum() pd.testing.assert_series_equal( @@ -173,7 +173,6 @@ def test_null_index_merge_left_null_index_object( assert got.shape == expected.shape -@skip_legacy_pandas @pytest.mark.parametrize( ("expr",), [ @@ -185,6 +184,8 @@ def test_null_index_merge_left_null_index_object( def test_null_index_df_eval( scalars_df_null_index, scalars_pandas_df_default_index, expr ): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") bf_result = scalars_df_null_index.eval(expr).to_pandas() pd_result = scalars_pandas_df_default_index.eval(expr) @@ -237,8 +238,9 @@ def test_null_index_merge_two_null_index_objects( assert got.shape == expected.shape -@skip_legacy_pandas def test_null_index_stack(scalars_df_null_index, scalars_pandas_df_default_index): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") stacking_cols = ["int64_col", "int64_too"] bf_result = scalars_df_null_index[stacking_cols].stack().to_pandas() pd_result = ( diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index 2b6dfefb12..491b56d5fc 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -16,6 +16,7 @@ import typing import pandas as pd +import pyarrow as pa import pytest import pytz @@ -39,6 +40,16 @@ def 
test_concat_dataframe(scalars_dfs, ordered): assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered) +def test_concat_dataframe_w_struct_cols(nested_structs_df, nested_structs_pandas_df): + """Avoid regressions for internal issue 407107482""" + empty_bf_df = bpd.DataFrame(session=nested_structs_df._block.session) + bf_result = bpd.concat((empty_bf_df, nested_structs_df), ignore_index=True) + bf_result = bf_result.to_pandas() + pd_result = pd.concat((pd.DataFrame(), nested_structs_pandas_df), ignore_index=True) + pd_result.index = pd_result.index.astype("Int64") + pd.testing.assert_frame_equal(bf_result, pd_result) + + def test_concat_series(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = bpd.concat( @@ -388,150 +399,229 @@ def test_merge_series(scalars_dfs, merge_how): def _convert_pandas_category(pd_s: pd.Series): + """ + Transforms a pandas Series produced by pd.cut into a bigframes-compatible + Series (integer codes, string labels, or interval structs). + """ + # When `labels=False`, pd.cut returns numeric codes, so cast them to Int64. + if pd.api.types.is_integer_dtype(pd_s.dtype) or pd.api.types.is_float_dtype( + pd_s.dtype + ): + return pd_s.astype("Int64") + if not isinstance(pd_s.dtype, pd.CategoricalDtype): - raise ValueError("Input must be a pandas Series with categorical data.") + raise ValueError( + f"Input must be a pandas Series with categorical data: {pd_s.dtype}" + ) - if len(pd_s.dtype.categories) == 0: - return pd.Series([pd.NA] * len(pd_s), name=pd_s.name) + if pd.api.types.is_object_dtype(pd_s.cat.categories.dtype): + return pd_s.astype(pd.StringDtype(storage="pyarrow")) - pd_interval: pd.IntervalIndex = pd_s.cat.categories[pd_s.cat.codes] # type: ignore - if pd_interval.closed == "left": + if not isinstance(pd_s.cat.categories.dtype, pd.IntervalDtype): + raise ValueError( + f"Must be an IntervalDtype with categorical data: {pd_s.cat.categories.dtype}" + ) + + if pd_s.cat.categories.dtype.closed == "left": # type: ignore left_key = "left_inclusive" right_key = "right_exclusive" else: left_key = "left_exclusive" right_key = "right_inclusive" - return pd.Series( - [ - {left_key: interval.left, right_key: interval.right} + + subtype = pd_s.cat.categories.dtype.subtype # type: ignore + if pd.api.types.is_float_dtype(subtype): + interval_dtype = pa.float64() + elif pd.api.types.is_integer_dtype(subtype): + interval_dtype = pa.int64() + else: + raise ValueError(f"Unknown category type: {subtype}") + + dtype = pd.ArrowDtype( + pa.struct( + [ + pa.field(left_key, interval_dtype, nullable=True), + pa.field(right_key, interval_dtype, nullable=True), + ] + ) + ) + + if len(pd_s.dtype.categories) == 0: + data = [pd.NA] * len(pd_s) + else: + data = [ + {left_key: interval.left, right_key: interval.right} # type: ignore if pd.notna(val) else pd.NA - for val, interval in zip(pd_s, pd_interval) - ], + for val, interval in zip(pd_s, pd_s.cat.categories[pd_s.cat.codes]) # type: ignore + ] + + return pd.Series( + data=data, name=pd_s.name, + dtype=dtype, + index=pd_s.index.astype("Int64"), ) @pytest.mark.parametrize( - ("right"), + ("right", "labels"), [ - pytest.param(True), - pytest.param(False), + pytest.param(True, None, id="right_w_none_labels"), + pytest.param(True, False, id="right_w_false_labels"), + pytest.param(False, None, id="left_w_none_labels"), + pytest.param(False, False, id="left_w_false_labels"), ], ) -def test_cut(scalars_dfs, right): +def test_cut_by_int_bins(scalars_dfs, labels, right): scalars_df, scalars_pandas_df = scalars_dfs - pd_result = pd.cut(scalars_pandas_df["float64_col"], 5, labels=False, 
right=right) - bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=False, right=right) + pd_result = pd.cut(scalars_pandas_df["float64_col"], 5, labels=labels, right=right) + bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=labels, right=right) - # make sure the result is a supported dtype - assert bf_result.dtype == bpd.Int64Dtype() - pd_result = pd_result.astype("Int64") + pd_result = _convert_pandas_category(pd_result) pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) -@pytest.mark.parametrize( - ("right"), - [ - pytest.param(True), - pytest.param(False), - ], -) -def test_cut_default_labels(scalars_dfs, right): +def test_cut_by_int_bins_w_labels(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs - pd_result = pd.cut(scalars_pandas_df["float64_col"], 5, right=right) - bf_result = bpd.cut(scalars_df["float64_col"], 5, right=right).to_pandas() + labels = ["A", "B", "C", "D", "E"] + pd_result = pd.cut(scalars_pandas_df["float64_col"], 5, labels=labels) + bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=labels) - # Convert to match data format - pd_result_converted = _convert_pandas_category(pd_result) - pd.testing.assert_series_equal( - bf_result, pd_result_converted, check_index=False, check_dtype=False - ) + pd_result = _convert_pandas_category(pd_result) + pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( - ("breaks", "right"), + ("breaks", "right", "labels"), [ - pytest.param([0, 5, 10, 15, 20, 100, 1000], True, id="int_right"), - pytest.param([0, 5, 10, 15, 20, 100, 1000], False, id="int_left"), - pytest.param([0.5, 10.5, 15.5, 20.5, 100.5, 1000.5], False, id="float_left"), - pytest.param([0, 5, 10.5, 15.5, 20, 100, 1000.5], True, id="mixed_right"), + pytest.param( + [0, 5, 10, 15, 20, 100, 1000], + True, + None, + id="int_breaks_w_right_closed_and_none_labels", + ), + pytest.param( + [0, 5, 10, 15, 20, 100, 1000], + False, + False, + id="int_breaks_w_left_closed_and_false_labels", + ), + pytest.param( + [0.5, 10.5, 15.5, 20.5, 100.5, 1000.5], + False, + None, + id="float_breaks_w_left_closed_and_none_labels", + ), + pytest.param( + [0, 5, 10.5, 15.5, 20, 100, 1000.5], + True, + False, + id="mixed_types_breaks_w_right_closed_and_false_labels", + ), ], ) -def test_cut_numeric_breaks(scalars_dfs, breaks, right): +def test_cut_by_numeric_breaks(scalars_dfs, breaks, right, labels): scalars_df, scalars_pandas_df = scalars_dfs - pd_result = pd.cut(scalars_pandas_df["float64_col"], breaks, right=right) - bf_result = bpd.cut(scalars_df["float64_col"], breaks, right=right).to_pandas() + pd_result = pd.cut( + scalars_pandas_df["float64_col"], breaks, right=right, labels=labels + ) + bf_result = bpd.cut( + scalars_df["float64_col"], breaks, right=right, labels=labels + ).to_pandas() - # Convert to match data format pd_result_converted = _convert_pandas_category(pd_result) - - pd.testing.assert_series_equal( - bf_result, pd_result_converted, check_index=False, check_dtype=False - ) + pd.testing.assert_series_equal(bf_result, pd_result_converted) -@pytest.mark.parametrize( - "bins", - [ - pytest.param([], id="empty_list"), - pytest.param( - [1], id="single_int_list", marks=pytest.mark.skip(reason="b/404338651") - ), - pytest.param(pd.IntervalIndex.from_tuples([]), id="empty_interval_index"), - ], -) -def test_cut_w_edge_cases(scalars_dfs, bins): +def test_cut_by_numeric_breaks_w_labels(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs - bf_result = bpd.cut(scalars_df["int64_too"], bins, 
labels=False).to_pandas() - if isinstance(bins, list): - bins = pd.IntervalIndex.from_tuples(bins) - pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=False) - # Convert to match data format - pd_result_converted = _convert_pandas_category(pd_result) + bins = [0, 5, 10, 15, 20] + labels = ["A", "B", "C", "D"] + pd_result = pd.cut(scalars_pandas_df["float64_col"], bins, labels=labels) + bf_result = bpd.cut(scalars_df["float64_col"], bins, labels=labels) - pd.testing.assert_series_equal( - bf_result, pd_result_converted, check_index=False, check_dtype=False - ) + pd_result = _convert_pandas_category(pd_result) + pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( - ("bins", "right"), + ("bins", "right", "labels"), [ - pytest.param([(-5, 2), (2, 3), (-3000, -10)], True, id="tuple_right"), - pytest.param([(-5, 2), (2, 3), (-3000, -10)], False, id="tuple_left"), + pytest.param( + [(-5, 2), (2, 3), (-3000, -10)], True, None, id="tuple_right_w_none_labels" + ), + pytest.param( + [(-5, 2), (2, 3), (-3000, -10)], + False, + False, + id="tuple_left_w_false_labels", + ), pytest.param( pd.IntervalIndex.from_tuples([(1, 2), (2, 3), (4, 5)]), True, - id="interval_right", + False, + id="interval_right_w_false_labels", ), pytest.param( pd.IntervalIndex.from_tuples([(1, 2), (2, 3), (4, 5)]), False, - id="interval_left", + None, + id="interval_left_w_none_labels", ), ], ) -def test_cut_with_interval(scalars_dfs, bins, right): +def test_cut_by_interval_bins(scalars_dfs, bins, right, labels): scalars_df, scalars_pandas_df = scalars_dfs bf_result = bpd.cut( - scalars_df["int64_too"], bins, labels=False, right=right + scalars_df["int64_too"], bins, labels=labels, right=right ).to_pandas() if isinstance(bins, list): bins = pd.IntervalIndex.from_tuples(bins) - pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=False, right=right) + pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=labels, right=right) - # Convert to match data format pd_result_converted = _convert_pandas_category(pd_result) + pd.testing.assert_series_equal(bf_result, pd_result_converted) - pd.testing.assert_series_equal( - bf_result, pd_result_converted, check_index=False, check_dtype=False - ) + + +def test_cut_by_interval_bins_w_labels(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + bins = pd.IntervalIndex.from_tuples([(1, 2), (2, 3), (4, 5)]) + labels = ["A", "B", "C", "D", "E"] + pd_result = pd.cut(scalars_pandas_df["float64_col"], bins, labels=labels) + bf_result = bpd.cut(scalars_df["float64_col"], bins, labels=labels) + + pd_result = _convert_pandas_category(pd_result) + pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + + +@pytest.mark.parametrize( + ("bins", "labels"), + [ + pytest.param([], None, id="empty_breaks"), + pytest.param([1], False, id="single_int_breaks"), + pytest.param(pd.IntervalIndex.from_tuples([]), None, id="empty_interval_index"), + ], +) +def test_cut_by_edge_cases_bins(scalars_dfs, bins, labels): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = bpd.cut(scalars_df["int64_too"], bins, labels=labels).to_pandas() + pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=labels) + + pd_result_converted = _convert_pandas_category(pd_result) + pd.testing.assert_series_equal(bf_result, pd_result_converted) + + +def test_cut_empty_array_raises_error(): + bf_series = bpd.Series([]) + with pytest.raises(ValueError, match="Cannot cut empty array"): + bpd.cut(bf_series, bins=5) @pytest.mark.parametrize( diff 
--git a/tests/system/small/test_progress_bar.py b/tests/system/small/test_progress_bar.py index 3139ae5225..9c61c8ea5b 100644 --- a/tests/system/small/test_progress_bar.py +++ b/tests/system/small/test_progress_bar.py @@ -55,6 +55,19 @@ def test_progress_bar_scalar(penguins_df_default_index: bf.dataframe.DataFrame, with bf.option_context("display.progress_bar", "terminal"): penguins_df_default_index["body_mass_g"].head(10).mean() + assert capsys.readouterr().out == "" + + +def test_progress_bar_scalar_allow_large_results( + penguins_df_default_index: bf.dataframe.DataFrame, capsys +): + capsys.readouterr() # clear output + + with bf.option_context( + "display.progress_bar", "terminal", "bigquery.allow_large_results", "True" + ): + penguins_df_default_index["body_mass_g"].head(10).mean() + assert_loading_msg_exist(capsys.readouterr().out) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 5ca055dc43..c63bf8e12b 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -24,7 +24,7 @@ import pandas as pd import pyarrow as pa # type: ignore import pytest -import shapely # type: ignore +import shapely.geometry # type: ignore import bigframes.dtypes as dtypes import bigframes.features @@ -34,7 +34,6 @@ assert_pandas_df_equal, assert_series_equal, get_first_file_from_wildcard, - skip_legacy_pandas, ) @@ -229,7 +228,11 @@ def test_series_construct_from_list_escaped_strings(): def test_series_construct_geodata(): pd_series = pd.Series( - [shapely.Point(1, 1), shapely.Point(2, 2), shapely.Point(3, 3)], + [ + shapely.geometry.Point(1, 1), + shapely.geometry.Point(2, 2), + shapely.geometry.Point(3, 3), + ], dtype=gpd.array.GeometryDtype(), ) @@ -322,24 +325,22 @@ def test_series_construct_local_unordered_has_sequential_index(unordered_session def test_series_construct_w_dtype_for_json(): - # Until b/401630655 is resolved, json, not compatible with allow_large_results=False - with bigframes.option_context("bigquery.allow_large_results", True): - data = [ - "1", - '"str"', - "false", - '["a", {"b": 1}, null]', - None, - '{"a": {"b": [1, 2, 3], "c": true}}', - ] - s = bigframes.pandas.Series(data, dtype=dtypes.JSON_DTYPE) + data = [ + "1", + '"str"', + "false", + '["a", {"b": 1}, null]', + None, + '{"a": {"b": [1, 2, 3], "c": true}}', + ] + s = bigframes.pandas.Series(data, dtype=dtypes.JSON_DTYPE) - assert s[0] == "1" - assert s[1] == '"str"' - assert s[2] == "false" - assert s[3] == '["a",{"b":1},null]' - assert pd.isna(s[4]) - assert s[5] == '{"a":{"b":[1,2,3],"c":true}}' + assert s[0] == "1" + assert s[1] == '"str"' + assert s[2] == "false" + assert s[3] == '["a",{"b":1},null]' + assert pd.isna(s[4]) + assert s[5] == '{"a":{"b":[1,2,3],"c":true}}' def test_series_keys(scalars_dfs): @@ -402,8 +403,7 @@ def test_get_column(scalars_dfs, col_name, expected_dtype): def test_get_column_w_json(json_df, json_pandas_df): series = json_df["json_col"] - # Until b/401630655 is resolved, json not compatible with allow_large_results=False - series_pandas = series.to_pandas(allow_large_results=True) + series_pandas = series.to_pandas() assert series.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType()) assert series_pandas.shape[0] == json_pandas_df.shape[0] @@ -1033,8 +1033,9 @@ def test_series_corr(scalars_dfs): assert math.isclose(pd_result, bf_result) -@skip_legacy_pandas def test_series_autocorr(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, 
scalars_pandas_df = scalars_dfs bf_result = scalars_df["float64_col"].autocorr(2) pd_result = scalars_pandas_df["float64_col"].autocorr(2) @@ -1683,8 +1684,9 @@ def test_binop_right_filtered(scalars_dfs): (pd.Series([-1.4, 2.3, None], index=[44, 2, 1]),), ], ) -@skip_legacy_pandas def test_series_binop_w_other_types(scalars_dfs, other): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_result = (scalars_df["int64_col"].head(3) + other).to_pandas() @@ -1704,8 +1706,9 @@ def test_series_binop_w_other_types(scalars_dfs, other): (pd.Series([-1.4, 2.3, None], index=[44, 2, 1]),), ], ) -@skip_legacy_pandas def test_series_reverse_binop_w_other_types(scalars_dfs, other): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs bf_result = (other + scalars_df["int64_col"].head(3)).to_pandas() @@ -1717,8 +1720,9 @@ def test_series_reverse_binop_w_other_types(scalars_dfs, other): ) -@skip_legacy_pandas def test_series_combine_first(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs int64_col = scalars_df["int64_col"].head(7) float64_col = scalars_df["float64_col"].tail(7) @@ -2361,8 +2365,9 @@ def test_series_peek_filtered(scalars_dfs): ) -@skip_legacy_pandas def test_series_peek_force(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs cumsum_df = scalars_df[["int64_col", "int64_too"]].cumsum() @@ -2376,8 +2381,9 @@ def test_series_peek_force(scalars_dfs): ) -@skip_legacy_pandas def test_series_peek_force_float(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df, scalars_pandas_df = scalars_dfs cumsum_df = scalars_df[["int64_col", "float64_col"]].cumsum() @@ -2594,8 +2600,9 @@ def test_cumsum_nested(scalars_df_index, scalars_pandas_df_index): ) -@skip_legacy_pandas def test_nested_analytic_ops_align(scalars_df_index, scalars_pandas_df_index): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") col_name = "float64_col" # set non-unique index to check implicit alignment bf_series = scalars_df_index.set_index("bool_col")[col_name].fillna(0.0) @@ -3186,8 +3193,9 @@ def test_series_to_json_local_str(scalars_df_index, scalars_pandas_df_index): assert bf_result == pd_result -@skip_legacy_pandas def test_series_to_json_local_file(scalars_df_index, scalars_pandas_df_index): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file: scalars_df_index.int64_col.to_json(bf_result_file) scalars_pandas_df_index.int64_col.to_json(pd_result_file) @@ -3478,8 +3486,9 @@ def foo(x): # https://cloud.google.com/bigquery/docs/reference/standard-sql/conversion_functions ], ) -@skip_legacy_pandas def test_astype(scalars_df_index, scalars_pandas_df_index, column, to_type, errors): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") bf_result = 
scalars_df_index[column].astype(to_type, errors=errors).to_pandas() pd_result = scalars_pandas_df_index[column].astype(to_type) pd.testing.assert_series_equal(bf_result, pd_result) @@ -3513,8 +3522,9 @@ def test_series_astype_error_error(session): session.read_pandas(input).astype("Float64", errors="bad_value") -@skip_legacy_pandas def test_astype_numeric_to_int(scalars_df_index, scalars_pandas_df_index): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") column = "numeric_col" to_type = "Int64" bf_result = scalars_df_index[column].astype(to_type).to_pandas() @@ -3531,10 +3541,11 @@ def test_astype_numeric_to_int(scalars_df_index, scalars_pandas_df_index): ("time_col", "int64[pyarrow]"), ], ) -@skip_legacy_pandas def test_date_time_astype_int( scalars_df_index, scalars_pandas_df_index, column, to_type ): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") bf_result = scalars_df_index[column].astype(to_type).to_pandas() pd_result = scalars_pandas_df_index[column].astype(to_type) pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) @@ -4345,6 +4356,21 @@ def test_series_explode_w_aggregate(): assert s.explode().sum() == pd_s.explode().sum() +def test_series_construct_empty_array(): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") + s = bigframes.pandas.Series([[]]) + expected = pd.Series( + [[]], + dtype=pd.ArrowDtype(pa.list_(pa.float64())), + index=pd.Index([0], dtype=pd.Int64Dtype()), + ) + pd.testing.assert_series_equal( + expected, + s.to_pandas(), + ) + + @pytest.mark.parametrize( ("data"), [ @@ -4363,7 +4389,6 @@ def test_series_explode_null(data): ) -@skip_legacy_pandas @pytest.mark.parametrize( ("append", "level", "col", "rule"), [ @@ -4374,6 +4399,8 @@ def test_series_explode_null(data): ], ) def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") scalars_df_index = scalars_df_index.set_index(col, append=append)["int64_col"] scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)[ "int64_col" @@ -4384,13 +4411,13 @@ def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col def test_series_struct_get_field_by_attribute( - nested_structs_df, nested_structs_pandas_df, nested_structs_pandas_type + nested_structs_df, nested_structs_pandas_df ): if Version(pd.__version__) < Version("2.2.0"): pytest.skip("struct accessor is not supported before pandas 2.2") bf_series = nested_structs_df["person"] - df_series = nested_structs_pandas_df["person"].astype(nested_structs_pandas_type) + df_series = nested_structs_pandas_df["person"] pd.testing.assert_series_equal( bf_series.address.city.to_pandas(), diff --git a/tests/system/small/test_series_io.py b/tests/system/small/test_series_io.py index ae09a2cf5d..235ae65750 100644 --- a/tests/system/small/test_series_io.py +++ b/tests/system/small/test_series_io.py @@ -11,7 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
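Note: the Series.to_pandas_batches tests in the next file assert the page accounting sketched here in pure Python (a hypothetical helper, not the bigframes implementation):

    from typing import Iterator, List

    def paginate(rows: List[int], page_size: int, max_results: int) -> Iterator[List[int]]:
        # max_results caps the total row count first; page_size then slices
        # what remains, so every batch but possibly the last holds exactly
        # page_size rows.
        capped = rows[:max_results]
        for start in range(0, len(capped), page_size):
            yield capped[start : start + page_size]

    # Mirrors the b/407521010 regression test: 42 * 3 rows in pages of 42.
    assert len(list(paginate(list(range(10_000)), 42, 42 * 3))) == 3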
+import pandas as pd +import pytest + import bigframes +import bigframes.series def test_to_pandas_override_global_option(scalars_df_index): @@ -19,11 +23,10 @@ def test_to_pandas_override_global_option(scalars_df_index): bf_series = scalars_df_index["int64_col"] - # Direct call to_pandas uses global default setting (allow_large_results=True), - # table has 'bqdf' prefix. + # Direct call to_pandas uses global default setting (allow_large_results=True) bf_series.to_pandas() table_id = bf_series._query_job.destination.table_id - assert table_id.startswith("bqdf") + assert table_id is not None session = bf_series._block.session execution_count = session._metrics.execution_count @@ -33,3 +36,81 @@ def test_to_pandas_override_global_option(scalars_df_index): bf_series.to_pandas(allow_large_results=False) assert bf_series._query_job.destination.table_id == table_id assert session._metrics.execution_count - execution_count == 1 + + +@pytest.mark.parametrize( + ("kwargs", "message"), + [ + pytest.param( + {"sampling_method": "head"}, + r"DEPRECATED[\S\s]*sampling_method[\S\s]*Series.sample", + id="sampling_method", + ), + pytest.param( + {"random_state": 10}, + r"DEPRECATED[\S\s]*random_state[\S\s]*Series.sample", + id="random_state", + ), + pytest.param( + {"max_download_size": 10}, + r"DEPRECATED[\S\s]*max_download_size[\S\s]*Series.to_pandas_batches", + id="max_download_size", + ), + ], +) +def test_to_pandas_warns_deprecated_parameters(scalars_df_index, kwargs, message): + s: bigframes.series.Series = scalars_df_index["int64_col"] + with pytest.warns(FutureWarning, match=message): + s.to_pandas( + # limits only apply for allow_large_results=True + allow_large_results=True, + **kwargs, + ) + + +@pytest.mark.parametrize( + ("page_size", "max_results", "allow_large_results"), + [ + pytest.param(None, None, True), + pytest.param(2, None, False), + pytest.param(None, 1, True), + pytest.param(2, 5, False), + pytest.param(3, 6, True), + pytest.param(3, 100, False), + pytest.param(100, 100, True), + ], +) +def test_to_pandas_batches(scalars_dfs, page_size, max_results, allow_large_results): + scalars_df, scalars_pandas_df = scalars_dfs + bf_series = scalars_df["int64_col"] + pd_series = scalars_pandas_df["int64_col"] + + total_rows = 0 + expected_total_rows = ( + min(max_results, len(pd_series)) if max_results else len(pd_series) + ) + + hit_last_page = False + for s in bf_series.to_pandas_batches( + page_size=page_size, + max_results=max_results, + allow_large_results=allow_large_results, + ): + assert not hit_last_page + + actual_rows = s.shape[0] + expected_rows = ( + min(page_size, expected_total_rows) if page_size else expected_total_rows + ) + + assert actual_rows <= expected_rows + if actual_rows < expected_rows: + assert page_size + hit_last_page = True + + pd.testing.assert_series_equal( + s, pd_series[total_rows : total_rows + actual_rows] + ) + total_rows += actual_rows + + assert total_rows == expected_total_rows diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index e286c40450..24edc91c93 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -39,6 +39,33 @@ from tests.system import utils +@pytest.fixture(scope="module") +def df_and_local_csv(scalars_df_index): + # BigQuery load job schema auto-detection has trouble with the bytes, + # datetime, numeric and geometry types, so those columns are skipped here. 
+ drop_columns = ["bytes_col", "datetime_col", "numeric_col", "geography_col"] + scalars_df_index = scalars_df_index.drop(columns=drop_columns) + + with tempfile.TemporaryDirectory() as dir: + # Prepares local CSV file for reading + path = dir + "/write_df_to_local_csv_file.csv" + scalars_df_index.to_csv(path, index=True) + yield scalars_df_index, path + + +@pytest.fixture(scope="module") +def df_and_gcs_csv(scalars_df_index, gcs_folder): + # BigQuery load job schema auto-detection has trouble with the bytes, + # datetime, numeric and geometry types, so those columns are skipped here. + drop_columns = ["bytes_col", "datetime_col", "numeric_col", "geography_col"] + scalars_df_index = scalars_df_index.drop(columns=drop_columns) + + path = gcs_folder + "test_read_csv_w_write_engine*.csv" + read_path = utils.get_first_file_from_wildcard(path) + scalars_df_index.to_csv(path, index=True) + return scalars_df_index, read_path + + def test_read_gbq_tokyo( session_tokyo: bigframes.Session, scalars_table_tokyo: str, @@ -630,8 +657,7 @@ def test_read_gbq_w_json(session): ) ), """ - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - df = session.read_gbq(sql, index_col="id").to_pandas(allow_large_results=True) + df = session.read_gbq(sql, index_col="id") assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType()) @@ -651,17 +677,14 @@ def test_read_gbq_w_json_and_compare_w_pandas_json(session): df = session.read_gbq("SELECT JSON_OBJECT('foo', 10, 'bar', TRUE) AS json_col") assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType()) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - result = df.to_pandas(allow_large_results=True) - # These JSON strings are compatible with BigQuery's JSON storage, pd_df = pd.DataFrame( {"json_col": ['{"bar":true,"foo":10}']}, dtype=pd.ArrowDtype(db_dtypes.JSONArrowType()), ) pd_df.index = pd_df.index.astype("Int64") - pd.testing.assert_series_equal(result.dtypes, pd_df.dtypes) - pd.testing.assert_series_equal(result["json_col"], pd_df["json_col"]) + pd.testing.assert_series_equal(df.dtypes, pd_df.dtypes) + pd.testing.assert_series_equal(df["json_col"].to_pandas(), pd_df["json_col"]) def test_read_gbq_w_json_in_struct(session): @@ -697,9 +720,6 @@ def test_read_gbq_w_json_in_struct(session): data = df["struct_col"].struct.field("data") assert data.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType()) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - data = data.to_pandas(allow_large_results=True) - assert data[0] == '{"boolean":true}' assert data[1] == '{"int":100}' assert data[2] == '{"float":0.98}' @@ -738,10 +758,7 @@ def test_read_gbq_w_json_in_array(session): assert data.list.len()[0] == 7 assert data.list[0].dtype == pd.ArrowDtype(db_dtypes.JSONArrowType()) - # TODO(b/401630655): JSON is not compatible with allow_large_results=False - pd_data = data.to_pandas(allow_large_results=True) - - assert pd_data[0] == [ + assert data[0] == [ '{"boolean":true}', '{"int":100}', '{"float":0.98}', @@ -853,17 +870,20 @@ def test_read_pandas_tokyo( ["default", "bigquery_inline", "bigquery_load", "bigquery_streaming"], ) def test_read_pandas_timedelta_dataframes(session, write_engine): - expected_df = pd.DataFrame({"my_col": pd.to_timedelta([1, 2, 3], unit="d")}) - - actual_result = ( - session.read_pandas(expected_df, write_engine=write_engine) - .to_pandas() - .astype("timedelta64[ns]") + pytest.importorskip( + "pandas", + minversion="2.0.0", + reason="old versions don't 
support local casting to arrow duration", ) + pandas_df = pd.DataFrame({"my_col": pd.to_timedelta([1, 2, 3], unit="d")}) - if write_engine == "bigquery_streaming": - expected_df.index = pd.Index([pd.NA] * 3, dtype="Int64") - pd.testing.assert_frame_equal(actual_result, expected_df, check_index_type=False) + actual_result = session.read_pandas( + pandas_df, write_engine=write_engine + ).to_pandas() + expected_result = pandas_df.astype(bigframes.dtypes.TIMEDELTA_DTYPE) + expected_result.index = expected_result.index.astype(bigframes.dtypes.INT_DTYPE) + + pd.testing.assert_frame_equal(actual_result, expected_result) @pytest.mark.parametrize( @@ -873,15 +893,12 @@ def test_read_pandas_timedelta_dataframes(session, write_engine): def test_read_pandas_timedelta_series(session, write_engine): expected_series = pd.Series(pd.to_timedelta([1, 2, 3], unit="d")) - # Until b/401630655 is resolved, json not compatible with allow_large_results=False actual_result = ( session.read_pandas(expected_series, write_engine=write_engine) .to_pandas() .astype("timedelta64[ns]") ) - if write_engine == "bigquery_streaming": - expected_series.index = pd.Index([pd.NA] * 3, dtype="Int64") pd.testing.assert_series_equal( actual_result, expected_series, check_index_type=False ) @@ -889,17 +906,16 @@ def test_read_pandas_timedelta_series(session, write_engine): @pytest.mark.parametrize( "write_engine", - ["default", "bigquery_inline", "bigquery_load"], + ["default", "bigquery_inline", "bigquery_load", "bigquery_streaming"], ) def test_read_pandas_timedelta_index(session, write_engine): expected_index = pd.to_timedelta( [1, 2, 3], unit="d" ) # to_timedelta returns an index - # Until b/401630655 is resolved, json not compatible with allow_large_results=False actual_result = ( session.read_pandas(expected_index, write_engine=write_engine) - .to_pandas(allow_large_results=True) + .to_pandas() .astype("timedelta64[ns]") ) @@ -912,7 +928,7 @@ def test_read_pandas_timedelta_index(session, write_engine): pytest.param("default"), pytest.param("bigquery_load"), pytest.param("bigquery_streaming"), - pytest.param("bigquery_inline", marks=pytest.mark.xfail(raises=ValueError)), + pytest.param("bigquery_inline"), ], ) def test_read_pandas_json_dataframes(session, write_engine): @@ -926,19 +942,21 @@ def test_read_pandas_json_dataframes(session, write_engine): {"my_col": pd.Series(json_data, dtype=bigframes.dtypes.JSON_DTYPE)} ) - # Until b/401630655 is resolved, json not compatible with allow_large_results=False actual_result = session.read_pandas( expected_df, write_engine=write_engine - ).to_pandas(allow_large_results=True) + ).to_pandas() - if write_engine == "bigquery_streaming": - expected_df.index = pd.Index([pd.NA] * 4, dtype="Int64") pd.testing.assert_frame_equal(actual_result, expected_df, check_index_type=False) @pytest.mark.parametrize( - "write_engine", - ["default", "bigquery_load"], + ("write_engine"), + [ + pytest.param("default"), + pytest.param("bigquery_load"), + pytest.param("bigquery_streaming"), + pytest.param("bigquery_inline"), + ], ) def test_read_pandas_json_series(session, write_engine): json_data = [ @@ -949,10 +967,9 @@ def test_read_pandas_json_series(session, write_engine): ] expected_series = pd.Series(json_data, dtype=bigframes.dtypes.JSON_DTYPE) - # Until b/401630655 is resolved, json not compatible with allow_large_results=False actual_result = session.read_pandas( expected_series, write_engine=write_engine - ).to_pandas(allow_large_results=True) + ).to_pandas() pd.testing.assert_series_equal( 
actual_result, expected_series, check_index_type=False ) @@ -963,6 +980,8 @@ def test_read_pandas_json_series(session, write_engine): [ pytest.param("default"), pytest.param("bigquery_load"), + pytest.param("bigquery_streaming"), + pytest.param("bigquery_inline", marks=pytest.mark.xfail(raises=ValueError)), ], ) def test_read_pandas_json_index(session, write_engine): @@ -973,21 +992,19 @@ def test_read_pandas_json_index(session, write_engine): '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}', ] expected_index: pd.Index = pd.Index(json_data, dtype=bigframes.dtypes.JSON_DTYPE) - # Until b/401630655 is resolved, json not compatible with allow_large_results=False actual_result = session.read_pandas( expected_index, write_engine=write_engine - ).to_pandas(allow_large_results=True) + ).to_pandas() pd.testing.assert_index_equal(actual_result, expected_index) @pytest.mark.parametrize( ("write_engine"), [ - pytest.param("default"), pytest.param("bigquery_load"), ], ) -def test_read_pandas_w_nested_json(session, write_engine): +def test_read_pandas_w_nested_json_fails(session, write_engine): data = [ [{"json_field": "1"}], [{"json_field": None}], @@ -996,28 +1013,56 @@ def test_read_pandas_w_nested_json(session, write_engine): ] # PyArrow currently lacks support for creating structs or lists containing extension types. # See issue: https://github.com/apache/arrow/issues/45262 - pa_array = pa.array(data, type=pa.list_(pa.struct([("name", pa.string())]))) + pa_array = pa.array(data, type=pa.list_(pa.struct([("json_field", pa.string())]))) pd_s = pd.Series( arrays.ArrowExtensionArray(pa_array), # type: ignore dtype=pd.ArrowDtype( - pa.list_(pa.struct([("name", bigframes.dtypes.JSON_ARROW_TYPE)])) + pa.list_(pa.struct([("json_field", bigframes.dtypes.JSON_ARROW_TYPE)])) ), ) with pytest.raises(NotImplementedError, match="Nested JSON types, found in column"): - # Until b/401630655 is resolved, json not compatible with allow_large_results=False - session.read_pandas(pd_s, write_engine=write_engine).to_pandas( - allow_large_results=True - ) + session.read_pandas(pd_s, write_engine=write_engine) @pytest.mark.parametrize( ("write_engine"), [ pytest.param("default"), + pytest.param("bigquery_inline"), + pytest.param("bigquery_streaming"), + ], +) +def test_read_pandas_w_nested_json(session, write_engine): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") + data = [ + [{"json_field": "1"}], + [{"json_field": None}], + [{"json_field": '["1","3","5"]'}], + [{"json_field": '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'}], + ] + pa_array = pa.array(data, type=pa.list_(pa.struct([("json_field", pa.string())]))) + pd_s = pd.Series( + arrays.ArrowExtensionArray(pa_array), # type: ignore + dtype=pd.ArrowDtype( + pa.list_(pa.struct([("json_field", bigframes.dtypes.JSON_ARROW_TYPE)])) + ), + ) + bq_s = ( + session.read_pandas(pd_s, write_engine=write_engine) + .to_pandas() + .reset_index(drop=True) + ) + pd.testing.assert_series_equal(bq_s, pd_s) + + +@pytest.mark.parametrize( + ("write_engine"), + [ pytest.param("bigquery_load"), ], ) -def test_read_pandas_w_nested_json_index(session, write_engine): +def test_read_pandas_w_nested_json_index_fails(session, write_engine): data = [ [{"json_field": "1"}], [{"json_field": None}], @@ -1026,6 +1071,34 @@ def test_read_pandas_w_nested_json_index(session, write_engine): ] # PyArrow currently lacks support for creating structs or lists containing extension types. 
# See issue: https://github.com/apache/arrow/issues/45262 + pa_array = pa.array(data, type=pa.list_(pa.struct([("json_field", pa.string())]))) + pd_idx: pd.Index = pd.Index( + arrays.ArrowExtensionArray(pa_array), # type: ignore + dtype=pd.ArrowDtype( + pa.list_(pa.struct([("json_field", bigframes.dtypes.JSON_ARROW_TYPE)])) + ), + ) + with pytest.raises(NotImplementedError, match="Nested JSON types, found in"): + session.read_pandas(pd_idx, write_engine=write_engine) + + +@pytest.mark.parametrize( + ("write_engine"), + [ + pytest.param("default"), + pytest.param("bigquery_inline"), + pytest.param("bigquery_streaming"), + ], +) +def test_read_pandas_w_nested_json_index(session, write_engine): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") + data = [ + [{"json_field": "1"}], + [{"json_field": None}], + [{"json_field": '["1","3","5"]'}], + [{"json_field": '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'}], + ] pa_array = pa.array(data, type=pa.list_(pa.struct([("name", pa.string())]))) pd_idx: pd.Index = pd.Index( arrays.ArrowExtensionArray(pa_array), # type: ignore @@ -1033,16 +1106,10 @@ def test_read_pandas_w_nested_json_index(session, write_engine): pa.list_(pa.struct([("name", bigframes.dtypes.JSON_ARROW_TYPE)])) ), ) - with pytest.raises( - NotImplementedError, match="Nested JSON types, found in the index" - ): - # Until b/401630655 is resolved, json not compatible with allow_large_results=False - session.read_pandas(pd_idx, write_engine=write_engine).to_pandas( - allow_large_results=True - ) + bq_idx = session.read_pandas(pd_idx, write_engine=write_engine).to_pandas() + pd.testing.assert_index_equal(bq_idx, pd_idx) -@utils.skip_legacy_pandas @pytest.mark.parametrize( ("write_engine",), ( @@ -1052,88 +1119,23 @@ def test_read_pandas_w_nested_json_index(session, write_engine): ("bigquery_streaming",), ), ) -def test_read_csv_gcs_default_engine(session, scalars_dfs, gcs_folder, write_engine): - scalars_df, _ = scalars_dfs - path = gcs_folder + "test_read_csv_gcs_default_engine_w_index*.csv" - read_path = utils.get_first_file_from_wildcard(path) - scalars_df.to_csv(path, index=False) - dtype = scalars_df.dtypes.to_dict() - dtype.pop("geography_col") - df = session.read_csv( - read_path, - # Convert default pandas dtypes to match BigQuery DataFrames dtypes. - dtype=dtype, - write_engine=write_engine, - ) +def test_read_csv_for_gcs_file_w_write_engine(session, df_and_gcs_csv, write_engine): + scalars_df, path = df_and_gcs_csv - # TODO(chelsealin): If we serialize the index, can more easily compare values. - pd.testing.assert_index_equal(df.columns, scalars_df.columns) - - # The auto detects of BigQuery load job have restrictions to detect the bytes, - # numeric and geometry types, so they're skipped here. 
- df = df.drop(columns=["bytes_col", "numeric_col", "geography_col"]) - scalars_df = scalars_df.drop(columns=["bytes_col", "numeric_col", "geography_col"]) - assert df.shape[0] == scalars_df.shape[0] - pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes) - - -def test_read_csv_gcs_bq_engine(session, scalars_dfs, gcs_folder): - scalars_df, _ = scalars_dfs - path = gcs_folder + "test_read_csv_gcs_bq_engine_w_index*.csv" - scalars_df.to_csv(path, index=False) - df = session.read_csv( + # Compares results for pandas and bigframes engines + pd_df = session.read_csv( path, - engine="bigquery", - index_col=bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64, - ) - - # TODO(chelsealin): If we serialize the index, can more easily compare values. - pd.testing.assert_index_equal(df.columns, scalars_df.columns) - - # The auto detects of BigQuery load job have restrictions to detect the bytes, - # datetime, numeric and geometry types, so they're skipped here. - df = df.drop(columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"]) - scalars_df = scalars_df.drop( - columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"] + index_col="rowindex", + write_engine=write_engine, + dtype=scalars_df.dtypes.to_dict(), ) - assert df.shape[0] == scalars_df.shape[0] - pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes) - - -@pytest.mark.parametrize( - "sep", - [ - pytest.param(",", id="default_sep"), - pytest.param("\t", id="custom_sep"), - ], -) -@utils.skip_legacy_pandas -def test_read_csv_local_default_engine(session, scalars_dfs, sep): - scalars_df, scalars_pandas_df = scalars_dfs - with tempfile.TemporaryDirectory() as dir: - path = dir + "/test_read_csv_local_default_engine.csv" - # Using the pandas to_csv method because the BQ one does not support local write. - scalars_pandas_df.to_csv(path, index=False, sep=sep) - dtype = scalars_df.dtypes.to_dict() - dtype.pop("geography_col") - df = session.read_csv( - path, - sep=sep, - # Convert default pandas dtypes to match BigQuery DataFrames dtypes. - dtype=dtype, - ) + pd.testing.assert_frame_equal(pd_df.to_pandas(), scalars_df.to_pandas()) - # TODO(chelsealin): If we serialize the index, can more easily compare values. - pd.testing.assert_index_equal(df.columns, scalars_df.columns) - - # The auto detects of BigQuery load job have restrictions to detect the bytes, - # numeric and geometry types, so they're skipped here. - df = df.drop(columns=["bytes_col", "numeric_col", "geography_col"]) - scalars_df = scalars_df.drop( - columns=["bytes_col", "numeric_col", "geography_col"] + if write_engine in ("default", "bigquery_load"): + bf_df = session.read_csv( + path, engine="bigquery", index_col="rowindex", write_engine=write_engine ) - assert df.shape[0] == scalars_df.shape[0] - pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes) + pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1143,71 +1145,77 @@ def test_read_csv_local_default_engine(session, scalars_dfs, sep): pytest.param("\t", id="custom_sep"), ], ) -def test_read_csv_local_bq_engine(session, scalars_dfs, sep): - scalars_df, scalars_pandas_df = scalars_dfs - with tempfile.TemporaryDirectory() as dir: - path = dir + "/test_read_csv_local_bq_engine.csv" - # Using the pandas to_csv method because the BQ one does not support local write. 
- scalars_pandas_df.to_csv(path, index=False, sep=sep) - df = session.read_csv(path, engine="bigquery", sep=sep) +def test_read_csv_for_local_file_w_sep(session, df_and_local_csv, sep): + scalars_df, _ = df_and_local_csv - # TODO(chelsealin): If we serialize the index, can more easily compare values. - pd.testing.assert_index_equal(df.columns, scalars_df.columns) + with tempfile.TemporaryDirectory() as dir: + # Prepares local CSV file for reading + path = dir + "/test_read_csv_for_local_file_w_sep.csv" + scalars_df.to_csv(path, index=True, sep=sep) - # The auto detects of BigQuery load job have restrictions to detect the bytes, - # datetime, numeric and geometry types, so they're skipped here. - df = df.drop( - columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"] + # Compares results for pandas and bigframes engines + with open(path, "rb") as buffer: + bf_df = session.read_csv( + buffer, engine="bigquery", index_col="rowindex", sep=sep + ) + with open(path, "rb") as buffer: + # Convert default pandas dtypes to match BigQuery DataFrames dtypes. + pd_df = session.read_csv( + buffer, index_col="rowindex", sep=sep, dtype=scalars_df.dtypes.to_dict() + ) + pd.testing.assert_frame_equal(bf_df.to_pandas(), scalars_df.to_pandas()) + pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + + +def test_read_csv_w_index_col_false(session, df_and_local_csv): + # Compares results for pandas and bigframes engines + scalars_df, path = df_and_local_csv + with open(path, "rb") as buffer: + bf_df = session.read_csv( + buffer, + engine="bigquery", + index_col=False, ) - scalars_df = scalars_df.drop( - columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"] + with open(path, "rb") as buffer: + # Convert default pandas dtypes to match BigQuery DataFrames dtypes. + pd_df = session.read_csv( + buffer, index_col=False, dtype=scalars_df.dtypes.to_dict() ) - assert df.shape[0] == scalars_df.shape[0] - pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes) + assert bf_df.shape[0] == scalars_df.shape[0] + assert bf_df.shape[0] == pd_df.shape[0] -def test_read_csv_localbuffer_bq_engine(session, scalars_dfs): - scalars_df, scalars_pandas_df = scalars_dfs - with tempfile.TemporaryDirectory() as dir: - path = dir + "/test_read_csv_local_bq_engine.csv" - # Using the pandas to_csv method because the BQ one does not support local write. - scalars_pandas_df.to_csv(path, index=False) - with open(path, "rb") as buffer: - df = session.read_csv(buffer, engine="bigquery") + # We use a default index because of index_col=False, so the previous index + # column is just loaded as a column. + assert len(bf_df.columns) == len(scalars_df.columns) + 1 + assert len(bf_df.columns) == len(pd_df.columns) - # TODO(chelsealin): If we serialize the index, can more easily compare values. - pd.testing.assert_index_equal(df.columns, scalars_df.columns) + # BigFrames requires `sort_index()` because BigQuery doesn't preserve row IDs + # (b/280889935) or guarantee row ordering. + bf_df = bf_df.set_index("rowindex").sort_index() + pd_df = pd_df.set_index("rowindex") - # The auto detects of BigQuery load job have restrictions to detect the bytes, - # datetime, numeric and geometry types, so they're skipped here. 
- df = df.drop( - columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"] - ) - scalars_df = scalars_df.drop( - columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"] - ) - assert df.shape[0] == scalars_df.shape[0] - pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes) + pd.testing.assert_frame_equal(bf_df.to_pandas(), scalars_df.to_pandas()) + pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) -def test_read_csv_bq_engine_supports_index_col_false( - session, scalars_df_index, gcs_folder -): - path = gcs_folder + "test_read_csv_bq_engine_supports_index_col_false*.csv" - read_path = utils.get_first_file_from_wildcard(path) - scalars_df_index.to_csv(path) +def test_read_csv_w_index_col_column_label(session, df_and_gcs_csv): + scalars_df, path = df_and_gcs_csv + bf_df = session.read_csv(path, engine="bigquery", index_col="rowindex") - df = session.read_csv( - read_path, - # Normally, pandas uses the first column as the index. index_col=False - # turns off that behavior. - index_col=False, + # Convert default pandas dtypes to match BigQuery DataFrames dtypes. + pd_df = session.read_csv( + path, index_col="rowindex", dtype=scalars_df.dtypes.to_dict() ) - assert df.shape[0] == scalars_df_index.shape[0] - # We use a default index because of index_col=False, so the previous index - # column is just loaded as a column. - assert len(df.columns) == len(scalars_df_index.columns) + 1 + assert bf_df.shape == scalars_df.shape + assert bf_df.shape == pd_df.shape + + assert len(bf_df.columns) == len(scalars_df.columns) + assert len(bf_df.columns) == len(pd_df.columns) + + pd.testing.assert_frame_equal(bf_df.to_pandas(), scalars_df.to_pandas()) + pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1242,155 +1250,62 @@ def test_read_csv_default_engine_throws_not_implemented_error( session.read_csv(read_path, **kwargs) -def test_read_csv_gcs_default_engine_w_header(session, scalars_df_index, gcs_folder): - path = gcs_folder + "test_read_csv_gcs_default_engine_w_header*.csv" - read_path = utils.get_first_file_from_wildcard(path) - scalars_df_index.to_csv(path) - - # Skips header=N rows, normally considers the N+1th row as the header, but overridden by - # passing the `names` argument. In this case, pandas will skip the N+1th row too, take - # the column names from `names`, and begin reading data from the N+2th row. - df = session.read_csv( - read_path, - header=2, - names=scalars_df_index.columns.to_list(), +@pytest.mark.parametrize( + "header", + [0, 1, 5], +) +def test_read_csv_for_gcs_file_w_header(session, df_and_gcs_csv, header): + # Compares results for pandas and bigframes engines + scalars_df, path = df_and_gcs_csv + bf_df = session.read_csv(path, engine="bigquery", index_col=False, header=header) + pd_df = session.read_csv( + path, index_col=False, header=header, dtype=scalars_df.dtypes.to_dict() ) - assert df.shape[0] == scalars_df_index.shape[0] - 2 - assert len(df.columns) == len(scalars_df_index.columns) - - -def test_read_csv_gcs_bq_engine_w_header(session, scalars_df_index, gcs_folder): - path = gcs_folder + "test_read_csv_gcs_bq_engine_w_header*.csv" - scalars_df_index.to_csv(path, index=False) - - # Skip the header and the first 2 data rows. Note that one line of header - # also got added while writing the csv through `to_csv`, so we would have to - # pass headers=3 in the `read_csv` to skip reading the header and two rows. 
- # Without provided schema, the column names would be like `bool_field_0`, - # `string_field_1` and etc. - df = session.read_csv(path, header=3, engine="bigquery") - assert df.shape[0] == scalars_df_index.shape[0] - 2 - assert len(df.columns) == len(scalars_df_index.columns) - - -def test_read_csv_local_default_engine_w_header(session, scalars_pandas_df_index): - with tempfile.TemporaryDirectory() as dir: - path = dir + "/test_read_csv_local_default_engine_w_header.csv" - # Using the pandas to_csv method because the BQ one does not support local write. - scalars_pandas_df_index.to_csv(path, index=False) - - # Skips header=N rows. Normally row N+1 would be the header now, but overridden by - # passing the `names` argument. In this case, pandas will skip row N+1 too, infer - # the column names from `names`, and begin reading data from row N+2. - df = session.read_csv( - path, - header=2, - names=scalars_pandas_df_index.columns.to_list(), - ) - assert df.shape[0] == scalars_pandas_df_index.shape[0] - 2 - assert len(df.columns) == len(scalars_pandas_df_index.columns) - - -def test_read_csv_local_bq_engine_w_header(session, scalars_pandas_df_index): - with tempfile.TemporaryDirectory() as dir: - path = dir + "/test_read_csv_local_bq_engine_w_header.csv" - # Using the pandas to_csv method because the BQ one does not support local write. - scalars_pandas_df_index.to_csv(path, index=False) - - # Skip the header and the first 2 data rows. Note that one line of - # header also got added while writing the csv through `to_csv`, so we - # would have to pass headers=3 in the `read_csv` to skip reading the - # header and two rows. Without provided schema, the column names would - # be like `bool_field_0`, `string_field_1` and etc. - df = session.read_csv(path, header=3, engine="bigquery") - assert df.shape[0] == scalars_pandas_df_index.shape[0] - 2 - assert len(df.columns) == len(scalars_pandas_df_index.columns) - - -def test_read_csv_gcs_default_engine_w_index_col_name( - session, scalars_df_default_index, gcs_folder -): - path = gcs_folder + "test_read_csv_gcs_default_engine_w_index_col_name*.csv" - read_path = utils.get_first_file_from_wildcard(path) - scalars_df_default_index.to_csv(path) - df = session.read_csv(read_path, index_col="rowindex") - scalars_df_default_index = scalars_df_default_index.set_index( - "rowindex" - ).sort_index() - pd.testing.assert_index_equal(df.columns, scalars_df_default_index.columns) - assert df.index.name == "rowindex" + # b/408461403: workaround the issue where the slice does not work for DataFrame. 
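+    # The workaround below slices the local pandas copy and re-uploads it with
+    # read_pandas, so expected_df still reflects the rows remaining after the
+    # header skip.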
+    expected_df = session.read_pandas(scalars_df.to_pandas()[header:])
+    assert pd_df.shape[0] == expected_df.shape[0]
+    assert bf_df.shape[0] == pd_df.shape[0]


-def test_read_csv_gcs_default_engine_w_index_col_index(
-    session, scalars_df_default_index, gcs_folder
-):
-    path = gcs_folder + "test_read_csv_gcs_default_engine_w_index_col_index*.csv"
-    read_path = utils.get_first_file_from_wildcard(path)
-    scalars_df_default_index.to_csv(path)
-
-    index_col = scalars_df_default_index.columns.to_list().index("rowindex")
-    df = session.read_csv(read_path, index_col=index_col)
-    scalars_df_default_index = scalars_df_default_index.set_index(
-        "rowindex"
-    ).sort_index()
-    pd.testing.assert_index_equal(df.columns, scalars_df_default_index.columns)
-    assert df.index.name == "rowindex"
-
-
-def test_read_csv_local_default_engine_w_index_col_name(
-    session, scalars_pandas_df_default_index
-):
-    with tempfile.TemporaryDirectory() as dir:
-        path = dir + "/test_read_csv_local_default_engine_w_index_col_name"
-        # Using the pandas to_csv method because the BQ one does not support local write.
-        scalars_pandas_df_default_index.to_csv(path, index=False)
-
-        df = session.read_csv(path, index_col="rowindex")
-        scalars_pandas_df_default_index = scalars_pandas_df_default_index.set_index(
-            "rowindex"
-        ).sort_index()
-        pd.testing.assert_index_equal(
-            df.columns, scalars_pandas_df_default_index.columns
+    # We use a default index because of index_col=False, so the previous index
+    # column is just loaded as a column.
+    assert len(pd_df.columns) == len(expected_df.columns) + 1
+    assert len(bf_df.columns) == len(pd_df.columns)
+
+    # When `header > 0`, pandas and BigFrames may handle column naming differently.
+    # Pandas uses the literal content of the specified header row for column names,
+    # regardless of what it is. BigQuery, however, might generate default names based
+    # on data type (e.g., bool_field_0, string_field_1, etc.).
+    if header == 0:
+        # BigFrames requires `sort_index()` because BigQuery doesn't preserve row IDs
+        # (b/280889935) or guarantee row ordering.
+        bf_df = bf_df.set_index("rowindex").sort_index()
+        pd_df = pd_df.set_index("rowindex")
+        pd.testing.assert_frame_equal(bf_df.to_pandas(), scalars_df.to_pandas())
+        pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas())
+
+
+def test_read_csv_w_usecols(session, df_and_local_csv):
+    # Compares results for pandas and bigframes engines
+    scalars_df, path = df_and_local_csv
+    with open(path, "rb") as buffer:
+        bf_df = session.read_csv(
+            buffer,
+            engine="bigquery",
+            usecols=["bool_col"],
         )
-        assert df.index.name == "rowindex"
-
-
-def test_read_csv_local_default_engine_w_index_col_index(
-    session, scalars_pandas_df_default_index
-):
-    with tempfile.TemporaryDirectory() as dir:
-        path = dir + "/test_read_csv_local_default_engine_w_index_col_index"
-        # Using the pandas to_csv method because the BQ one does not support local write.
-        scalars_pandas_df_default_index.to_csv(path, index=False)
-
-        index_col = scalars_pandas_df_default_index.columns.to_list().index("rowindex")
-        df = session.read_csv(path, index_col=index_col)
-        scalars_pandas_df_default_index = scalars_pandas_df_default_index.set_index(
-            "rowindex"
-        ).sort_index()
-        pd.testing.assert_index_equal(
-            df.columns, scalars_pandas_df_default_index.columns
+    with open(path, "rb") as buffer:
+        # Convert default pandas dtypes to match BigQuery DataFrames dtypes.
+ pd_df = session.read_csv( + buffer, + usecols=["bool_col"], + dtype=scalars_df[["bool_col"]].dtypes.to_dict(), ) - assert df.index.name == "rowindex" - - -@pytest.mark.parametrize( - "engine", - [ - pytest.param("bigquery", id="bq_engine"), - pytest.param(None, id="default_engine"), - ], -) -def test_read_csv_gcs_w_usecols(session, scalars_df_index, gcs_folder, engine): - path = gcs_folder + "test_read_csv_gcs_w_usecols" - path = path + "_default_engine*.csv" if engine is None else path + "_bq_engine*.csv" - read_path = utils.get_first_file_from_wildcard(path) if engine is None else path - scalars_df_index.to_csv(path) - # df should only have 1 column which is bool_col. - df = session.read_csv(read_path, usecols=["bool_col"], engine=engine) - assert len(df.columns) == 1 + # Cannot compare two dataframe due to b/408499371. + assert len(bf_df.columns) == 1 + assert len(pd_df.columns) == 1 @pytest.mark.parametrize( @@ -1424,36 +1339,37 @@ def test_read_csv_local_w_usecols(session, scalars_pandas_df_index, engine): pytest.param(None, id="default_engine"), ], ) -def test_read_csv_others(session, engine): +def test_read_csv_for_others_files(session, engine): uri = "https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/main/tests/data/people.csv" df = session.read_csv(uri, engine=engine) assert len(df.columns) == 3 -@pytest.mark.parametrize( - "engine", - [ - pytest.param("bigquery", id="bq_engine"), - pytest.param(None, id="default_engine"), - ], -) -def test_read_csv_local_w_encoding(session, penguins_pandas_df_default_index, engine): +def test_read_csv_local_w_encoding(session, penguins_pandas_df_default_index): with tempfile.TemporaryDirectory() as dir: path = dir + "/test_read_csv_local_w_encoding.csv" # Using the pandas to_csv method because the BQ one does not support local write. - penguins_pandas_df_default_index.to_csv( - path, index=False, encoding="ISO-8859-1" - ) + penguins_pandas_df_default_index.index.name = "rowindex" + penguins_pandas_df_default_index.to_csv(path, index=True, encoding="ISO-8859-1") # File can only be read using the same character encoding as when written. - df = session.read_csv(path, engine=engine, encoding="ISO-8859-1") - - # TODO(chelsealin): If we serialize the index, can more easily compare values. - pd.testing.assert_index_equal( - df.columns, penguins_pandas_df_default_index.columns + pd_df = session.read_csv( + path, + index_col="rowindex", + encoding="ISO-8859-1", + dtype=penguins_pandas_df_default_index.dtypes.to_dict(), ) - assert df.shape[0] == penguins_pandas_df_default_index.shape[0] + bf_df = session.read_csv( + path, engine="bigquery", index_col="rowindex", encoding="ISO-8859-1" + ) + # BigFrames requires `sort_index()` because BigQuery doesn't preserve row IDs + # (b/280889935) or guarantee row ordering. 
+ bf_df = bf_df.sort_index() + pd.testing.assert_frame_equal( + bf_df.to_pandas(), penguins_pandas_df_default_index + ) + pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_pickle_local(session, penguins_pandas_df_default_index, tmp_path): @@ -1706,3 +1622,66 @@ def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder): assert df.shape[0] == scalars_df.shape[0] pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes) + + +def test_read_gbq_test(test_session: bigframes.Session): + test_project_id = "bigframes-dev" + test_dataset_id = "test_env_only" + test_table_id = "one_table" + table_id = f"{test_project_id}.{test_dataset_id}.{test_table_id}" + actual = test_session.read_gbq(table_id).to_pandas() + + assert actual.shape == (1, 1) + + +@pytest.mark.parametrize( + ("query_or_table", "index_col", "columns"), + [ + pytest.param( + "{scalars_table_id}", + ("int64_col", "string_col", "int64_col"), + ("float64_col", "bool_col"), + id="table_input_index_col_dup", + marks=pytest.mark.xfail( + raises=ValueError, + reason="ValueError: Duplicate names within 'index_col'.", + strict=True, + ), + ), + pytest.param( + """SELECT int64_col, string_col, float64_col, bool_col + FROM `{scalars_table_id}`""", + ("int64_col",), + ("string_col", "float64_col", "string_col"), + id="query_input_columns_dup", + marks=pytest.mark.xfail( + raises=ValueError, + reason="ValueError: Duplicate names within 'columns'.", + strict=True, + ), + ), + pytest.param( + "{scalars_table_id}", + ("int64_col", "string_col"), + ("float64_col", "string_col", "bool_col"), + id="table_input_cross_dup", + marks=pytest.mark.xfail( + raises=ValueError, + reason="ValueError: Overlap between 'index_col' and 'columns'.", + strict=True, + ), + ), + ], +) +def test_read_gbq_duplicate_columns_xfail( + session: bigframes.Session, + scalars_table_id: str, + query_or_table: str, + index_col: tuple, + columns: tuple, +): + session.read_gbq( + query_or_table.format(scalars_table_id=scalars_table_id), + index_col=index_col, + columns=columns, + ) diff --git a/tests/system/small/test_unordered.py b/tests/system/small/test_unordered.py index f270d1903c..f6a56af7ff 100644 --- a/tests/system/small/test_unordered.py +++ b/tests/system/small/test_unordered.py @@ -19,11 +19,7 @@ import bigframes.exceptions import bigframes.pandas as bpd -from tests.system.utils import ( - assert_pandas_df_equal, - assert_series_equal, - skip_legacy_pandas, -) +from tests.system.utils import assert_pandas_df_equal, assert_series_equal def test_unordered_mode_sql_no_hash(unordered_session): @@ -77,8 +73,9 @@ def test_unordered_mode_print(unordered_session): print(df) -@skip_legacy_pandas def test_unordered_mode_read_gbq(unordered_session): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") df = unordered_session.read_gbq( """SELECT [1, 3, 2] AS array_column, @@ -221,7 +218,6 @@ def test_unordered_mode_no_ambiguity_warning(unordered_session): df.groupby("a").head(3) -@skip_legacy_pandas @pytest.mark.parametrize( ("rule", "origin", "data"), [ @@ -255,6 +251,8 @@ def test_unordered_mode_no_ambiguity_warning(unordered_session): ], ) def test__resample_with_index(unordered_session, rule, origin, data): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") col = "timestamp_col" scalars_df_index = bpd.DataFrame(data, session=unordered_session).set_index(col) scalars_pandas_df_index = 
pd.DataFrame(data).set_index(col) diff --git a/tests/system/small/test_window.py b/tests/system/small/test_window.py index 68613f1372..b48bb8bc86 100644 --- a/tests/system/small/test_window.py +++ b/tests/system/small/test_window.py @@ -12,24 +12,42 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime + +import numpy as np import pandas as pd import pytest +from bigframes import dtypes + @pytest.fixture(scope="module") -def rolling_dfs(scalars_dfs): +def rows_rolling_dfs(scalars_dfs): bf_df, pd_df = scalars_dfs - target_cols = ["int64_too", "float64_col", "bool_col"] + target_cols = ["int64_too", "float64_col", "int64_col"] + + return bf_df[target_cols], pd_df[target_cols] + + +@pytest.fixture(scope="module") +def range_rolling_dfs(session): + values = np.arange(20) + pd_df = pd.DataFrame( + { + "ts_col": pd.Timestamp("20250101", tz="UTC") + pd.to_timedelta(values, "s"), + "int_col": values % 4, + "float_col": values / 2, + } + ) - bf_df = bf_df[target_cols].set_index("bool_col") - pd_df = pd_df[target_cols].set_index("bool_col") + bf_df = session.read_pandas(pd_df) return bf_df, pd_df @pytest.fixture(scope="module") -def rolling_series(scalars_dfs): +def rows_rolling_series(scalars_dfs): bf_df, pd_df = scalars_dfs target_col = "int64_too" @@ -37,8 +55,8 @@ def rolling_series(scalars_dfs): @pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) -def test_dataframe_rolling_closed_param(rolling_dfs, closed): - bf_df, pd_df = rolling_dfs +def test_dataframe_rolling_closed_param(rows_rolling_dfs, closed): + bf_df, pd_df = rows_rolling_dfs actual_result = bf_df.rolling(window=3, closed=closed).sum().to_pandas() @@ -47,38 +65,72 @@ def test_dataframe_rolling_closed_param(rolling_dfs, closed): @pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) -def test_dataframe_groupby_rolling_closed_param(rolling_dfs, closed): - bf_df, pd_df = rolling_dfs +def test_dataframe_groupby_rolling_closed_param(rows_rolling_dfs, closed): + bf_df, pd_df = rows_rolling_dfs + # Need to specify column subset for comparison due to b/406841327 + check_columns = ["float64_col", "int64_col"] actual_result = ( - bf_df.groupby(level=0).rolling(window=3, closed=closed).sum().to_pandas() + bf_df.groupby(bf_df["int64_too"] % 2) + .rolling(window=3, closed=closed) + .sum() + .to_pandas() ) - expected_result = pd_df.groupby(level=0).rolling(window=3, closed=closed).sum() - pd.testing.assert_frame_equal(actual_result, expected_result, check_dtype=False) + expected_result = ( + pd_df.groupby(pd_df["int64_too"] % 2).rolling(window=3, closed=closed).sum() + ) + pd.testing.assert_frame_equal( + actual_result[check_columns], expected_result, check_dtype=False + ) -def test_dataframe_rolling_default_closed_param(rolling_dfs): - bf_df, pd_df = rolling_dfs +def test_dataframe_rolling_on(rows_rolling_dfs): + bf_df, pd_df = rows_rolling_dfs - actual_result = bf_df.rolling(window=3).sum().to_pandas() + actual_result = bf_df.rolling(window=3, on="int64_too").sum().to_pandas() - expected_result = pd_df.rolling(window=3).sum() + expected_result = pd_df.rolling(window=3, on="int64_too").sum() pd.testing.assert_frame_equal(actual_result, expected_result, check_dtype=False) -def test_dataframe_groupby_rolling_default_closed_param(rolling_dfs): - bf_df, pd_df = rolling_dfs +def test_dataframe_rolling_on_invalid_column_raise_error(rows_rolling_dfs): + bf_df, _ = rows_rolling_dfs - actual_result = 
bf_df.groupby(level=0).rolling(window=3).sum().to_pandas() + with pytest.raises(ValueError): + bf_df.rolling(window=3, on="whatever").sum() - expected_result = pd_df.groupby(level=0).rolling(window=3).sum() - pd.testing.assert_frame_equal(actual_result, expected_result, check_dtype=False) + +def test_dataframe_groupby_rolling_on(rows_rolling_dfs): + bf_df, pd_df = rows_rolling_dfs + # Need to specify column subset for comparison due to b/406841327 + check_columns = ["float64_col", "int64_col"] + + actual_result = ( + bf_df.groupby(bf_df["int64_too"] % 2) + .rolling(window=3, on="float64_col") + .sum() + .to_pandas() + ) + + expected_result = ( + pd_df.groupby(pd_df["int64_too"] % 2).rolling(window=3, on="float64_col").sum() + ) + pd.testing.assert_frame_equal( + actual_result[check_columns], expected_result, check_dtype=False + ) + + +def test_dataframe_groupby_rolling_on_invalid_column_raise_error(rows_rolling_dfs): + bf_df, _ = rows_rolling_dfs + + with pytest.raises(ValueError): + bf_df.groupby(level=0).rolling(window=3, on="whatever").sum() @pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) -def test_series_rolling_closed_param(rolling_series, closed): - bf_series, df_series = rolling_series +def test_series_rolling_closed_param(rows_rolling_series, closed): + bf_series, df_series = rows_rolling_series actual_result = bf_series.rolling(window=3, closed=closed).sum().to_pandas() @@ -87,8 +139,8 @@ def test_series_rolling_closed_param(rolling_series, closed): @pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) -def test_series_groupby_rolling_closed_param(rolling_series, closed): - bf_series, df_series = rolling_series +def test_series_groupby_rolling_closed_param(rows_rolling_series, closed): + bf_series, df_series = rows_rolling_series actual_result = ( bf_series.groupby(bf_series % 2) @@ -103,24 +155,6 @@ def test_series_groupby_rolling_closed_param(rolling_series, closed): pd.testing.assert_series_equal(actual_result, expected_result, check_dtype=False) -def test_series_rolling_default_closed_param(rolling_series): - bf_series, df_series = rolling_series - - actual_result = bf_series.rolling(window=3).sum().to_pandas() - - expected_result = df_series.rolling(window=3).sum() - pd.testing.assert_series_equal(actual_result, expected_result, check_dtype=False) - - -def test_series_groupby_rolling_default_closed_param(rolling_series): - bf_series, df_series = rolling_series - - actual_result = bf_series.groupby(bf_series % 2).rolling(window=3).sum().to_pandas() - - expected_result = df_series.groupby(df_series % 2).rolling(window=3).sum() - pd.testing.assert_series_equal(actual_result, expected_result, check_dtype=False) - - @pytest.mark.parametrize( ("windowing"), [ @@ -146,8 +180,8 @@ def test_series_groupby_rolling_default_closed_param(rolling_series): pytest.param(lambda x: x.var(), id="var"), ], ) -def test_series_window_agg_ops(rolling_series, windowing, agg_op): - bf_series, pd_series = rolling_series +def test_series_window_agg_ops(rows_rolling_series, windowing, agg_op): + bf_series, pd_series = rows_rolling_series actual_result = agg_op(windowing(bf_series)).to_pandas() @@ -181,10 +215,183 @@ def test_series_window_agg_ops(rolling_series, windowing, agg_op): pytest.param(lambda x: x.var(), id="var"), ], ) -def test_dataframe_window_agg_ops(rolling_dfs, windowing, agg_op): - bf_df, pd_df = rolling_dfs +def test_dataframe_window_agg_ops(scalars_dfs, windowing, agg_op): + bf_df, pd_df = scalars_dfs + target_columns = ["int64_too", 
"float64_col", "bool_col"] + index_column = "bool_col" + bf_df = bf_df[target_columns].set_index(index_column) + pd_df = pd_df[target_columns].set_index(index_column) bf_result = agg_op(windowing(bf_df)).to_pandas() pd_result = agg_op(windowing(pd_df)) pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + + +@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) +@pytest.mark.parametrize( + "window", # skipped numpy timedelta because Pandas does not support it. + [pd.Timedelta("3s"), datetime.timedelta(seconds=3), "3s"], +) +@pytest.mark.parametrize("ascending", [True, False]) +def test_series_range_rolling(range_rolling_dfs, window, closed, ascending): + bf_df, pd_df = range_rolling_dfs + bf_series = bf_df.set_index("ts_col")["int_col"] + pd_series = pd_df.set_index("ts_col")["int_col"] + + actual_result = ( + bf_series.sort_index(ascending=ascending) + .rolling(window=window, closed=closed) + .min() + .to_pandas() + ) + + expected_result = ( + pd_series.sort_index(ascending=ascending) + .rolling(window=window, closed=closed) + .min() + ) + pd.testing.assert_series_equal( + actual_result, expected_result, check_dtype=False, check_index=False + ) + + +def test_series_groupby_range_rolling(range_rolling_dfs): + bf_df, pd_df = range_rolling_dfs + bf_series = bf_df.set_index("ts_col")["int_col"] + pd_series = pd_df.set_index("ts_col")["int_col"] + + actual_result = ( + bf_series.sort_index() + .groupby(bf_series % 2 == 0) + .rolling(window="3s") + .min() + .to_pandas() + ) + + expected_result = ( + pd_series.sort_index().groupby(pd_series % 2 == 0).rolling(window="3s").min() + ) + pd.testing.assert_series_equal( + actual_result, expected_result, check_dtype=False, check_index=False + ) + + +@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) +@pytest.mark.parametrize( + "window", # skipped numpy timedelta because Pandas does not support it. + [pd.Timedelta("3s"), datetime.timedelta(seconds=3), "3s"], +) +@pytest.mark.parametrize("ascending", [True, False]) +def test_dataframe_range_rolling(range_rolling_dfs, window, closed, ascending): + bf_df, pd_df = range_rolling_dfs + bf_df = bf_df.set_index("ts_col") + pd_df = pd_df.set_index("ts_col") + + actual_result = ( + bf_df.sort_index(ascending=ascending) + .rolling(window=window, closed=closed) + .min() + .to_pandas() + ) + + expected_result = ( + pd_df.sort_index(ascending=ascending) + .rolling(window=window, closed=closed) + .min() + ) + # Need to cast Pandas index type. 
Otherwise it uses DatetimeIndex that + # does not exist in BigFrame + expected_result.index = expected_result.index.astype(dtypes.TIMESTAMP_DTYPE) + pd.testing.assert_frame_equal( + actual_result, + expected_result, + check_dtype=False, + ) + + +def test_dataframe_range_rolling_on(range_rolling_dfs): + bf_df, pd_df = range_rolling_dfs + on = "ts_col" + + actual_result = bf_df.sort_values(on).rolling(window="3s", on=on).min().to_pandas() + + expected_result = pd_df.sort_values(on).rolling(window="3s", on=on).min() + # Need to specify the column order because Pandas (seemingly) + # re-arranges columns alphabetically + cols = ["ts_col", "int_col", "float_col"] + pd.testing.assert_frame_equal( + actual_result[cols], + expected_result[cols], + check_dtype=False, + check_index_type=False, + ) + + +def test_dataframe_groupby_range_rolling(range_rolling_dfs): + bf_df, pd_df = range_rolling_dfs + on = "ts_col" + + actual_result = ( + bf_df.sort_values(on) + .groupby("int_col") + .rolling(window="3s", on=on) + .min() + .to_pandas() + ) + + expected_result = ( + pd_df.sort_values(on).groupby("int_col").rolling(window="3s", on=on).min() + ) + expected_result.index = expected_result.index.set_names("index", level=1) + pd.testing.assert_frame_equal( + actual_result, + expected_result, + check_dtype=False, + check_index_type=False, + ) + + +def test_range_rolling_order_info_lookup(range_rolling_dfs): + bf_df, pd_df = range_rolling_dfs + + actual_result = ( + bf_df.set_index("ts_col") + .sort_index(ascending=False)["int_col"] + .isin(bf_df["int_col"]) + .rolling(window="3s") + .count() + .to_pandas() + ) + + expected_result = ( + pd_df.set_index("ts_col") + .sort_index(ascending=False)["int_col"] + .isin(pd_df["int_col"]) + .rolling(window="3s") + .count() + ) + pd.testing.assert_series_equal( + actual_result, expected_result, check_dtype=False, check_index=False + ) + + +def test_range_rolling_unsupported_index_type_raise_error(range_rolling_dfs): + bf_df, _ = range_rolling_dfs + + with pytest.raises(ValueError): + bf_df["int_col"].sort_index().rolling(window="3s") + + +def test_range_rolling_unsorted_index_raise_error(range_rolling_dfs): + bf_df, _ = range_rolling_dfs + + with pytest.raises(ValueError): + bf_df.set_index("ts_col")["int_col"].rolling(window="3s") + + +def test_range_rolling_unsorted_column_raise_error(range_rolling_dfs): + bf_df, _ = range_rolling_dfs + + with pytest.raises(ValueError): + bf_df.rolling(window="3s", on="ts_col") diff --git a/tests/system/utils.py b/tests/system/utils.py index 891d813935..ecf9ae00f8 100644 --- a/tests/system/utils.py +++ b/tests/system/utils.py @@ -14,7 +14,6 @@ import base64 import decimal -import functools from typing import Iterable, Optional, Set, Union import geopandas as gpd # type: ignore @@ -66,16 +65,6 @@ ] -def skip_legacy_pandas(test): - @functools.wraps(test) - def wrapper(*args, **kwds): - if pd.__version__.startswith("1."): - pytest.skip("Skips pandas 1.x as not compatible with 2.x behavior.") - return test(*args, **kwds) - - return wrapper - - # Prefer this function for tests that run in both ordered and unordered mode def assert_dfs_equivalent( pd_df: pd.DataFrame, bf_df: bigframes.pandas.DataFrame, **kwargs diff --git a/tests/unit/_config/test_bigquery_options.py b/tests/unit/_config/test_bigquery_options.py index 98a74d4e4c..b8f3a612d4 100644 --- a/tests/unit/_config/test_bigquery_options.py +++ b/tests/unit/_config/test_bigquery_options.py @@ -183,3 +183,10 @@ def test_client_endpoints_override_set_shows_warning(): with 
pytest.warns(UserWarning): options.client_endpoints_override = {"bqclient": "endpoint_address"} + + +def test_default_options(): + options = bigquery_options.BigQueryOptions() + + assert options.allow_large_results is False + assert options.ordering_mode == "strict" diff --git a/tests/unit/_tools/__init__.py b/tests/unit/_tools/__init__.py new file mode 100644 index 0000000000..378d15c4be --- /dev/null +++ b/tests/unit/_tools/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for helper methods for processing Python objects with minimal dependencies. + +Please keep the dependencies used in this subpackage to a minimum to avoid the +risk of circular dependencies. +""" diff --git a/tests/unit/_tools/test_strings.py b/tests/unit/_tools/test_strings.py new file mode 100644 index 0000000000..9c83df2556 --- /dev/null +++ b/tests/unit/_tools/test_strings.py @@ -0,0 +1,149 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for helper methods for processing strings with minimal dependencies. + +Please keep the dependencies used in this subpackage to a minimum to avoid the +risk of circular dependencies. +""" + +import base64 +import random +import sys +import uuid + +import pytest + +from bigframes._tools import strings + +# To stress test some unicode comparisons. 
+# https://stackoverflow.com/a/39682429/101923 +ALL_UNICODE_CHARS = "".join(chr(i) for i in range(32, 0x110000) if chr(i).isprintable()) +RANDOM_STRINGS = ( + pytest.param(str(uuid.uuid4()), id="uuid4"), + pytest.param(hex(random.randint(0, sys.maxsize)), id="hex"), + pytest.param( + base64.b64encode( + "".join(random.choice(ALL_UNICODE_CHARS) for _ in range(100)).encode( + "utf-8" + ) + ).decode("utf-8"), + id="base64", + ), + pytest.param( + "".join(random.choice(ALL_UNICODE_CHARS) for _ in range(8)), id="unicode8" + ), + pytest.param( + "".join(random.choice(ALL_UNICODE_CHARS) for _ in range(64)), id="unicode64" + ), +) + + +def random_char_not_equal(avoid: str): + random_char = avoid + while random_char == avoid: + random_char = random.choice(ALL_UNICODE_CHARS) + return random_char + + +def random_deletion(original: str): + """original string with one character removed""" + char_index = random.randrange(len(original)) + return original[:char_index] + original[char_index + 1 :] + + +def random_insertion(original: str): + char_index = random.randrange(len(original)) + random_char = random.choice(ALL_UNICODE_CHARS) + return original[: char_index + 1] + random_char + original[char_index + 1 :] + + +@pytest.mark.parametrize( + ("left", "right", "expected"), + ( + ("", "", 0), + ("abc", "abc", 0), + # Deletions + ("abcxyz", "abc", 3), + ("xyzabc", "abc", 3), + ("AXYZBC", "ABC", 3), + ("AXYZBC", "XYZ", 3), + # Insertions + ("abc", "abcxyz", 3), + ("abc", "xyzabc", 3), + # Substitutions + ("abc", "aBc", 1), + ("abcxyz", "aBcXyZ", 3), + # Combinations + ("abcdefxyz", "abcExyzα", 4), + ), +) +def test_levenshtein_distance(left: str, right: str, expected: int): + assert strings.levenshtein_distance(left, right) == expected + + +@pytest.mark.parametrize(("random_string",), RANDOM_STRINGS) +def test_levenshtein_distance_equal_strings(random_string: str): + """Mini fuzz test with different strings.""" + assert strings.levenshtein_distance(random_string, random_string) == 0 + + +@pytest.mark.parametrize(("random_string",), RANDOM_STRINGS) +def test_levenshtein_distance_random_deletion(random_string: str): + """Mini fuzz test with different strings.""" + + num_deleted = random.randrange(1, min(10, len(random_string))) + assert 1 <= num_deleted < len(random_string) + + deleted = random_string + for _ in range(num_deleted): + deleted = random_deletion(deleted) + + assert deleted != random_string + assert len(deleted) == len(random_string) - num_deleted + assert strings.levenshtein_distance(random_string, deleted) == num_deleted + + +@pytest.mark.parametrize(("random_string",), RANDOM_STRINGS) +def test_levenshtein_distance_random_insertion(random_string: str): + """Mini fuzz test with different strings.""" + + num_inserted = random.randrange(1, min(10, len(random_string))) + assert 1 <= num_inserted < len(random_string) + + inserted = random_string + for _ in range(num_inserted): + inserted = random_insertion(inserted) + + assert inserted != random_string + assert len(inserted) == len(random_string) + num_inserted + assert strings.levenshtein_distance(random_string, inserted) == num_inserted + + +@pytest.mark.parametrize(("random_string",), RANDOM_STRINGS) +def test_levenshtein_distance_random_substitution(random_string: str): + """Mini fuzz test with different strings. + + Note: we don't do multiple substitutions here to avoid accidentally + substituting the same character twice. 
+ """ + char_index = random.randrange(len(random_string)) + replaced_char = random_string[char_index] + random_char = random_char_not_equal(replaced_char) + substituted = ( + random_string[:char_index] + random_char + random_string[char_index + 1 :] + ) + assert substituted != random_string + assert len(substituted) == len(random_string) + assert strings.levenshtein_distance(random_string, substituted) == 1 diff --git a/tests/unit/core/compile/sqlglot/__init__.py b/tests/unit/core/compile/sqlglot/__init__.py new file mode 100644 index 0000000000..0a2669d7a2 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/core/compile/sqlglot/test_sqlglot_types.py b/tests/unit/core/compile/sqlglot/test_sqlglot_types.py new file mode 100644 index 0000000000..a9108e5daf --- /dev/null +++ b/tests/unit/core/compile/sqlglot/test_sqlglot_types.py @@ -0,0 +1,64 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import pandas as pd
+import pyarrow as pa
+
+import bigframes.core.compile.sqlglot.sqlglot_types as sgt
+import bigframes.dtypes as dtypes
+
+
+def test_from_bigframes_simple_dtypes():
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.INT_DTYPE) == "INT64"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.FLOAT_DTYPE) == "FLOAT64"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.STRING_DTYPE) == "STRING"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.BOOL_DTYPE) == "BOOLEAN"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.DATE_DTYPE) == "DATE"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.TIME_DTYPE) == "TIME"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.DATETIME_DTYPE) == "DATETIME"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.TIMESTAMP_DTYPE) == "TIMESTAMP"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.BYTES_DTYPE) == "BYTES"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.NUMERIC_DTYPE) == "NUMERIC"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.BIGNUMERIC_DTYPE) == "BIGNUMERIC"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.JSON_DTYPE) == "JSON"
+    assert sgt.SQLGlotType.from_bigframes_dtype(dtypes.GEO_DTYPE) == "GEOGRAPHY"
+
+
+def test_from_bigframes_struct_dtypes():
+    fields = [pa.field("int_col", pa.int64()), pa.field("bool_col", pa.bool_())]
+    struct_type = pd.ArrowDtype(pa.struct(fields))
+    expected = "STRUCT<int_col INT64, bool_col BOOLEAN>"
+    assert sgt.SQLGlotType.from_bigframes_dtype(struct_type) == expected
+
+
+def test_from_bigframes_array_dtypes():
+    int_array_type = pd.ArrowDtype(pa.list_(pa.int64()))
+    assert sgt.SQLGlotType.from_bigframes_dtype(int_array_type) == "ARRAY<INT64>"
+
+    string_array_type = pd.ArrowDtype(pa.list_(pa.string()))
+    assert sgt.SQLGlotType.from_bigframes_dtype(string_array_type) == "ARRAY<STRING>"
+
+
+def test_from_bigframes_multi_nested_dtypes():
+    fields = [
+        pa.field("string_col", pa.string()),
+        pa.field("date_col", pa.date32()),
+        pa.field("array_col", pa.list_(pa.timestamp("us"))),
+    ]
+    array_type = pd.ArrowDtype(pa.list_(pa.struct(fields)))
+
+    expected = (
+        "ARRAY<STRUCT<string_col STRING, date_col DATE, array_col ARRAY<DATETIME>>>"
+    )
+    assert sgt.SQLGlotType.from_bigframes_dtype(array_type) == expected
diff --git a/tests/unit/core/test_blocks.py b/tests/unit/core/test_blocks.py
index fb5a927e76..b1b276bda3 100644
--- a/tests/unit/core/test_blocks.py
+++ b/tests/unit/core/test_blocks.py
@@ -20,7 +20,7 @@
 import bigframes
 import bigframes.core.blocks as blocks
-import bigframes.session.executor
+import bigframes.session.bq_caching_executor
 
 
 @pytest.mark.parametrize(
@@ -80,7 +80,7 @@ def test_block_from_local(data):
     expected = pandas.DataFrame(data)
     mock_session = mock.create_autospec(spec=bigframes.Session)
    mock_executor = mock.create_autospec(
-        spec=bigframes.session.executor.BigQueryCachingExecutor
+        spec=bigframes.session.bq_caching_executor.BigQueryCachingExecutor
    )
    # hard-coded the returned dimension of the session for that each of the test case contains 3 rows.
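The new SQLGlot type tests above pin down how BigQuery DataFrames dtypes serialize to BigQuery SQL type strings, with the nested cases exercising recursion through Arrow list and struct types. A minimal sketch of that recursion (illustrative only; the to_sql_type helper and its scalar table are ad hoc stand-ins, not the library's implementation in bigframes.core.compile.sqlglot.sqlglot_types):

import pyarrow as pa

# Ad hoc subset of the scalar mapping, keyed by Arrow type string.
_SCALARS = {
    "int64": "INT64",
    "string": "STRING",
    "date32[day]": "DATE",
    "timestamp[us]": "DATETIME",  # no timezone, so DATETIME rather than TIMESTAMP
}

def to_sql_type(arrow_type: pa.DataType) -> str:
    if pa.types.is_list(arrow_type):
        # ARRAY<...> recurses into the element type.
        return f"ARRAY<{to_sql_type(arrow_type.value_type)}>"
    if pa.types.is_struct(arrow_type):
        # STRUCT<name type, ...> recurses into each field.
        fields = ", ".join(f"{f.name} {to_sql_type(f.type)}" for f in arrow_type)
        return f"STRUCT<{fields}>"
    return _SCALARS[str(arrow_type)]

# Mirrors the shape of the multi-nested expectation asserted above.
assert to_sql_type(pa.list_(pa.struct([("id", pa.int64())]))) == "ARRAY<STRUCT<id INT64>>"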
diff --git a/tests/unit/core/test_log_adapter.py b/tests/unit/core/test_log_adapter.py index 6bc9c91f3a..811c64a27b 100644 --- a/tests/unit/core/test_log_adapter.py +++ b/tests/unit/core/test_log_adapter.py @@ -40,8 +40,15 @@ def method1(self): pass def method2(self): + self.method3() + + def method3(self): pass + @property + def my_field(self): + return 0 + return TestClass() @@ -51,9 +58,49 @@ def test_method_logging(test_instance): # Check if the methods were added to the _api_methods list api_methods = log_adapter.get_and_reset_api_methods() - assert api_methods is not None assert "testclass-method1" in api_methods assert "testclass-method2" in api_methods + assert "testclass-method3" not in api_methods + + +def test_property_logging(test_instance): + test_instance.my_field + + # Check if the properties were added to the _api_methods list + api_methods = log_adapter.get_and_reset_api_methods() + assert "testclass-my_field" in api_methods + + +def test_method_logging__include_internal_calls(): + @log_adapter.class_logger(include_internal_calls=True) + class TestClass: + def public_method(self): + self._internal_method() + + def _internal_method(self): + pass + + TestClass().public_method() + + api_methods = log_adapter.get_and_reset_api_methods() + assert "testclass-public_method" in api_methods + assert "testclass-_internal_method" in api_methods + + +def test_method_logging__exclude_internal_calls(): + @log_adapter.class_logger(include_internal_calls=False) + class TestClass: + def public_method(self): + self._internal_method() + + def _internal_method(self): + pass + + TestClass().public_method() + + api_methods = log_adapter.get_and_reset_api_methods() + assert "testclass-public_method" in api_methods + assert "testclass-_internal_method" not in api_methods def test_add_api_method_limit(test_instance): diff --git a/tests/unit/core/test_sql.py b/tests/unit/core/test_sql.py index ca286cafff..913a5b61fe 100644 --- a/tests/unit/core/test_sql.py +++ b/tests/unit/core/test_sql.py @@ -14,15 +14,16 @@ import datetime import decimal +import re import pytest -import shapely # type: ignore +import shapely.geometry # type: ignore from bigframes.core import sql @pytest.mark.parametrize( - ("value", "expected"), + ("value", "expected_pattern"), ( # Try to have some literals for each scalar data type: # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types @@ -32,44 +33,44 @@ (False, "False"), ( b"\x01\x02\x03ABC", - r"b'\x01\x02\x03ABC'", + re.escape(r"b'\x01\x02\x03ABC'"), ), ( datetime.date(2025, 1, 1), - "DATE('2025-01-01')", + re.escape("DATE('2025-01-01')"), ), ( datetime.datetime(2025, 1, 2, 3, 45, 6, 789123), - "DATETIME('2025-01-02T03:45:06.789123')", + re.escape("DATETIME('2025-01-02T03:45:06.789123')"), ), ( - shapely.Point(0, 1), - "ST_GEOGFROMTEXT('POINT (0 1)')", + shapely.geometry.Point(0, 1), + r"ST_GEOGFROMTEXT\('POINT \(0[.]?0* 1[.]?0*\)'\)", ), # TODO: INTERVAL type (e.g. from dateutil.relativedelta) # TODO: JSON type (TBD what Python object that would correspond to) - (123, "123"), - (decimal.Decimal("123.75"), "CAST('123.75' AS NUMERIC)"), + (123, re.escape("123")), + (decimal.Decimal("123.75"), re.escape("CAST('123.75' AS NUMERIC)")), # TODO: support BIGNUMERIC by looking at precision/scale of the DECIMAL - (123.75, "123.75"), + (123.75, re.escape("123.75")), # TODO: support RANGE type - ("abc", "'abc'"), + ("abc", re.escape("'abc'")), # TODO: support STRUCT type (possibly another method?) 
( datetime.time(12, 34, 56, 789123), - "TIME(DATETIME('1970-01-01 12:34:56.789123'))", + re.escape("TIME(DATETIME('1970-01-01 12:34:56.789123'))"), ), ( datetime.datetime( 2025, 1, 2, 3, 45, 6, 789123, tzinfo=datetime.timezone.utc ), - "TIMESTAMP('2025-01-02T03:45:06.789123+00:00')", + re.escape("TIMESTAMP('2025-01-02T03:45:06.789123+00:00')"), ), ), ) -def test_simple_literal(value, expected): +def test_simple_literal(value, expected_pattern): got = sql.simple_literal(value) - assert got == expected + assert re.match(expected_pattern, got) is not None def test_create_vector_search_sql_simple(): diff --git a/tests/unit/functions/test_remote_function.py b/tests/unit/functions/test_remote_function.py index d377fb4d49..259a4390bc 100644 --- a/tests/unit/functions/test_remote_function.py +++ b/tests/unit/functions/test_remote_function.py @@ -19,11 +19,10 @@ import pandas import pytest -import bigframes.core.compile.ibis_types import bigframes.dtypes import bigframes.functions.function as bff import bigframes.series -from tests.unit import resources +from bigframes.testing import mocks @pytest.mark.parametrize( @@ -41,8 +40,10 @@ ) def test_series_input_types_to_str(series_type): """Check that is_row_processor=True uses str as the input type to serialize a row.""" - session = resources.create_bigquery_session() - remote_function_decorator = bff.remote_function(session=session) + session = mocks.create_bigquery_session() + remote_function_decorator = bff.remote_function( + session=session, cloud_function_service_account="default" + ) with pytest.warns( bigframes.exceptions.PreviewWarning, @@ -78,8 +79,10 @@ def test_supported_types_correspond(): def test_missing_input_types(): - session = resources.create_bigquery_session() - remote_function_decorator = bff.remote_function(session=session) + session = mocks.create_bigquery_session() + remote_function_decorator = bff.remote_function( + session=session, cloud_function_service_account="default" + ) def function_without_parameter_annotations(myparam) -> str: return str(myparam) @@ -94,8 +97,10 @@ def function_without_parameter_annotations(myparam) -> str: def test_missing_output_type(): - session = resources.create_bigquery_session() - remote_function_decorator = bff.remote_function(session=session) + session = mocks.create_bigquery_session() + remote_function_decorator = bff.remote_function( + session=session, cloud_function_service_account="default" + ) def function_without_return_annotation(myparam: int): return str(myparam) diff --git a/tests/unit/ml/test_compose.py b/tests/unit/ml/test_compose.py index 450ce8d6ee..86cbb111f4 100644 --- a/tests/unit/ml/test_compose.py +++ b/tests/unit/ml/test_compose.py @@ -281,7 +281,7 @@ def test_customtransformer_compile_sql(mock_X): ] -def create_bq_model_mock(mocker, transform_columns, feature_columns=None): +def create_bq_model_mock(monkeypatch, transform_columns, feature_columns=None): properties = {"transformColumns": transform_columns} mock_bq_model = bigquery.Model("model_project.model_dataset.model_id") type(mock_bq_model)._properties = mock.PropertyMock(return_value=properties) @@ -289,18 +289,19 @@ def create_bq_model_mock(mocker, transform_columns, feature_columns=None): result = [ bigquery.standard_sql.StandardSqlField(col, None) for col in feature_columns ] - mocker.patch( - "google.cloud.bigquery.model.Model.feature_columns", - new_callable=mock.PropertyMock(return_value=result), + monkeypatch.setattr( + type(mock_bq_model), + "feature_columns", + mock.PropertyMock(return_value=result), ) return 
mock_bq_model @pytest.fixture -def bq_model_good(mocker): +def bq_model_good(monkeypatch): return create_bq_model_mock( - mocker, + monkeypatch, [ { "name": "ident_culmen_length_mm", @@ -337,9 +338,9 @@ def bq_model_good(mocker): @pytest.fixture -def bq_model_merge(mocker): +def bq_model_merge(monkeypatch): return create_bq_model_mock( - mocker, + monkeypatch, [ { "name": "labelencoded_county", @@ -357,9 +358,9 @@ def bq_model_merge(mocker): @pytest.fixture -def bq_model_no_merge(mocker): +def bq_model_no_merge(monkeypatch): return create_bq_model_mock( - mocker, + monkeypatch, [ { "name": "ident_culmen_length_mm", @@ -372,9 +373,9 @@ def bq_model_no_merge(mocker): @pytest.fixture -def bq_model_unknown_ML(mocker): +def bq_model_unknown_ML(monkeypatch): return create_bq_model_mock( - mocker, + monkeypatch, [ { "name": "unknownml_culmen_length_mm", @@ -391,9 +392,9 @@ def bq_model_unknown_ML(mocker): @pytest.fixture -def bq_model_flexnames(mocker): +def bq_model_flexnames(monkeypatch): return create_bq_model_mock( - mocker, + monkeypatch, [ { "name": "Flex Name culmen_length_mm", diff --git a/tests/unit/ml/test_golden_sql.py b/tests/unit/ml/test_golden_sql.py index c9d147e18f..62cfe09704 100644 --- a/tests/unit/ml/test_golden_sql.py +++ b/tests/unit/ml/test_golden_sql.py @@ -17,10 +17,10 @@ from google.cloud import bigquery import pandas as pd import pytest -import pytest_mock import bigframes -from bigframes.ml import core, linear_model +from bigframes.ml import core, decomposition, linear_model +import bigframes.ml.core import bigframes.pandas as bpd TEMP_MODEL_ID = bigquery.ModelReference.from_string( @@ -50,10 +50,11 @@ def mock_session(): @pytest.fixture -def bqml_model_factory(mocker: pytest_mock.MockerFixture): - mocker.patch( - "bigframes.ml.core.BqmlModelFactory._create_model_ref", - return_value=TEMP_MODEL_ID, +def bqml_model_factory(monkeypatch): + monkeypatch.setattr( + bigframes.ml.core.BqmlModelFactory, + "_create_model_ref", + mock.Mock(return_value=TEMP_MODEL_ID), ) bqml_model_factory = core.BqmlModelFactory() @@ -80,6 +81,7 @@ def mock_X(mock_y, mock_session): ["index_column_id"], ["index_column_label"], ) + mock_X.reset_index(drop=True).cache().sql = "input_X_no_index_sql" mock_X.join(mock_y).sql = "input_X_y_sql" mock_X.join(mock_y).cache.return_value = mock_X.join(mock_y) mock_X.join(mock_y)._to_sql_query.return_value = ( @@ -209,3 +211,55 @@ def test_logistic_regression_score(mock_session, bqml_model, mock_X, mock_y): mock_session.read_gbq.assert_called_once_with( "SELECT * FROM ML.EVALUATE(MODEL `model_project`.`model_dataset`.`model_id`,\n (input_X_y_sql))" ) + + +def test_decomposition_mf_default_fit(bqml_model_factory, mock_session, mock_X): + model = decomposition.MatrixFactorization( + num_factors=34, + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=9.83, + ) + model._bqml_model_factory = bqml_model_factory + model.fit(mock_X) + + mock_session._start_query_ml_ddl.assert_called_once_with( + "CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type='matrix_factorization',\n feedback_type='explicit',\n user_col='user_id',\n item_col='item_col',\n rating_col='rating_col',\n l2_reg=9.83,\n num_factors=34)\nAS input_X_no_index_sql" + ) + + +def test_decomposition_mf_predict(mock_session, bqml_model, mock_X): + model = decomposition.MatrixFactorization( + num_factors=34, + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + 
l2_reg=9.83, + ) + model._bqml_model = bqml_model + model.predict(mock_X) + + mock_session.read_gbq.assert_called_once_with( + "SELECT * FROM ML.RECOMMEND(MODEL `model_project`.`model_dataset`.`model_id`,\n (input_X_sql))", + index_col=["index_column_id"], + ) + + +def test_decomposition_mf_score(mock_session, bqml_model, mock_X): + model = decomposition.MatrixFactorization( + num_factors=34, + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=9.83, + ) + model._bqml_model = bqml_model + model.score(mock_X) + + mock_session.read_gbq.assert_called_once_with( + "SELECT * FROM ML.EVALUATE(MODEL `model_project`.`model_dataset`.`model_id`)" + ) diff --git a/tests/unit/ml/test_matrix_factorization.py b/tests/unit/ml/test_matrix_factorization.py new file mode 100644 index 0000000000..92691ba9d4 --- /dev/null +++ b/tests/unit/ml/test_matrix_factorization.py @@ -0,0 +1,182 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pytest + +from bigframes.ml import decomposition + + +def test_decomposition_mf_model(): + model = decomposition.MatrixFactorization( + num_factors=16, + feedback_type="implicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=9, + ) + assert model.num_factors == 16 + assert model.feedback_type == "implicit" + assert model.user_col == "user_id" + assert model.item_col == "item_col" + assert model.rating_col == "rating_col" + + +def test_decomposition_mf_feedback_type_explicit(): + model = decomposition.MatrixFactorization( + num_factors=16, + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=9.83, + ) + assert model.feedback_type == "explicit" + + +def test_decomposition_mf_invalid_feedback_type_raises(): + feedback_type = "explimp" + with pytest.raises( + ValueError, + match="Expected feedback_type to be `explicit` or `implicit`.", + ): + decomposition.MatrixFactorization( + # Intentionally pass in the wrong type. This will fail if the user is using + # a type checker, but we can't assume that everyone is doing so, especially + # not in notebook environments. 
+ num_factors=16, + feedback_type=feedback_type, # type: ignore + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=9.83, + ) + + +def test_decomposition_mf_num_factors_low(): + model = decomposition.MatrixFactorization( + num_factors=0, + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=9.83, + ) + assert model.num_factors == 0 + + +def test_decomposition_mf_negative_num_factors_raises(): + num_factors = -2 + with pytest.raises( + ValueError, + match=f"Expected num_factors to be a positive integer, but got {num_factors}.", + ): + decomposition.MatrixFactorization( + num_factors=num_factors, # type: ignore + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=9.83, + ) + + +def test_decomposition_mf_invalid_num_factors_raises(): + num_factors = 0.5 + with pytest.raises( + TypeError, + match=f"Expected num_factors to be an int, but got {type(num_factors)}.", + ): + decomposition.MatrixFactorization( + num_factors=num_factors, # type: ignore + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=9.83, + ) + + +def test_decomposition_mf_invalid_user_col_raises(): + user_col = 123 + with pytest.raises( + TypeError, match=f"Expected user_col to be a str, but got {type(user_col)}." + ): + decomposition.MatrixFactorization( + num_factors=16, + feedback_type="explicit", + user_col=user_col, # type: ignore + item_col="item_col", + rating_col="rating_col", + l2_reg=9.83, + ) + + +def test_decomposition_mf_invalid_item_col_raises(): + item_col = 123 + with pytest.raises( + TypeError, match=f"Expected item_col to be STR, but got {type(item_col)}." + ): + decomposition.MatrixFactorization( + num_factors=16, + feedback_type="explicit", + user_col="user_id", + item_col=item_col, # type: ignore + rating_col="rating_col", + l2_reg=9.83, + ) + + +def test_decomposition_mf_invalid_rating_col_raises(): + rating_col = 4 + with pytest.raises( + TypeError, match=f"Expected rating_col to be a str, but got {type(rating_col)}." 
+ ): + decomposition.MatrixFactorization( + num_factors=16, + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col=rating_col, # type: ignore + l2_reg=9.83, + ) + + +def test_decomposition_mf_l2_reg(): + model = decomposition.MatrixFactorization( + num_factors=16, + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=6.02, # type: ignore + ) + assert model.l2_reg == 6.02 + + +def test_decomposition_mf_invalid_l2_reg_raises(): + l2_reg = "6.02" + with pytest.raises( + TypeError, + match=f"Expected l2_reg to be a float or int, but got {type(l2_reg)}.", + ): + decomposition.MatrixFactorization( + num_factors=16, + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=l2_reg, # type: ignore + ) diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py index fa05fffcb2..af2c7714ab 100644 --- a/tests/unit/session/test_io_bigquery.py +++ b/tests/unit/session/test_io_bigquery.py @@ -15,6 +15,7 @@ import datetime import re from typing import Iterable +from unittest import mock import google.cloud.bigquery as bigquery import pytest @@ -23,14 +24,14 @@ from bigframes.core import log_adapter import bigframes.pandas as bpd import bigframes.session._io.bigquery as io_bq -from tests.unit import resources +from bigframes.testing import mocks @pytest.fixture(scope="function") -def mock_bq_client(mocker): - mock_client = mocker.Mock(spec=bigquery.Client) - mock_query_job = mocker.Mock(spec=bigquery.QueryJob) - mock_row_iterator = mocker.Mock(spec=bigquery.table.RowIterator) +def mock_bq_client(): + mock_client = mock.create_autospec(bigquery.Client) + mock_query_job = mock.create_autospec(bigquery.QueryJob) + mock_row_iterator = mock.create_autospec(bigquery.table.RowIterator) mock_query_job.result.return_value = mock_row_iterator @@ -97,7 +98,7 @@ def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit(): "source": "bigquery-dataframes-temp", } df = bpd.DataFrame( - {"col1": [1, 2], "col2": [3, 4]}, session=resources.create_bigquery_session() + {"col1": [1, 2], "col2": [3, 4]}, session=mocks.create_bigquery_session() ) # Test running two methods df.head() @@ -121,7 +122,7 @@ def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit(): def test_create_job_configs_labels_length_limit_met_and_labels_is_none(): log_adapter.get_and_reset_api_methods() df = bpd.DataFrame( - {"col1": [1, 2], "col2": [3, 4]}, session=resources.create_bigquery_session() + {"col1": [1, 2], "col2": [3, 4]}, session=mocks.create_bigquery_session() ) # Test running methods more than the labels' length limit for i in range(100): @@ -148,7 +149,7 @@ def test_create_job_configs_labels_length_limit_met(): cur_labels[key] = value # If cur_labels length is 62, we can only add one label from api_methods df = bpd.DataFrame( - {"col1": [1, 2], "col2": [3, 4]}, session=resources.create_bigquery_session() + {"col1": [1, 2], "col2": [3, 4]}, session=mocks.create_bigquery_session() ) # Test running two methods df.head() @@ -178,7 +179,7 @@ def test_add_and_trim_labels_length_limit_met(): cur_labels[key] = value df = bpd.DataFrame( - {"col1": [1, 2], "col2": [3, 4]}, session=resources.create_bigquery_session() + {"col1": [1, 2], "col2": [3, 4]}, session=mocks.create_bigquery_session() ) job_config = bigquery.job.QueryJobConfig() @@ -215,7 +216,7 @@ def test_start_query_with_client_labels_length_limit_met( cur_labels[key] = value df = 
bpd.DataFrame( - {"col1": [1, 2], "col2": [3, 4]}, session=resources.create_bigquery_session() + {"col1": [1, 2], "col2": [3, 4]}, session=mocks.create_bigquery_session() ) job_config = bigquery.job.QueryJobConfig() @@ -248,7 +249,7 @@ def test_create_temp_table_default_expiration(): 2023, 11, 2, 13, 44, 55, 678901, datetime.timezone.utc ) - session = resources.create_bigquery_session() + session = mocks.create_bigquery_session() table_ref = bigquery.TableReference.from_string( "test-project.test_dataset.bqdf_new_random_table" ) diff --git a/tests/unit/session/test_io_pandas.py b/tests/unit/session/test_io_pandas.py index 2fa07aed35..224f343c7e 100644 --- a/tests/unit/session/test_io_pandas.py +++ b/tests/unit/session/test_io_pandas.py @@ -29,8 +29,7 @@ import bigframes.features import bigframes.pandas import bigframes.session._io.pandas - -from .. import resources +from bigframes.testing import mocks _LIST_OF_SCALARS = [ [1, 2, 3], @@ -496,7 +495,7 @@ def test_arrow_to_pandas_wrong_size_dtypes( def test_read_pandas_with_bigframes_dataframe(): - session = resources.create_bigquery_session() + session = mocks.create_bigquery_session() df = mock.create_autospec(bigframes.pandas.DataFrame, instance=True) with pytest.raises( diff --git a/tests/unit/session/test_read_gbq_table.py b/tests/unit/session/test_read_gbq_table.py index 8f01820fd3..a56b4ed7ab 100644 --- a/tests/unit/session/test_read_gbq_table.py +++ b/tests/unit/session/test_read_gbq_table.py @@ -20,8 +20,7 @@ import pytest import bigframes.session._io.bigquery.read_gbq_table as bf_read_gbq_table - -from .. import resources +from bigframes.testing import mocks @pytest.mark.parametrize( @@ -87,7 +86,7 @@ def test_infer_unique_columns(index_cols, primary_keys, values_distinct, expecte bqclient.query_and_wait.return_value = ( {"total_count": 3, "distinct_count": 3 if values_distinct else 2}, ) - session = resources.create_bigquery_session( + session = mocks.create_bigquery_session( bqclient=bqclient, table_schema=table.schema ) table._properties["location"] = session._location diff --git a/tests/unit/session/test_session.py b/tests/unit/session/test_session.py index b35449f291..490ffc4108 100644 --- a/tests/unit/session/test_session.py +++ b/tests/unit/session/test_session.py @@ -21,16 +21,14 @@ import google.api_core.exceptions import google.cloud.bigquery -import google.cloud.bigquery.table import pandas as pd -import pyarrow as pa import pytest import bigframes from bigframes import version import bigframes.enums import bigframes.exceptions -from tests.unit import resources +from bigframes.testing import mocks TABLE_REFERENCE = { "projectId": "my-project", @@ -137,8 +135,8 @@ ), ], ) -def test_read_csv_bq_engine_throws_not_implemented_error(kwargs, match): - session = resources.create_bigquery_session() +def test_read_csv_w_bq_engine_raises_error(kwargs, match): + session = mocks.create_bigquery_session() with pytest.raises(NotImplementedError, match=match): session.read_csv("", **kwargs) @@ -150,10 +148,11 @@ def test_read_csv_bq_engine_throws_not_implemented_error(kwargs, match): ("c",), ("python",), ("pyarrow",), + ("python-fwf",), ), ) -def test_read_csv_pandas_engines_index_col_sequential_int64_not_supported(engine): - session = resources.create_bigquery_session() +def test_read_csv_w_pandas_engines_raises_error_for_sequential_int64_index_col(engine): + session = mocks.create_bigquery_session() with pytest.raises(NotImplementedError, match="index_col"): session.read_csv( @@ -163,6 +162,22 @@ def 
test_read_csv_pandas_engines_index_col_sequential_int64_not_supported(engine ) +@pytest.mark.parametrize( + ("kwargs"), + [ + pytest.param({"chunksize": 5}, id="with_chunksize"), + pytest.param({"iterator": True}, id="with_iterator"), + ], +) +def test_read_csv_w_pandas_engines_raises_error_for_unsupported_args(kwargs): + session = mocks.create_bigquery_session() + with pytest.raises( + NotImplementedError, + match="'chunksize' and 'iterator' arguments are not supported.", + ): + session.read_csv("path/to/csv.csv", **kwargs) + + @pytest.mark.parametrize( ("engine", "write_engine"), ( @@ -178,7 +193,7 @@ def test_read_csv_pandas_engines_index_col_sequential_int64_not_supported(engine ), ) def test_read_csv_with_incompatible_write_engine(engine, write_engine): - session = resources.create_bigquery_session() + session = mocks.create_bigquery_session() with pytest.raises( NotImplementedError, @@ -195,14 +210,14 @@ def test_read_csv_with_incompatible_write_engine(engine, write_engine): @pytest.mark.parametrize("missing_parts_table_id", [(""), ("table")]) def test_read_gbq_missing_parts(missing_parts_table_id): - session = resources.create_bigquery_session() + session = mocks.create_bigquery_session() with pytest.raises(ValueError): session.read_gbq(missing_parts_table_id) def test_read_gbq_cached_table(): - session = resources.create_bigquery_session() + session = mocks.create_bigquery_session() table_ref = google.cloud.bigquery.TableReference( google.cloud.bigquery.DatasetReference("my-project", "my_dataset"), "my_table", @@ -245,7 +260,7 @@ def test_default_index_warning_raised_by_read_gbq(table): bqclient.project = "test-project" bqclient.get_table.return_value = table bqclient.query_and_wait.return_value = ({"total_count": 3, "distinct_count": 2},) - session = resources.create_bigquery_session(bqclient=bqclient) + session = mocks.create_bigquery_session(bqclient=bqclient) table._properties["location"] = session._location with pytest.warns(bigframes.exceptions.DefaultIndexWarning): @@ -268,7 +283,7 @@ def test_default_index_warning_not_raised_by_read_gbq_index_col_sequential_int64 bqclient.project = "test-project" bqclient.get_table.return_value = table bqclient.query_and_wait.return_value = ({"total_count": 4, "distinct_count": 3},) - session = resources.create_bigquery_session(bqclient=bqclient) + session = mocks.create_bigquery_session(bqclient=bqclient) table._properties["location"] = session._location # No warnings raised because we set the option allowing the default indexes. 
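A note on the fixture rewrite in tests/unit/session/test_io_bigquery.py above: replacing mocker.Mock(spec=bigquery.Client) with the stdlib mock.create_autospec(bigquery.Client) adds call-signature validation on top of attribute validation. The standalone sketch below is illustrative only, not part of the test suite:

from unittest import mock

import google.cloud.bigquery as bigquery

spec_mock = mock.Mock(spec=bigquery.Client)
autospec_mock = mock.create_autospec(bigquery.Client)

# Both mocks reject attributes that bigquery.Client does not define,
# but only the autospec mock checks how its methods are called.
spec_mock.get_table()  # silently recorded, despite the missing required `table` argument
try:
    autospec_mock.get_table()  # raises TypeError for the missing argument
except TypeError as exc:
    print(f"autospec caught it: {exc}")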
@@ -315,7 +330,7 @@ def test_default_index_warning_not_raised_by_read_gbq_index_col_columns( bqclient.query_and_wait.return_value = ( {"total_count": total_count, "distinct_count": distinct_count}, ) - session = resources.create_bigquery_session( + session = mocks.create_bigquery_session( bqclient=bqclient, table_schema=table.schema ) table._properties["location"] = session._location @@ -357,7 +372,7 @@ def test_default_index_warning_not_raised_by_read_gbq_primary_key(table): bqclient = mock.create_autospec(google.cloud.bigquery.Client, instance=True) bqclient.project = "test-project" bqclient.get_table.return_value = table - session = resources.create_bigquery_session( + session = mocks.create_bigquery_session( bqclient=bqclient, table_schema=table.schema ) table._properties["location"] = session._location @@ -382,7 +397,7 @@ def test_read_gbq_not_found_tables(not_found_table_id): bqclient.get_table.side_effect = google.api_core.exceptions.NotFound( "table not found" ) - session = resources.create_bigquery_session(bqclient=bqclient) + session = mocks.create_bigquery_session(bqclient=bqclient) with pytest.raises(google.api_core.exceptions.NotFound): session.read_gbq(not_found_table_id) @@ -404,7 +419,7 @@ def test_read_gbq_not_found_tables(not_found_table_id): ], ) def test_read_gbq_external_table_no_drive_access(api_name, query_or_table): - session = resources.create_bigquery_session() + session = mocks.create_bigquery_session() session_query_mock = session.bqclient.query def query_mock(query, *args, **kwargs): @@ -459,12 +474,12 @@ def today(cls): monkeypatch.setattr(datetime, "datetime", FakeDatetime) with pytest.warns(bigframes.exceptions.ObsoleteVersionWarning): - resources.create_bigquery_session() + mocks.create_bigquery_session() @mock.patch("bigframes.session.MAX_INLINE_DF_BYTES", 1) def test_read_pandas_inline_exceeds_limit_raises_error(): - session = resources.create_bigquery_session() + session = mocks.create_bigquery_session() pd_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) with pytest.raises( ValueError, @@ -474,20 +489,7 @@ def test_read_pandas_inline_exceeds_limit_raises_error(): def test_read_pandas_inline_w_interval_type_raises_error(): - session = resources.create_bigquery_session() + session = mocks.create_bigquery_session() df = pd.DataFrame(pd.arrays.IntervalArray.from_breaks([0, 10, 20, 30, 40, 50])) - with pytest.raises(ValueError, match="Could not convert with a BigQuery type: "): + with pytest.raises(TypeError): session.read_pandas(df, write_engine="bigquery_inline") - - -def test_read_pandas_inline_w_noninlineable_type_raises_error(): - session = resources.create_bigquery_session() - data = [ - [1, 2, 3], - [4, 5], - None, - [6, 7, 8, 9], - ] - s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) - with pytest.raises(ValueError, match="Could not inline with a BigQuery type:"): - session.read_pandas(s, write_engine="bigquery_inline") diff --git a/tests/unit/session/test_time.py b/tests/unit/session/test_time.py index 87766e79bb..39a231c3ce 100644 --- a/tests/unit/session/test_time.py +++ b/tests/unit/session/test_time.py @@ -15,7 +15,6 @@ import datetime import unittest.mock as mock -import freezegun import google.cloud.bigquery import pytest @@ -47,6 +46,8 @@ def query_and_wait_mock(query, *args, **kwargs): def test_bqsyncedclock_get_time(bq_client): + freezegun = pytest.importorskip("freezegun") + # this initial local time is actually irrelevant, only the ticks matter initial_local_datetime = datetime.datetime( year=1, month=7, day=12, hour=15, minute=6, 
second=3 diff --git a/tests/unit/test_clients.py b/tests/unit/test_clients.py index 37450ececb..032512c26e 100644 --- a/tests/unit/test_clients.py +++ b/tests/unit/test_clients.py @@ -17,33 +17,51 @@ from bigframes import clients -def test_get_connection_name_full_connection_id(): - connection_name = clients.resolve_full_bq_connection_name( +def test_get_canonical_bq_connection_id_connection_id_only(): + connection_id = clients.get_canonical_bq_connection_id( "connection-id", default_project="default-project", default_location="us" ) - assert connection_name == "default-project.us.connection-id" + assert connection_id == "default-project.us.connection-id" -def test_get_connection_name_full_location_connection_id(): - connection_name = clients.resolve_full_bq_connection_name( +def test_get_canonical_bq_connection_id_location_and_connection_id(): + connection_id = clients.get_canonical_bq_connection_id( "eu.connection-id", default_project="default-project", default_location="us" ) - assert connection_name == "default-project.eu.connection-id" + assert connection_id == "default-project.eu.connection-id" -def test_get_connection_name_full_all(): - connection_name = clients.resolve_full_bq_connection_name( +def test_get_canonical_bq_connection_id_already_canonical(): + connection_id = clients.get_canonical_bq_connection_id( "my-project.eu.connection-id", default_project="default-project", default_location="us", ) - assert connection_name == "my-project.eu.connection-id" + assert connection_id == "my-project.eu.connection-id" -def test_get_connection_name_full_raise_value_error(): - with pytest.raises(ValueError): - clients.resolve_full_bq_connection_name( +def test_get_canonical_bq_connection_id_invalid(): + with pytest.raises(ValueError, match="Invalid connection id format"): + clients.get_canonical_bq_connection_id( "my-project.eu.connection-id.extra_field", default_project="default-project", default_location="us", ) + + +def test_get_canonical_bq_connection_id_valid_path(): + connection_id = clients.get_canonical_bq_connection_id( + "projects/project_id/locations/northamerica-northeast1/connections/connection-id", + default_project="default-project", + default_location="us", + ) + assert connection_id == "project_id.northamerica-northeast1.connection-id" + + +def test_get_canonical_bq_connection_id_invalid_path(): + with pytest.raises(ValueError, match="Invalid connection id format"): + clients.get_canonical_bq_connection_id( + "/projects/project_id/locations/northamerica-northeast1/connections/connection-id", + default_project="default-project", + default_location="us", + ) diff --git a/tests/unit/test_daemon.py b/tests/unit/test_daemon.py new file mode 100644 index 0000000000..6b3acd7d7d --- /dev/null +++ b/tests/unit/test_daemon.py @@ -0,0 +1,42 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +import time +from unittest.mock import MagicMock + +from bigframes.session.bigquery_session import RecurringTaskDaemon + + +def test_recurring_task_daemon_calls(): + mock_task = MagicMock() + daemon = RecurringTaskDaemon( + task=mock_task, frequency=datetime.timedelta(seconds=0.1) + ) + daemon.start() + time.sleep(1.0) + daemon.stop() + time.sleep(0.5) + # be lenient, but number of calls should be in this ballpark regardless of scheduling hiccups + assert mock_task.call_count > 6 + assert mock_task.call_count < 12 + + +def test_recurring_task_daemon_never_started(): + mock_task = MagicMock() + _ = RecurringTaskDaemon( + task=mock_task, frequency=datetime.timedelta(seconds=0.0001) + ) + time.sleep(0.1) + assert mock_task.call_count == 0 diff --git a/tests/unit/test_dataframe.py b/tests/unit/test_dataframe.py index 8d1e77510a..9d67fd33b7 100644 --- a/tests/unit/test_dataframe.py +++ b/tests/unit/test_dataframe.py @@ -16,14 +16,13 @@ import pytest import bigframes.dataframe - -from . import resources +from bigframes.testing import mocks def test_dataframe_dropna_axis_1_subset_not_implememented( monkeypatch: pytest.MonkeyPatch, ): - dataframe = resources.create_dataframe(monkeypatch) + dataframe = mocks.create_dataframe(monkeypatch) with pytest.raises(NotImplementedError, match="subset"): dataframe.dropna(axis=1, subset=["col1", "col2"]) @@ -51,14 +50,14 @@ def test_dataframe_setattr_with_uninitialized_object(): def test_dataframe_to_gbq_invalid_destination(monkeypatch: pytest.MonkeyPatch): - dataframe = resources.create_dataframe(monkeypatch) + dataframe = mocks.create_dataframe(monkeypatch) with pytest.raises(ValueError, match="no_dataset_or_project"): dataframe.to_gbq("no_dataset_or_project") def test_dataframe_to_gbq_invalid_if_exists(monkeypatch: pytest.MonkeyPatch): - dataframe = resources.create_dataframe(monkeypatch) + dataframe = mocks.create_dataframe(monkeypatch) with pytest.raises(ValueError, match="notreallyanoption"): # Even though the type is annotated with the literals we accept, users @@ -70,7 +69,7 @@ def test_dataframe_to_gbq_invalid_if_exists(monkeypatch: pytest.MonkeyPatch): def test_dataframe_to_gbq_invalid_if_exists_no_destination( monkeypatch: pytest.MonkeyPatch, ): - dataframe = resources.create_dataframe(monkeypatch) + dataframe = mocks.create_dataframe(monkeypatch) with pytest.raises(ValueError, match="append"): dataframe.to_gbq(if_exists="append") @@ -83,8 +82,8 @@ def test_dataframe_to_gbq_writes_to_anonymous_dataset( anonymous_dataset = google.cloud.bigquery.DatasetReference.from_string( anonymous_dataset_id ) - session = resources.create_bigquery_session(anonymous_dataset=anonymous_dataset) - dataframe = resources.create_dataframe(monkeypatch, session=session) + session = mocks.create_bigquery_session(anonymous_dataset=anonymous_dataset) + dataframe = mocks.create_dataframe(monkeypatch, session=session) destination = dataframe.to_gbq() @@ -94,7 +93,7 @@ def test_dataframe_to_gbq_writes_to_anonymous_dataset( def test_dataframe_semantics_property_future_warning( monkeypatch: pytest.MonkeyPatch, ): - dataframe = resources.create_dataframe(monkeypatch) + dataframe = mocks.create_dataframe(monkeypatch) with bigframes.option_context("experiments.semantic_operators", True), pytest.warns( FutureWarning diff --git a/tests/unit/test_dataframe_io.py b/tests/unit/test_dataframe_io.py index 5deb0d7a24..7845a71134 100644 --- a/tests/unit/test_dataframe_io.py +++ b/tests/unit/test_dataframe_io.py @@ -12,17 +12,17 @@ # See the License for the specific 
language governing permissions and # limitations under the License. -from unittest.mock import Mock +from unittest import mock import pytest -from . import resources +from bigframes.testing import mocks @pytest.fixture def mock_df(monkeypatch: pytest.MonkeyPatch): - dataframe = resources.create_dataframe(monkeypatch) - monkeypatch.setattr(dataframe, "to_pandas", Mock()) + dataframe = mocks.create_dataframe(monkeypatch) + monkeypatch.setattr(dataframe, "to_pandas", mock.Mock()) return dataframe diff --git a/tests/unit/test_local_engine.py b/tests/unit/test_local_engine.py index 4697c84960..d4e0dae1f3 100644 --- a/tests/unit/test_local_engine.py +++ b/tests/unit/test_local_engine.py @@ -19,9 +19,9 @@ import bigframes import bigframes.pandas as bpd -from tests.system.utils import skip_legacy_pandas pytest.importorskip("polars") +pytest.importorskip("pandas", minversion="2.0.0") # All tests in this file require polars to be installed to pass. @@ -50,8 +50,6 @@ def small_inline_frame() -> pd.DataFrame: return df -# These tests should be unit tests, but Session object is tightly coupled to BigQuery client. -@skip_legacy_pandas def test_polars_local_engine_add( small_inline_frame: pd.DataFrame, polars_session: bigframes.Session ): @@ -63,7 +61,6 @@ def test_polars_local_engine_add( pandas.testing.assert_series_equal(bf_result, pd_result) -@skip_legacy_pandas def test_polars_local_engine_order_by(small_inline_frame: pd.DataFrame, polars_session): pd_df = small_inline_frame bf_df = bpd.DataFrame(pd_df, session=polars_session) @@ -73,7 +70,6 @@ def test_polars_local_engine_order_by(small_inline_frame: pd.DataFrame, polars_s pandas.testing.assert_frame_equal(bf_result, pd_result) -@skip_legacy_pandas def test_polars_local_engine_filter(small_inline_frame: pd.DataFrame, polars_session): pd_df = small_inline_frame bf_df = bpd.DataFrame(pd_df, session=polars_session) @@ -83,7 +79,6 @@ def test_polars_local_engine_filter(small_inline_frame: pd.DataFrame, polars_ses pandas.testing.assert_frame_equal(bf_result, pd_result) -@skip_legacy_pandas def test_polars_local_engine_reset_index( small_inline_frame: pd.DataFrame, polars_session ): @@ -96,7 +91,6 @@ def test_polars_local_engine_reset_index( pandas.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) -@skip_legacy_pandas def test_polars_local_engine_join_binop(polars_session): pd_df_1 = pd.DataFrame({"colA": [1, None, 3], "colB": [3, 1, 2]}, index=[1, 2, 3]) pd_df_2 = pd.DataFrame( @@ -116,7 +110,6 @@ def test_polars_local_engine_join_binop(polars_session): ) -@skip_legacy_pandas @pytest.mark.parametrize( "join_type", ["inner", "left", "right", "outer"], @@ -139,7 +132,6 @@ def test_polars_local_engine_joins(join_type, polars_session): ) -@skip_legacy_pandas def test_polars_local_engine_agg(polars_session): pd_df = pd.DataFrame( {"colA": [True, False, True, False, True], "colB": [1, 2, 3, 4, 5]} @@ -152,7 +144,6 @@ def test_polars_local_engine_agg(polars_session): pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False, check_index_type=False) # type: ignore -@skip_legacy_pandas def test_polars_local_engine_groupby_sum(polars_session): pd_df = pd.DataFrame( {"colA": [True, False, True, False, True], "colB": [1, 2, 3, 4, 5]} @@ -166,7 +157,6 @@ def test_polars_local_engine_groupby_sum(polars_session): ) -@skip_legacy_pandas def test_polars_local_engine_cumsum(small_inline_frame, polars_session): pd_df = small_inline_frame[["int1", "int2"]] bf_df = bpd.DataFrame(pd_df, session=polars_session) @@ -176,7 +166,6 @@ def 
test_polars_local_engine_cumsum(small_inline_frame, polars_session): pandas.testing.assert_frame_equal(bf_result, pd_result) -@skip_legacy_pandas def test_polars_local_engine_explode(small_inline_frame, polars_session): pd_df = small_inline_frame bf_df = bpd.DataFrame(pd_df, session=polars_session) @@ -206,7 +195,6 @@ def test_polars_local_engine_explode(small_inline_frame, polars_session): (7, -7, -2), ], ) -@skip_legacy_pandas def test_polars_local_engine_slice( small_inline_frame, polars_session, start, stop, step ): diff --git a/tests/unit/test_pandas.py b/tests/unit/test_pandas.py index 64a287aaca..e8383512a6 100644 --- a/tests/unit/test_pandas.py +++ b/tests/unit/test_pandas.py @@ -91,13 +91,48 @@ def test_method_matches_session(method_name: str): assert pandas_signature.return_annotation == session_signature.return_annotation -def test_cut_raises_with_labels(): +@pytest.mark.parametrize( + ("bins", "labels", "error_message"), + [ + pytest.param( + 5, + True, + "Bin labels must either be False, None or passed in as a list-like argument", + id="true", + ), + pytest.param( + 5, + 1.5, + "Bin labels must either be False, None or passed in as a list-like argument", + id="invalid_types", + ), + pytest.param( + 2, + ["A"], + "must be same as the value of bins", + id="int_bins_mismatch", + ), + pytest.param( + [1, 2, 3], + ["A"], + "must be same as the number of bin edges", + id="iterator_bins_mismatch", + ), + ], +) +def test_cut_raises_with_invalid_labels(bins: int, labels, error_message: str): + mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True) + with pytest.raises(ValueError, match=error_message): + bigframes.pandas.cut(mock_series, bins, labels=labels) + + +def test_cut_raises_with_unsupported_labels(): + mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True) + labels = [1, 2] with pytest.raises( - NotImplementedError, - match="The 'labels' parameter must be either False or None.", + NotImplementedError, match=r".*only iterables of strings are supported.*" ): - mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True) - bigframes.pandas.cut(mock_series, 4, labels=["a", "b", "c", "d"]) + bigframes.pandas.cut(mock_series, 2, labels=labels) # type: ignore @pytest.mark.parametrize( @@ -111,11 +146,21 @@ def test_cut_raises_with_labels(): "`bins` iterable should contain tuples or numerics", id="iterable_w_wrong_type", ), + pytest.param( + [10, 3], + "left side of interval must be <= right side", + id="decreased_breaks", + ), + pytest.param( + [(1, 10), (2, 25)], + "Overlapping IntervalIndex is not accepted.", + id="overlapping_intervals", + ), ], ) def test_cut_raises_with_invalid_bins(bins: int, error_message: str): + mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True) with pytest.raises(ValueError, match=error_message): - mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True) bigframes.pandas.cut(mock_series, bins, labels=False) diff --git a/tests/unit/test_series_io.py b/tests/unit/test_series_io.py index a97293d3da..bb0ea15053 100644 --- a/tests/unit/test_series_io.py +++ b/tests/unit/test_series_io.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from unittest.mock import Mock +from unittest import mock import pytest -from . 
import resources +from bigframes.testing import mocks @pytest.fixture def mock_series(monkeypatch: pytest.MonkeyPatch): - dataframe = resources.create_dataframe(monkeypatch) + dataframe = mocks.create_dataframe(monkeypatch) series = dataframe["col"] - monkeypatch.setattr(series, "to_pandas", Mock()) + monkeypatch.setattr(series, "to_pandas", mock.Mock()) return series diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index e75bdf81e0..4ad4f383cf 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -45,11 +45,11 @@ def x(self) -> bigframes.series.Series: >>> import bigframes.pandas as bpd >>> import geopandas.array - >>> import shapely + >>> import shapely.geometry >>> bpd.options.display.progress_bar = None >>> series = bpd.Series( - ... [shapely.Point(1, 2), shapely.Point(2, 3), shapely.Point(3, 4)], + ... [shapely.geometry.Point(1, 2), shapely.geometry.Point(2, 3), shapely.geometry.Point(3, 4)], ... dtype=geopandas.array.GeometryDtype() ... ) >>> series.geo.x @@ -72,11 +72,11 @@ def y(self) -> bigframes.series.Series: >>> import bigframes.pandas as bpd >>> import geopandas.array - >>> import shapely + >>> import shapely.geometry >>> bpd.options.display.progress_bar = None >>> series = bpd.Series( - ... [shapely.Point(1, 2), shapely.Point(2, 3), shapely.Point(3, 4)], + ... [shapely.geometry.Point(1, 2), shapely.geometry.Point(2, 3), shapely.geometry.Point(3, 4)], ... dtype=geopandas.array.GeometryDtype() ... ) >>> series.geo.y @@ -101,7 +101,7 @@ def boundary(self) -> bigframes.geopandas.GeoSeries: >>> import bigframes.pandas as bpd >>> import geopandas.array - >>> import shapely + >>> import shapely.geometry >>> bpd.options.display.progress_bar = None >>> from shapely.geometry import Polygon, LineString, Point @@ -120,7 +120,7 @@ def boundary(self) -> bigframes.geopandas.GeoSeries: >>> s.boundary 0 LINESTRING (0 0, 1 1, 0 1, 0 0) - 1 MULTIPOINT (0 0, 1 0) + 1 MULTIPOINT ((0 0), (1 0)) 2 GEOMETRYCOLLECTION EMPTY dtype: geometry diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py b/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py index 71e5d9e3df..a87cb081cb 100644 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py +++ b/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py @@ -28,6 +28,7 @@ from bigframes_vendored.ibis.backends.sql.compilers import BigQueryCompiler from bigframes_vendored.ibis.backends.sql.datatypes import BigQueryType import bigframes_vendored.ibis.common.exceptions as com +import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes import bigframes_vendored.ibis.expr.operations as ops import bigframes_vendored.ibis.expr.schema as sch import bigframes_vendored.ibis.expr.types as ir @@ -773,7 +774,7 @@ def execute(self, expr, params=None, limit="default", **kwargs): self._run_pre_execute_hooks(expr) schema = expr.as_table().schema() - bigframes_vendored.ibis.schema( - {"_TABLE_SUFFIX": "string"} + {"_TABLE_SUFFIX": ibis_dtypes.string()} ) sql = self.compile(expr, limit=limit, params=params, **kwargs) diff --git a/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py b/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py index d1ab36c41a..6e98d6a9e1 100644 --- a/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py +++ b/third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py @@ -706,6 +706,10 @@ 
def visit_Literal(self, op, *, value, dtype): else return the result of the previous step. """ if value is None: + if dtype.is_array(): + # hack: bq arrays are like semi-nullable, but want to treat as non-nullable for simplicity + # instead, use empty array as missing value sentinel + return self.cast(self.f.array(), dtype) if dtype.nullable: return NULL if dtype.is_null() else self.cast(NULL, dtype) raise ibis_exceptions.UnsupportedOperationError( @@ -763,8 +767,9 @@ def visit_DefaultLiteral(self, op, *, value, dtype): elif dtype.is_date(): return self.f.datefromparts(value.year, value.month, value.day) elif dtype.is_array(): + # array type is ambiguous if no elements value_type = dtype.value_type - return self.f.array( + values = self.f.array( *( self.visit_Literal( ops.Literal(v, value_type), value=v, dtype=value_type @@ -772,6 +777,7 @@ def visit_DefaultLiteral(self, op, *, value, dtype): for v in value ) ) + return values if len(value) > 0 else self.cast(values, dtype) elif dtype.is_map(): key_type = dtype.key_type keys = self.f.array( @@ -804,11 +810,11 @@ def visit_DefaultLiteral(self, op, *, value, dtype): return sge.Struct.from_arg_list(items) elif dtype.is_uuid(): return self.cast(str(value), dtype) + elif dtype.is_json(): + return sge.ParseJSON(this=sge.convert(str(value))) elif dtype.is_geospatial(): - args = [value.wkt] - if (srid := dtype.srid) is not None: - args.append(srid) - return self.f.st_geomfromtext(*args) + wkt = value if isinstance(value, str) else value.wkt + return self.f.st_geogfromtext(wkt) raise NotImplementedError(f"Unsupported type: {dtype!r}") diff --git a/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py b/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py index 7d6cd6d2b4..7e001d1ac3 100644 --- a/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py +++ b/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py @@ -3,6 +3,7 @@ from __future__ import annotations +import datetime import decimal import math import re @@ -478,6 +479,11 @@ def visit_NonNullLiteral(self, op, *, value, dtype): return sge.convert(str(value)) elif dtype.is_int64(): + # allows directly using values out of a duration arrow array + if isinstance(value, datetime.timedelta): + value = ( + (value.days * 3600 * 24) + value.seconds + ) * 1_000_000 + value.microseconds return sge.convert(np.int64(value)) return None @@ -1024,7 +1030,7 @@ def visit_InMemoryTable(self, op, *, name, schema, data): # Avoid creating temp tables for small data, which is how memtable is # used in BigQuery DataFrames. 
Inspired by: # https://github.com/ibis-project/ibis/blob/efa6fb72bf4c790450d00a926d7bd809dade5902/ibis/backends/druid/compiler.py#L95 - tuples = data.to_frame().itertuples(index=False) + rows = data.to_pyarrow(schema=None).to_pylist() # type: ignore quoted = self.quoted columns = [sg.column(col, quoted=quoted) for col in schema.names] array_expr = sge.DataType( @@ -1042,10 +1048,10 @@ def visit_InMemoryTable(self, op, *, name, schema, data): sge.Struct( expressions=tuple( self.visit_Literal(None, value=value, dtype=type_) - for value, type_ in zip(row, schema.types) + for value, type_ in zip(row.values(), schema.types) ) ) - for row in tuples + for row in rows ] expr = sge.Unnest( expressions=[ diff --git a/third_party/bigframes_vendored/ibis/expr/api.py b/third_party/bigframes_vendored/ibis/expr/api.py index 8427ab1c4b..4ef10e449b 100644 --- a/third_party/bigframes_vendored/ibis/expr/api.py +++ b/third_party/bigframes_vendored/ibis/expr/api.py @@ -2369,7 +2369,7 @@ def ifelse(condition: Any, true_expr: Any, false_expr: Any) -> ir.Value: if not isinstance(condition, ir.Value): condition = literal(condition, type="bool") elif not condition.type().is_boolean(): - condition = condition.cast("bool") + condition = condition.cast(bool) return condition.ifelse(true_expr, false_expr) diff --git a/third_party/bigframes_vendored/ibis/expr/datatypes/__init__.py b/third_party/bigframes_vendored/ibis/expr/datatypes/__init__.py index e17050c865..2ff4d41ab5 100644 --- a/third_party/bigframes_vendored/ibis/expr/datatypes/__init__.py +++ b/third_party/bigframes_vendored/ibis/expr/datatypes/__init__.py @@ -4,7 +4,6 @@ from bigframes_vendored.ibis.expr.datatypes.cast import * # noqa: F403 from bigframes_vendored.ibis.expr.datatypes.core import * # noqa: F403 -from bigframes_vendored.ibis.expr.datatypes.parse import * # noqa: F403 from bigframes_vendored.ibis.expr.datatypes.value import * # noqa: F403 halffloat = float16 # noqa: F405 diff --git a/third_party/bigframes_vendored/ibis/expr/datatypes/core.py b/third_party/bigframes_vendored/ibis/expr/datatypes/core.py index 73dd375563..eb597cfc6a 100644 --- a/third_party/bigframes_vendored/ibis/expr/datatypes/core.py +++ b/third_party/bigframes_vendored/ibis/expr/datatypes/core.py @@ -167,15 +167,6 @@ def castable(self, to, **kwargs) -> bool: return castable(self, to, **kwargs) - @classmethod - def from_string(cls, value) -> Self: - from bigframes_vendored.ibis.expr.datatypes.parse import parse - - try: - return parse(value) - except SyntaxError: - raise TypeError(f"{value!r} cannot be parsed as a datatype") - @classmethod def from_typehint(cls, typ, nullable=True) -> Self: origin_type = get_origin(typ) diff --git a/third_party/bigframes_vendored/ibis/expr/datatypes/parse.py b/third_party/bigframes_vendored/ibis/expr/datatypes/parse.py deleted file mode 100644 index 78bbe0347c..0000000000 --- a/third_party/bigframes_vendored/ibis/expr/datatypes/parse.py +++ /dev/null @@ -1,211 +0,0 @@ -# Contains code from https://github.com/ibis-project/ibis/blob/9.2.0/ibis/expr/datatypes/parse.py - -from __future__ import annotations - -import ast -import functools -from operator import methodcaller -import re - -import bigframes_vendored.ibis.expr.datatypes.core as dt -import parsy -from public import public - -_STRING_REGEX = ( - """('[^\n'\\\\]*(?:\\\\.[^\n'\\\\]*)*'|"[^\n"\\\\"]*(?:\\\\.[^\n"\\\\]*)*")""" -) - -SPACES = parsy.regex(r"\s*", re.MULTILINE) - - -def spaceless(parser): - return SPACES.then(parser).skip(SPACES) - - -def spaceless_string(*strings: str): - return 
spaceless( - parsy.alt(*(parsy.string(string, transform=str.lower) for string in strings)) - ) - - -SINGLE_DIGIT = parsy.decimal_digit -RAW_NUMBER = SINGLE_DIGIT.at_least(1).concat() -PRECISION = SCALE = NUMBER = LENGTH = RAW_NUMBER.map(int) -TEMPORAL_SCALE = SINGLE_DIGIT.map(int) - -LPAREN = spaceless_string("(") -RPAREN = spaceless_string(")") - -LBRACKET = spaceless_string("[") -RBRACKET = spaceless_string("]") - -LANGLE = spaceless_string("<") -RANGLE = spaceless_string(">") - -COMMA = spaceless_string(",") -COLON = spaceless_string(":") -SEMICOLON = spaceless_string(";") - -RAW_STRING = parsy.regex(_STRING_REGEX).map(ast.literal_eval) -FIELD = parsy.regex("[a-zA-Z_0-9]+") | parsy.string("") - - -@public -@functools.lru_cache(maxsize=100) -def parse( - text: str, default_decimal_parameters: tuple[int | None, int | None] = (None, None) -) -> dt.DataType: - """Parse a type from a [](`str`) `text`. - - The default `maxsize` parameter for caching is chosen to cache the most - commonly used types--there are about 30--along with some capacity for less - common but repeatedly-used complex types. - - Parameters - ---------- - text - The type string to parse - default_decimal_parameters - Default precision and scale for decimal types - - Examples - -------- - Parse an array type from a string - - >>> import ibis - >>> import ibis.expr.datatypes as dt - >>> dt.parse("array") - Array(value_type=Int64(nullable=True), nullable=True) - - You can avoid parsing altogether by constructing objects directly - - >>> import ibis - >>> import ibis.expr.datatypes as dt - >>> ty = dt.parse("array") - >>> ty == dt.Array(dt.int64) - True - - """ - geotype = spaceless_string("geography", "geometry") - - srid_geotype = SEMICOLON.then(parsy.seq(srid=NUMBER.skip(COLON), geotype=geotype)) - geotype_part = COLON.then(parsy.seq(geotype=geotype)) - srid_part = SEMICOLON.then(parsy.seq(srid=NUMBER)) - - def geotype_parser(typ: type[dt.DataType]) -> dt.DataType: - return spaceless_string(typ.__name__.lower()).then( - (srid_geotype | geotype_part | srid_part).optional(dict()).combine_dict(typ) - ) - - primitive = ( - spaceless_string("boolean", "bool").result(dt.boolean) - | spaceless_string("halffloat", "float16").result(dt.float16) - | spaceless_string("float32").result(dt.float32) - | spaceless_string("double", "float64", "float").result(dt.float64) - | spaceless_string( - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - "uint32", - "uint64", - "string", - "binary", - "timestamp", - "time", - "date", - "null", - ).map(functools.partial(getattr, dt)) - | spaceless_string("bytes").result(dt.binary) - | geotype.map(dt.GeoSpatial) - | geotype_parser(dt.LineString) - | geotype_parser(dt.Polygon) - | geotype_parser(dt.Point) - | geotype_parser(dt.MultiLineString) - | geotype_parser(dt.MultiPolygon) - | geotype_parser(dt.MultiPoint) - ) - - varchar_or_char = ( - spaceless_string("varchar", "char") - .then(LPAREN.then(RAW_NUMBER).skip(RPAREN).optional()) - .result(dt.string) - ) - - decimal = spaceless_string("decimal").then( - parsy.seq( - LPAREN.then(spaceless(PRECISION)).skip(COMMA), spaceless(SCALE).skip(RPAREN) - ) - .optional(default_decimal_parameters) - .combine(dt.Decimal) - ) - - bignumeric = spaceless_string("bignumeric", "bigdecimal").then( - parsy.seq( - LPAREN.then(spaceless(PRECISION)).skip(COMMA), spaceless(SCALE).skip(RPAREN) - ) - .optional((76, 38)) - .combine(dt.Decimal) - ) - - parened_string = LPAREN.then(RAW_STRING).skip(RPAREN) - timestamp_scale = SINGLE_DIGIT.map(int) - - 
timestamp_tz_args = LPAREN.then( - parsy.seq(timezone=RAW_STRING, scale=COMMA.then(timestamp_scale).optional()) - ).skip(RPAREN) - - timestamp_no_tz_args = LPAREN.then(parsy.seq(scale=timestamp_scale).skip(RPAREN)) - - timestamp = spaceless_string("timestamp").then( - (timestamp_tz_args | timestamp_no_tz_args) - .optional({}) - .combine_dict(dt.Timestamp) - ) - - interval = spaceless_string("interval").then( - parsy.seq(unit=parened_string.optional("s")).combine_dict(dt.Interval) - ) - - ty = parsy.forward_declaration() - angle_type = LANGLE.then(ty).skip(RANGLE) - array = spaceless_string("array").then(angle_type).map(dt.Array) - - map = ( - spaceless_string("map") - .then(LANGLE) - .then(parsy.seq(ty, COMMA.then(ty)).combine(dt.Map)) - .skip(RANGLE) - ) - - struct = ( - spaceless_string("struct") - .then(LANGLE) - .then(parsy.seq(spaceless(FIELD).skip(COLON), ty).sep_by(COMMA)) - .skip(RANGLE) - .map(dt.Struct.from_tuples) - ) - - nullable = spaceless_string("!").then(ty).map(methodcaller("copy", nullable=False)) - - ty.become( - nullable - | timestamp - | primitive - | decimal - | bignumeric - | varchar_or_char - | interval - | array - | map - | struct - | spaceless_string("jsonb", "json", "uuid", "macaddr", "inet").map( - functools.partial(getattr, dt) - ) - | spaceless_string("int").result(dt.int64) - | spaceless_string("str").result(dt.string) - ) - - return ty.parse(text) diff --git a/third_party/bigframes_vendored/ibis/expr/datatypes/value.py b/third_party/bigframes_vendored/ibis/expr/datatypes/value.py index f9302b63f4..e390cea02c 100644 --- a/third_party/bigframes_vendored/ibis/expr/datatypes/value.py +++ b/third_party/bigframes_vendored/ibis/expr/datatypes/value.py @@ -312,15 +312,16 @@ def normalize(typ, value): ) return frozendict({k: normalize(t, value[k]) for k, t in dtype.items()}) elif dtype.is_geospatial(): - import shapely as shp + import shapely + import shapely.geometry if isinstance(value, (tuple, list)): if dtype.is_point(): - return shp.Point(value) + return shapely.geometry.Point(value) elif dtype.is_linestring(): - return shp.LineString(value) + return shapely.geometry.LineString(value) elif dtype.is_polygon(): - return shp.Polygon( + return shapely.geometry.Polygon( toolz.concat( map( attrgetter("coords"), @@ -329,19 +330,23 @@ def normalize(typ, value): ) ) elif dtype.is_multipoint(): - return shp.MultiPoint(tuple(map(partial(normalize, dt.point), value))) + return shapely.geometry.MultiPoint( + tuple(map(partial(normalize, dt.point), value)) + ) elif dtype.is_multilinestring(): - return shp.MultiLineString( + return shapely.geometry.MultiLineString( tuple(map(partial(normalize, dt.linestring), value)) ) elif dtype.is_multipolygon(): - return shp.MultiPolygon(map(partial(normalize, dt.polygon), value)) + return shapely.geometry.MultiPolygon( + map(partial(normalize, dt.polygon), value) + ) else: raise IbisTypeError(f"Unsupported geospatial type: {dtype}") - elif isinstance(value, shp.geometry.base.BaseGeometry): + elif isinstance(value, shapely.geometry.base.BaseGeometry): return value else: - return shp.from_wkt(value) + return shapely.from_wkt(value) elif dtype.is_date(): return normalize_datetime(value).date() elif dtype.is_time(): diff --git a/third_party/bigframes_vendored/ibis/expr/types/arrays.py b/third_party/bigframes_vendored/ibis/expr/types/arrays.py index 5f86cfe477..a8f64490c1 100644 --- a/third_party/bigframes_vendored/ibis/expr/types/arrays.py +++ b/third_party/bigframes_vendored/ibis/expr/types/arrays.py @@ -416,7 +416,7 @@ def map(self, func: 
Deferred | Callable[[ir.Value], ir.Value]) -> ir.ArrayValue: The most succinct way to use `map` is with `Deferred` expressions: - >>> t.a.map((_ + 100).cast("float")) + >>> t.a.map((_ + 100).cast(float)) ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ ArrayMap(a, Cast(Add(_, 100), float64)) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -429,7 +429,7 @@ def map(self, func: Deferred | Callable[[ir.Value], ir.Value]) -> ir.ArrayValue: You can also use `map` with a lambda function: - >>> t.a.map(lambda x: (x + 100).cast("float")) + >>> t.a.map(lambda x: (x + 100).cast(float)) ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ ArrayMap(a, Cast(Add(x, 100), float64)) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ diff --git a/third_party/bigframes_vendored/ibis/expr/types/generic.py b/third_party/bigframes_vendored/ibis/expr/types/generic.py index 607170e1ca..7de357b138 100644 --- a/third_party/bigframes_vendored/ibis/expr/types/generic.py +++ b/third_party/bigframes_vendored/ibis/expr/types/generic.py @@ -179,31 +179,10 @@ def cast(self, target_type: Any) -> Value: │ … │ └────────────────────────────┘ - or string names - - >>> x.cast("uint16") - ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ Cast(bill_depth_mm, uint16) ┃ - ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ - │ uint16 │ - ├─────────────────────────────┤ - │ 19 │ - │ 17 │ - │ 18 │ - │ NULL │ - │ 19 │ - │ 21 │ - │ 18 │ - │ 20 │ - │ 18 │ - │ 20 │ - │ … │ - └─────────────────────────────┘ - If you make an illegal cast, you won't know until the backend actually executes it. Consider [`.try_cast()`](#ibis.expr.types.generic.Value.try_cast). - >>> ibis.literal("a string").cast("int64") # doctest: +SKIP + >>> ibis.literal("a string").cast(int) # doctest: +SKIP """ op = ops.Cast(self, to=target_type) diff --git a/third_party/bigframes_vendored/ibis/expr/types/geospatial.py b/third_party/bigframes_vendored/ibis/expr/types/geospatial.py index 3f42a4ad14..298e74d6de 100644 --- a/third_party/bigframes_vendored/ibis/expr/types/geospatial.py +++ b/third_party/bigframes_vendored/ibis/expr/types/geospatial.py @@ -135,7 +135,7 @@ def contains(self, right: GeoSpatialValue) -> ir.BooleanValue: >>> ibis.options.interactive = True >>> import shapely >>> t = ibis.examples.zones.fetch() - >>> p = shapely.Point(935996.821, 191376.75) # centroid for zone 1 + >>> p = shapely.geometry.Point(935996.821, 191376.75) # centroid for zone 1 >>> plit = ibis.literal(p, "geometry") >>> t.geom.contains(plit).name("contains") ┏━━━━━━━━━━┓ @@ -197,7 +197,7 @@ def covers(self, right: GeoSpatialValue) -> ir.BooleanValue: Polygon area center in zone 1 - >>> z1_ctr_buff = shapely.Point(935996.821, 191376.75).buffer(10) + >>> z1_ctr_buff = shapely.geometry.Point(935996.821, 191376.75).buffer(10) >>> z1_ctr_buff_lit = ibis.literal(z1_ctr_buff, "geometry") >>> t.geom.covers(z1_ctr_buff_lit).name("covers") ┏━━━━━━━━━┓ @@ -242,7 +242,7 @@ def covered_by(self, right: GeoSpatialValue) -> ir.BooleanValue: Polygon area center in zone 1 - >>> pol_big = shapely.Point(935996.821, 191376.75).buffer(10000) + >>> pol_big = shapely.geometry.Point(935996.821, 191376.75).buffer(10000) >>> pol_big_lit = ibis.literal(pol_big, "geometry") >>> t.geom.covered_by(pol_big_lit).name("covered_by") ┏━━━━━━━━━━━━┓ @@ -262,7 +262,7 @@ def covered_by(self, right: GeoSpatialValue) -> ir.BooleanValue: │ False │ │ … │ └────────────┘ - >>> pol_small = shapely.Point(935996.821, 191376.75).buffer(100) + >>> pol_small = shapely.geometry.Point(935996.821, 191376.75).buffer(100) >>> pol_small_lit = ibis.literal(pol_small, "geometry") >>> 
t.geom.covered_by(pol_small_lit).name("covered_by") ┏━━━━━━━━━━━━┓ @@ -387,7 +387,7 @@ def disjoint(self, right: GeoSpatialValue) -> ir.BooleanValue: >>> ibis.options.interactive = True >>> import shapely >>> t = ibis.examples.zones.fetch() - >>> p = shapely.Point(935996.821, 191376.75) # zone 1 centroid + >>> p = shapely.geometry.Point(935996.821, 191376.75) # zone 1 centroid >>> plit = ibis.literal(p, "geometry") >>> t.geom.disjoint(plit).name("disjoint") ┏━━━━━━━━━━┓ @@ -435,7 +435,7 @@ def d_within( >>> ibis.options.interactive = True >>> import shapely >>> t = ibis.examples.zones.fetch() - >>> penn_station = shapely.Point(986345.399, 211974.446) + >>> penn_station = shapely.geometry.Point(986345.399, 211974.446) >>> penn_lit = ibis.literal(penn_station, "geometry") Check zones within 1000ft of Penn Station centroid @@ -578,7 +578,7 @@ def intersects(self, right: GeoSpatialValue) -> ir.BooleanValue: >>> ibis.options.interactive = True >>> import shapely >>> t = ibis.examples.zones.fetch() - >>> p = shapely.Point(935996.821, 191376.75) # zone 1 centroid + >>> p = shapely.geometry.Point(935996.821, 191376.75) # zone 1 centroid >>> plit = ibis.literal(p, "geometry") >>> t.geom.intersects(plit).name("intersects") ┏━━━━━━━━━━━━┓ @@ -675,7 +675,7 @@ def overlaps(self, right: GeoSpatialValue) -> ir.BooleanValue: Polygon center in an edge point of zone 1 - >>> p_edge_buffer = shapely.Point(933100.918, 192536.086).buffer(100) + >>> p_edge_buffer = shapely.geometry.Point(933100.918, 192536.086).buffer(100) >>> buff_lit = ibis.literal(p_edge_buffer, "geometry") >>> t.geom.overlaps(buff_lit).name("overlaps") ┏━━━━━━━━━━┓ @@ -720,7 +720,7 @@ def touches(self, right: GeoSpatialValue) -> ir.BooleanValue: Edge point of zone 1 - >>> p_edge = shapely.Point(933100.9183527103, 192536.08569720192) + >>> p_edge = shapely.geometry.Point(933100.9183527103, 192536.08569720192) >>> p_edge_lit = ibis.literal(p_edge, "geometry") >>> t.geom.touches(p_edge_lit).name("touches") ┏━━━━━━━━━┓ @@ -765,7 +765,7 @@ def distance(self, right: GeoSpatialValue) -> ir.FloatingValue: Penn station zone centroid - >>> penn_station = shapely.Point(986345.399, 211974.446) + >>> penn_station = shapely.geometry.Point(986345.399, 211974.446) >>> penn_lit = ibis.literal(penn_station, "geometry") >>> t.geom.distance(penn_lit).name("distance_penn") ┏━━━━━━━━━━━━━━━┓ @@ -886,7 +886,7 @@ def union(self, right: GeoSpatialValue) -> GeoSpatialValue: Penn station zone centroid - >>> penn_station = shapely.Point(986345.399, 211974.446) + >>> penn_station = shapely.geometry.Point(986345.399, 211974.446) >>> penn_lit = ibis.literal(penn_station, "geometry") >>> t.geom.centroid().union(penn_lit).name("union_centroid_penn") ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ @@ -1312,7 +1312,7 @@ def within(self, right: GeoSpatialValue) -> ir.BooleanValue: >>> ibis.options.interactive = True >>> import shapely >>> t = ibis.examples.zones.fetch() - >>> penn_station_buff = shapely.Point(986345.399, 211974.446).buffer(5000) + >>> penn_station_buff = shapely.geometry.Point(986345.399, 211974.446).buffer(5000) >>> penn_lit = ibis.literal(penn_station_buff, "geometry") >>> t.filter(t.geom.within(penn_lit))["zone"] ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ diff --git a/third_party/bigframes_vendored/ibis/expr/types/json.py b/third_party/bigframes_vendored/ibis/expr/types/json.py index 388b4d8742..51d1642de0 100644 --- a/third_party/bigframes_vendored/ibis/expr/types/json.py +++ b/third_party/bigframes_vendored/ibis/expr/types/json.py @@ -446,24 +446,6 
@@ def str(self) -> ir.StringValue: │ NULL │ └──────────────────────┘ - Note the difference between `.string` and `.cast("string")`. - - The latter preserves quotes for JSON string values and returns a valid - JSON string. - - >>> t.js.cast("string") - ┏━━━━━━━━━━━━━━━━━━┓ - ┃ Cast(js, string) ┃ - ┡━━━━━━━━━━━━━━━━━━┩ - │ string │ - ├──────────────────┤ - │ "a" │ - │ "b" │ - │ 1 │ - │ {} │ - │ [{"a": 1}] │ - └──────────────────┘ - Here's a more complex example with a table containing a JSON column with nested fields. diff --git a/third_party/bigframes_vendored/ibis/expr/types/relations.py b/third_party/bigframes_vendored/ibis/expr/types/relations.py index 919dec0669..d3d66b1512 100644 --- a/third_party/bigframes_vendored/ibis/expr/types/relations.py +++ b/third_party/bigframes_vendored/ibis/expr/types/relations.py @@ -3798,7 +3798,7 @@ def pivot_longer( ... names_pattern=r"wk(.+)", ... names_transform=int, ... values_to="rank", - ... values_transform=_.cast("int"), + ... values_transform=_.cast(int), ... ).drop_null("rank") ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━┓ ┃ artist ┃ track ┃ date_entered ┃ week ┃ rank ┃ diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index e59232ee85..8f3e150606 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -4433,7 +4433,7 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame: to potentially reuse a previously deployed ``remote_function`` from the same user defined function. - >>> @bpd.remote_function(reuse=False) + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") ... def minutes_to_hours(x: int) -> float: ... return x/60 @@ -4813,7 +4813,7 @@ def apply(self, func, *, axis=0, args=(), **kwargs): to select only the necessary columns before calling `apply()`. Note: This feature is currently in **preview**. - >>> @bpd.remote_function(reuse=False) + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") ... def foo(row: pd.Series) -> int: ... result = 1 ... result += row["col1"] @@ -4828,7 +4828,7 @@ def apply(self, func, *, axis=0, args=(), **kwargs): You could return an array output for every input row from the remote function. - >>> @bpd.remote_function(reuse=False) + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") ... def marks_analyzer(marks: pd.Series) -> list[float]: ... import statistics ... average = marks.mean() @@ -4869,7 +4869,7 @@ def apply(self, func, *, axis=0, args=(), **kwargs): [2 rows x 3 columns] - >>> @bpd.remote_function(reuse=False) + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") ... def foo(x: int, y: int, z: int) -> float: ... result = 1 ... result += x diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 8dd43fd8da..4c9d1338f4 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -1052,37 +1052,68 @@ def rolling( self, window, min_periods: int | None = None, + on: str | None = None, + closed: Literal["right", "left", "both", "neither"] = "right", ): """ Provide rolling window calculations. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([0,1,2,3,4]) + >>> s.rolling(window=3).min() + 0 <NA> + 1 <NA> + 2 0 + 3 1 + 4 2 + dtype: Int64 + + >>> df = bpd.DataFrame({'A': [0,1,2,3], 'B': [0,2,4,6]}) + >>> df.rolling(window=2, on='A', closed='both').sum() + A B + 0 0 <NA> + 1 1 2 + 2 2 6 + 3 3 12 + + [4 rows x 2 columns] + Args: - window (int, timedelta, str, offset, or BaseIndexer subclass): + window (int, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, str): Size of the moving window. If an integer, the fixed number of observations used for each window. - If a timedelta, str, or offset, the time period of each window. Each - window will be a variable sized based on the observations included in - the time-period. This is only valid for datetime-like indexes. - To learn more about the offsets & frequency strings, please see `this link - <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. + If a string, the string representation of a timedelta. This string + must be parsable by pandas.Timedelta(). - If a BaseIndexer subclass, the window boundaries - based on the defined ``get_window_bounds`` method. Additional rolling - keyword arguments, namely ``min_periods``, ``center``, ``closed`` and - ``step`` will be passed to ``get_window_bounds``. + Otherwise, the time range for each window. min_periods (int, default None): Minimum number of observations in window required to have a value; otherwise, result is ``np.nan``. - For a window that is specified by an offset, ``min_periods`` will default to 1. - For a window that is specified by an integer, ``min_periods`` will default to the size of the window. + For a window that is not specified by an integer, ``min_periods`` will default + to 1. + + on (str, optional): + For a DataFrame, a column label on which to calculate the rolling window, + rather than the DataFrame’s index. + + closed (str, default 'right'): + If 'right', the first point in the window is excluded from calculations. + If 'left', the last point in the window is excluded from calculations. + If 'both', no points in the window are excluded from calculations. + If 'neither', the first and last points in the window are excluded from calculations. + Returns: bigframes.core.window.Window: ``Window`` subclass if a ``win_type`` is passed. ``Rolling`` subclass if ``win_type`` is not passed. diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index 31a9aa6a93..4fb8498932 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -1025,16 +1025,37 @@ def rolling(self, *args, **kwargs): dtype: Int64 Args: + window (int, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, str): + Size of the moving window. + + If an integer, the fixed number of observations used for + each window. + + If a string, the string representation of a timedelta. This string + must be parsable by pandas.Timedelta(). + + Otherwise, the time range for each window. + min_periods (int, default None): Minimum number of observations in window required to have a value; otherwise, result is ``np.nan``. - For a window that is specified by an offset, - ``min_periods`` will default to 1. - For a window that is specified by an integer, ``min_periods`` will default to the size of the window. + For a window that is not specified by an integer, ``min_periods`` will default + to 1. 
+ + on (str, optional): + For a DataFrame, a column label on which to calculate the rolling window, + rather than the DataFrame’s index. + + closed (str, default 'right'): + If 'right', the first point in the window is excluded from calculations. + If 'left', the last point in the window is excluded from calculations. + If 'both', no points in the window are excluded from calculations. + If 'neither', the first and last points in the window are excluded from calculations. 
 + Returns: bigframes.pandas.DataFrame or bigframes.pandas.Series: Return a new grouper with our rolling appended. diff --git a/third_party/bigframes_vendored/pandas/core/reshape/tile.py index d911a303eb..fccaffdadf 100644 --- a/third_party/bigframes_vendored/pandas/core/reshape/tile.py +++ b/third_party/bigframes_vendored/pandas/core/reshape/tile.py @@ -31,8 +31,6 @@ def cut( age ranges. Supports binning into an equal number of bins, or a pre-specified array of bins. - ``labels=False`` implies you just want the bins back. - **Examples:** >>> import bigframes.pandas as bpd @@ -55,7 +53,16 @@ 3 {'left_exclusive': 7.5, 'right_inclusive': 10.0} dtype: struct[pyarrow] - Cut with an integer (equal-width bins) and labels=False: + Cut with the same bins, but assign them specific labels: + + >>> bpd.cut(s, bins=3, labels=["bad", "medium", "good"]) + 0 bad + 1 bad + 2 medium + 3 good + dtype: string + + `labels=False` implies you want the bins back. >>> bpd.cut(s, bins=4, labels=False) 0 0 @@ -67,7 +74,6 @@ Cut with pd.IntervalIndex, requires importing pandas for IntervalIndex: >>> import pandas as pd - >>> interval_index = pd.IntervalIndex.from_tuples([(0, 1), (1, 5), (5, 20)]) >>> bpd.cut(s, bins=interval_index) 0 <NA> @@ -107,7 +113,7 @@ dtype: struct[pyarrow] Args: - x (Series): + x (bigframes.pandas.Series): The input Series to be binned. Must be 1-dimensional. bins (int, pd.IntervalIndex, Iterable): The criteria to bin by. @@ -127,10 +133,11 @@ ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]`` indicate (1,2], (2,3], (3,4]. This argument is ignored when `bins` is an IntervalIndex. - labels (default None): + labels (bool, Iterable, default None): Specifies the labels for the returned bins. Must be the same length as the resulting bins. If False, returns only integer indicators of the - bins. This affects the type of the output container. + bins. This affects the type of the output container. This argument is + ignored when `bins` is an IntervalIndex. If True, raises an error. Returns: bigframes.pandas.Series: diff --git a/third_party/bigframes_vendored/pandas/core/series.py index 913a2e7c3e..a2d0983652 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -1854,7 +1854,7 @@ def apply( to potentially reuse a previously deployed `remote_function` from the same user defined function. - >>> @bpd.remote_function(reuse=False) + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") ... def minutes_to_hours(x: int) -> float: ... return x/60 @@ -1883,6 +1883,7 @@ def apply( >>> @bpd.remote_function( ... reuse=False, ... packages=["cryptography"], + ... cloud_function_service_account="default" ... ) ... def get_hash(input: str) -> str: ... from cryptography.fernet import Fernet @@ -1900,7 +1901,7 @@ def apply( You could return an array output from the remote function. 
- >>> @bpd.remote_function(reuse=False) + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") ... def text_analyzer(text: str) -> list[int]: ... words = text.count(" ") + 1 ... periods = text.count(".") @@ -5069,7 +5070,7 @@ def mask(self, cond, other): condition is evaluated based on a complicated business logic which cannot be expressed in form of a Series. - >>> @bpd.remote_function(reuse=False) + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") ... def should_mask(name: str) -> bool: ... hash = 0 ... for char_ in name: @@ -5665,7 +5666,7 @@ def map( It also accepts a remote function: - >>> @bpd.remote_function() + >>> @bpd.remote_function(cloud_function_service_account="default") ... def my_mapper(val: str) -> str: ... vowels = ["a", "e", "i", "o", "u"] ... if val: diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_mf.py new file mode 100644 index 0000000000..fb29cc8984 --- /dev/null +++ b/third_party/bigframes_vendored/sklearn/decomposition/_mf.py @@ -0,0 +1,97 @@ +""" Matrix Factorization. +""" + +# Author: Alexandre Gramfort +# Olivier Grisel +# Mathieu Blondel +# Denis A. Engemann +# Michael Eickenberg +# Giorgio Patrini +# +# License: BSD 3 clause + +from abc import ABCMeta + +from bigframes_vendored.sklearn.base import BaseEstimator + +from bigframes import constants + + +class MatrixFactorization(BaseEstimator, metaclass=ABCMeta): + """Matrix Factorization (MF). + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> from bigframes.ml.decomposition import MatrixFactorization + >>> bpd.options.display.progress_bar = None + >>> X = bpd.DataFrame({ + ... "row": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6], + ... "column": [0,1] * 7, + ... "value": [1, 1, 2, 1, 3, 1.2, 4, 1, 5, 0.8, 6, 1, 2, 3], + ... }) + >>> model = MatrixFactorization(feedback_type='explicit', num_factors=6, user_col='row', item_col='column', rating_col='value', l2_reg=2.06) + >>> W = model.fit(X) + + Args: + feedback_type ('explicit' | 'implicit'): + Specifies the feedback type for the model. The feedback type determines the algorithm that is used during training. + num_factors (int or auto, default auto): + Specifies the number of latent factors to use. + user_col (str): + The user column name. + item_col (str): + The item column name. + rating_col (str): + The rating column name. + l2_reg (float, default 1.0): + A floating point value for L2 regularization. The default value is 1.0. + """ + + def fit(self, X, y=None): + """Fit the model according to the given training data. + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): + Series or DataFrame of shape (n_samples, n_features). Training vector, + where `n_samples` is the number of samples and `n_features` is + the number of features. + + y (default None): + Ignored. + + Returns: + bigframes.ml.decomposition.MatrixFactorization: Fitted estimator. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def score(self, X=None, y=None): + """Calculate evaluation metrics of the model. + + .. note:: + + Output matches that of the BigQuery ML.EVALUATE function. + See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#matrix_factorization_models + for the outputs relevant to this model type. + + Args: + X (default None): + Ignored. + + y (default None): + Ignored. 
+
+        Returns:
+            bigframes.dataframe.DataFrame: DataFrame that represents model metrics.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def predict(self, X):
+        """Generate a predicted rating for every user-item row combination for a matrix factorization model.
+
+        Args:
+            X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
+                Series or a DataFrame to predict.
+
+        Returns:
+            bigframes.dataframe.DataFrame: Predicted DataFrame."""
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
diff --git a/third_party/bigframes_vendored/tpch/queries/q11.py b/third_party/bigframes_vendored/tpch/queries/q11.py
index 365aa12eb9..9d868f3343 100644
--- a/third_party/bigframes_vendored/tpch/queries/q11.py
+++ b/third_party/bigframes_vendored/tpch/queries/q11.py
@@ -43,4 +43,4 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
 
     result_df = result_df.sort_values(by="VALUE", ascending=False)
 
-    next(result_df.to_pandas_batches(max_results=1500))
+    result_df.to_pandas()
diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py
index 356e73a71d..e3a1d84bfa 100644
--- a/third_party/bigframes_vendored/version.py
+++ b/third_party/bigframes_vendored/version.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "1.42.0"
+__version__ = "2.0.0"
 
 # {x-release-please-start-date}
-__release_date__ = "2025-03-27"
+__release_date__ = "2025-04-17"
 # {x-release-please-end}
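The rolling docstring earlier in this changeset documents the new `on` and `closed` parameters but ships no doctest. The following is a minimal sketch of how the two combine; the frame, column names, and the assumption of an open BigQuery session are illustrative, not part of the changeset.

import bigframes.pandas as bpd

df = bpd.DataFrame({
    "ts": [1, 2, 3, 4, 5],
    "value": [10.0, 20.0, 30.0, 40.0, 50.0],
})

# on="ts": the rolling window is calculated over the "ts" column
# rather than over the DataFrame's index.
# closed="left": the last point of each window is excluded from the
# calculation, per the docstring above.
rolled = df.rolling(window=3, on="ts", closed="left").sum()
print(rolled.to_pandas())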

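The notebook diffs that follow all apply the same BigFrames 2.0 `remote_function` migration. Here is a condensed before/after sketch using the `get_bucket` example from the getting-started notebook; the reading of `"default"` as opting into the default compute service account follows the breaking-change notes and is stated here as an assumption.

import bigframes.pandas as bpd

# BigFrames 1.x style: input/output types passed positionally.
#
#     @bpd.remote_function([float], str)
#     def get_bucket(num): ...

# BigFrames 2.0 style: types come from the annotations, decorator
# parameters are keyword-only, and the Cloud Functions service account
# must be stated explicitly ("default" keeps the old implicit behavior).
@bpd.remote_function(cloud_function_service_account="default")
def get_bucket(num: float) -> str:
    if not num:
        return "NA"
    boundary = 4000
    return "at_or_above_4000" if num >= boundary else "below_4000"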
-    [... ~120 lines of truncated, cached `semantics.filter` demo output over Hacker News comments elided (commenter handles, comment snippets, timestamps; "[123 rows x 6 columns]") ...]
-    "source": [
-     "hacker_news.semantics.filter(\"{by} contains animal name\", model=gemini_model)"
-    ]
-    [... removed markdown cell reporting runtimes at the raised 500-requests-per-minute quota (https://cloud.google.com/vertex-ai/generative-ai/docs/quotas): ~6 minutes for 3,000 rows, ~26 minutes for 10,000 rows ...]
+    "The tutorial notebook for AI operators is located [here](https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/experimental/ai_operators.ipynb)."
] } ], @@ -3206,7 +53,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.15" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb index 88633f8635..788111cfe6 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb @@ -914,8 +914,8 @@ }, "outputs": [], "source": [ - "@bf.remote_function([str], str)\n", - "def extract_code(text: str):\n", + "@bf.remote_function(cloud_function_service_account=\"default\")\n", + "def extract_code(text: str) -> str:\n", " try:\n", " res = text[text.find('\\n')+1:text.find('```', 3)]\n", " res = res.replace(\"import pandas as pd\", \"import bigframes.pandas as bf\")\n", diff --git a/notebooks/generative_ai/bq_dataframes_llm_gemini_2.ipynb b/notebooks/generative_ai/bq_dataframes_llm_gemini_2.ipynb index d458a0f53b..1a9b568897 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_gemini_2.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_gemini_2.ipynb @@ -369,7 +369,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.15" } }, "nbformat": 4, diff --git a/notebooks/getting_started/getting_started_bq_dataframes.ipynb b/notebooks/getting_started/getting_started_bq_dataframes.ipynb index c5deeef1c5..a8158bcb85 100644 --- a/notebooks/getting_started/getting_started_bq_dataframes.ipynb +++ b/notebooks/getting_started/getting_started_bq_dataframes.ipynb @@ -1485,8 +1485,8 @@ }, "outputs": [], "source": [ - "@bpd.remote_function([float], str)\n", - "def get_bucket(num):\n", + "@bpd.remote_function(cloud_function_service_account=\"default\")\n", + "def get_bucket(num: float) -> str:\n", " if not num: return \"NA\"\n", " boundary = 4000\n", " return \"at_or_above_4000\" if num >= boundary else \"below_4000\"" diff --git a/notebooks/location/regionalized.ipynb b/notebooks/location/regionalized.ipynb index 1b138c6a66..066cd18136 100644 --- a/notebooks/location/regionalized.ipynb +++ b/notebooks/location/regionalized.ipynb @@ -1475,8 +1475,8 @@ } ], "source": [ - "@bpd.remote_function([float], str, bigquery_connection='bigframes-rf-conn')\n", - "def get_bucket(num):\n", + "@bpd.remote_function(bigquery_connection='bigframes-rf-conn', cloud_function_service_account=\"default\")\n", + "def get_bucket(num: float) -> str:\n", " if not num: return \"NA\"\n", " boundary = 4000\n", " return \"at_or_above_4000\" if num >= boundary else \"below_4000\"" diff --git a/notebooks/ml/bq_dataframes_ml_cross_validation.ipynb b/notebooks/ml/bq_dataframes_ml_cross_validation.ipynb index 4bfdcc24aa..501bfc88d3 100644 --- a/notebooks/ml/bq_dataframes_ml_cross_validation.ipynb +++ b/notebooks/ml/bq_dataframes_ml_cross_validation.ipynb @@ -27,21 +27,25 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/garrettwu/src/bigframes/venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py:3577: UserWarning: Reading cached table from 2024-10-01 22:44:50.650768+00:00 to avoid incompatibilies with previous reads of this table. 
To read the latest version, set `use_cache=False` or close the current session with Session.close() or bigframes.pandas.close_session().\n",
-     " exec(code_obj, self.user_global_ns, self.user_ns)\n"
-    ]
+    [... replaced by refreshed "Query job ... is DONE" display cells (0 Bytes, 28.9 kB, and 33.6 kB processed, each with an "Open Job" link) ...]
 [... re-executed preview of the penguins table: the previously sampled rows (Gentoo, Adelie, and Chinstrap penguins from Biscoe, Torgersen, and Dream, with culmen length/depth in mm, flipper length in mm, body mass in g, and sex) are replaced by a fresh ~25-row sample; the HTML table diff is truncated at this point in the source ...]
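To close, a hedged end-to-end sketch of the new `MatrixFactorization` estimator introduced in this changeset, following its docstring example. The ratings data is illustrative, and an active BigQuery session (plus enough data for BQML to train on) is assumed.

import bigframes.pandas as bpd
from bigframes.ml.decomposition import MatrixFactorization

ratings = bpd.DataFrame({
    "row": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6],        # user ids
    "column": [0, 1] * 7,                                      # item ids
    "value": [1, 1, 2, 1, 3, 1.2, 4, 1, 5, 0.8, 6, 1, 2, 3],   # observed ratings
})

model = MatrixFactorization(
    feedback_type="explicit",  # explicit ratings, vs. "implicit" feedback
    num_factors=6,
    user_col="row",
    item_col="column",
    rating_col="value",
    l2_reg=2.06,
)
model.fit(ratings)

# Evaluation metrics match BigQuery's ML.EVALUATE output for this model type.
metrics = model.score()

# One predicted rating per user-item row combination in the input.
predictions = model.predict(ratings)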