From fdcdc189e5fcae9de68bf8fb3872136f55be36cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 3 Jan 2025 12:24:10 -0600 Subject: [PATCH 1/9] fix: reduce the number of labels added to query jobs (#1245) * fix: reduce the number of labels added to query jobs Fixes internal issue 386825477 * fix unit tests --- bigframes/core/log_adapter.py | 7 ++++++- bigframes/session/__init__.py | 16 ++++++++++++---- tests/unit/core/test_log_adapter.py | 5 ++++- tests/unit/session/test_io_bigquery.py | 2 +- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py index 4afa6037de..cc7d4ad58d 100644 --- a/bigframes/core/log_adapter.py +++ b/bigframes/core/log_adapter.py @@ -17,7 +17,12 @@ from typing import List _lock = threading.Lock() -MAX_LABELS_COUNT = 64 + +# The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), +# but leave a few spare for internal labels to be added. +# See internal issue 386825477. +MAX_LABELS_COUNT = 64 - 8 + _api_methods: List = [] _excluded_methods = ["__setattr__", "__getattr__"] diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 40d377bcba..edac7efa4b 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -373,10 +373,18 @@ def close(self): """Delete resources that were created with this session's session_id. This includes BigQuery tables, remote functions and cloud functions serving the remote functions.""" - self._temp_storage_manager.clean_up_tables() - self._remote_function_session.clean_up( - self.bqclient, self.cloudfunctionsclient, self.session_id - ) + + # Protect against failure when the Session is a fake for testing or + # failed to initialize. + temp_storage_manager = getattr(self, "_temp_storage_manager", None) + if temp_storage_manager: + self._temp_storage_manager.clean_up_tables() + + remote_function_session = getattr(self, "_remote_function_session", None) + if remote_function_session: + self._remote_function_session.clean_up( + self.bqclient, self.cloudfunctionsclient, self.session_id + ) def read_gbq( self, diff --git a/tests/unit/core/test_log_adapter.py b/tests/unit/core/test_log_adapter.py index 7033369dd5..1199391813 100644 --- a/tests/unit/core/test_log_adapter.py +++ b/tests/unit/core/test_log_adapter.py @@ -16,7 +16,10 @@ from bigframes.core import log_adapter -MAX_LABELS_COUNT = 64 +# The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), +# but leave a few spare for internal labels to be added. +# See internal issue 386825477. 
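+# This mirrors MAX_LABELS_COUNT in bigframes/core/log_adapter.py, which this
+# patch defines as 64 - 8; keep the two values in sync.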
+MAX_LABELS_COUNT = 56 @pytest.fixture diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py index 46c3c92036..f06578ce03 100644 --- a/tests/unit/session/test_io_bigquery.py +++ b/tests/unit/session/test_io_bigquery.py @@ -114,7 +114,7 @@ def test_create_job_configs_labels_length_limit_met_and_labels_is_none(): job_configs_labels=None, api_methods=api_methods ) assert labels is not None - assert len(labels) == 64 + assert len(labels) == log_adapter.MAX_LABELS_COUNT assert "dataframe-head" in labels.values() From 10f08daec6034aafe48096be56683c953accc79a Mon Sep 17 00:00:00 2001 From: Arwa Sharif <146148342+arwas11@users.noreply.github.com> Date: Fri, 3 Jan 2025 14:33:09 -0600 Subject: [PATCH 2/9] docs: update bigframes.pandas.DatetimeMethods docstrings (#1246) --- .../pandas/core/arrays/datetimelike.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py index 0d910cec92..1736a7f9ef 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py @@ -33,7 +33,8 @@ def strftime(self, date_format: str): Date format string (e.g. "%Y-%m-%d"). Returns: - bigframes.series.Series: Series of formatted strings. + bigframes.pandas.Series: + Series of formatted strings. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -64,7 +65,8 @@ def normalize(self): dtype: timestamp[us, tz=UTC][pyarrow] Returns: - bigframes.series.Series of the same dtype as the data. + bigframes.pandas.Series: + Series of the same dtype as the data. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -95,5 +97,9 @@ def floor(self, freq: str): Args: freq (str): Frequency string (e.g. "D", "min", "s"). + + Returns: + bigframes.pandas.Series: + Series of the same dtype as the data. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 8cfaae8718f3c4c6739b7155a02ef13dbed73425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 3 Jan 2025 15:34:23 -0600 Subject: [PATCH 3/9] fix: raise if trying to change `ordering_mode` after session has started (#1252) * fix: raise if trying to change `ordering_mode` after session has started * fix lint and add unit tests * fix mypy --- bigframes/_config/bigquery_options.py | 7 ++- tests/unit/_config/test_bigquery_options.py | 51 ++++++++++++++------- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 9962d4286e..052ad5d921 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -317,8 +317,11 @@ def ordering_mode(self) -> Literal["strict", "partial"]: return self._ordering_mode.value @ordering_mode.setter - def ordering_mode(self, ordering_mode: Literal["strict", "partial"]) -> None: - self._ordering_mode = _validate_ordering_mode(ordering_mode) + def ordering_mode(self, value: Literal["strict", "partial"]) -> None: + ordering_mode = _validate_ordering_mode(value) + if self._session_started and self._ordering_mode != ordering_mode: + raise ValueError(SESSION_STARTED_MESSAGE.format(attribute="ordering_mode")) + self._ordering_mode = ordering_mode @property def client_endpoints_override(self) -> dict: diff --git a/tests/unit/_config/test_bigquery_options.py b/tests/unit/_config/test_bigquery_options.py index 784e92af40..31f43ffee5 100644 --- a/tests/unit/_config/test_bigquery_options.py +++ b/tests/unit/_config/test_bigquery_options.py @@ -13,8 +13,10 @@ # limitations under the License. import re +from unittest import mock import warnings +import google.auth.credentials import pytest import bigframes @@ -35,6 +37,7 @@ ("kms_key_name", "kms/key/name/1", "kms/key/name/2"), ("skip_bq_connection_check", False, True), ("client_endpoints_override", {}, {"bqclient": "endpoint_address"}), + ("ordering_mode", "strict", "partial"), ], ) def test_setter_raises_if_session_started(attribute, original_value, new_value): @@ -57,32 +60,46 @@ def test_setter_raises_if_session_started(attribute, original_value, new_value): @pytest.mark.parametrize( [ "attribute", + "original_value", ], [ - (attribute,) - for attribute in [ - "application_name", - "credentials", - "location", - "project", - "bq_connection", - "use_regional_endpoints", - "bq_kms_key_name", - "client_endpoints_override", - ] + ("application_name", "test-partner"), + ("location", "us-east1"), + ("project", "my-project"), + ("bq_connection", "path/to/connection/1"), + ("use_regional_endpoints", True), + ("kms_key_name", "kms/key/name/1"), + ("skip_bq_connection_check", True), + ("client_endpoints_override", {"bqclient": "endpoint_address"}), + ("ordering_mode", "partial"), ], ) -def test_setter_if_session_started_but_setting_the_same_value(attribute): +def test_setter_if_session_started_but_setting_the_same_value( + attribute, original_value +): options = bigquery_options.BigQueryOptions() - original_object = object() - setattr(options, attribute, original_object) - assert getattr(options, attribute) is original_object + setattr(options, attribute, original_value) + assert getattr(options, attribute) == original_value # This should work fine since we're setting the same value as before. 
options._session_started = True - setattr(options, attribute, original_object) + setattr(options, attribute, original_value) + + assert getattr(options, attribute) == original_value - assert getattr(options, attribute) is original_object + +def test_setter_if_session_started_but_setting_the_same_credentials_object(): + options = bigquery_options.BigQueryOptions() + original_object = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + options.credentials = original_object + assert options.credentials is original_object + + # This should work fine since we're setting the same value as before. + options._session_started = True + options.credentials = original_object + assert options.credentials is original_object @pytest.mark.parametrize( From 7f8c9721f0d2f624d5b77e02d1d25f7214237370 Mon Sep 17 00:00:00 2001 From: Jiaxun Wu <35040939+jiaxunwu@users.noreply.github.com> Date: Fri, 3 Jan 2025 14:32:52 -0800 Subject: [PATCH 4/9] Update semantic_operators.ipynb (#1254) Rename semantic operator to AI operator in the introduction. --- notebooks/experimental/semantic_operators.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/notebooks/experimental/semantic_operators.ipynb b/notebooks/experimental/semantic_operators.ipynb index 815e14284f..374236e152 100644 --- a/notebooks/experimental/semantic_operators.ipynb +++ b/notebooks/experimental/semantic_operators.ipynb @@ -25,18 +25,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# BigQuery DataFrames Semantic Operator Demo" + "# BigQuery DataFrames AI (semantic) Operator Demo" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The BigQuery DataFrames team implements semantics operators as described in the \"Lotus\" paper: https://arxiv.org/pdf/2407.11418.\n", + "The BigQuery DataFrames team implements AI operators inspired by the \"Lotus\" paper: https://arxiv.org/pdf/2407.11418.\n", "\n", - "This notebook gives you a hands-on preview of semantic operator APIs powered by LLM. You can open this notebook on Google Colab [here](https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/experimental/semantic_operators.ipynb). \n", + "This notebook gives you a hands-on preview of AI operator APIs powered by LLM. You can open this notebook on Google Colab [here](https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/experimental/semantic_operators.ipynb). \n", "\n", - "The notebook has two sections. The first section introduces the API syntax with examples, with the aim to get you familiar with how semantic operators work. The second section applies semantic operators on a large real-world dataset. You will also find some performance statistics there." + "The notebook has two sections. The first section introduces the API syntax with examples, with the aim to get you familiar with how AI operators work. The second section applies AI operators on a large real-world dataset. You will also find some performance statistics there." 
   ]
  },
  {

From 5ba4511ad85cf02f0e5ad4e33ea3826b19527293 Mon Sep 17 00:00:00 2001
From: Shenyang Cai
Date: Fri, 3 Jan 2025 15:43:52 -0800
Subject: [PATCH 5/9] feat: implement confirmation threshold for semantic operators (#1251)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: implement confirmation threshold for semantic operators

* fix format

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* add doc

* raise exception when the user didn't confirm to proceed

* fix prompt format

* add sem ops autofail option

* fix doc

* use option_context to set options in tests

* remove redundant code

* fix tests

* fix doctest

---------

Co-authored-by: Owl Bot
---
 bigframes/_config/compute_options.py           |  10 +
 bigframes/exceptions.py                        |   4 +
 bigframes/operations/semantics.py              |  80 +-
 .../experimental/semantic_operators.ipynb      | 403 +++++-----
 .../system/large/operations/test_semantics.py  | 697 ++++++++++++++----
 5 files changed, 881 insertions(+), 313 deletions(-)

diff --git a/bigframes/_config/compute_options.py b/bigframes/_config/compute_options.py
index c8a54fe0b3..21b41eb185 100644
--- a/bigframes/_config/compute_options.py
+++ b/bigframes/_config/compute_options.py
@@ -66,6 +66,14 @@ class ComputeOptions:
             engine to handle. However this comes at the cost of increase cost and
             latency.
         extra_query_labels (Dict[str, Any], Options):
             Stores additional custom labels for query configuration.
+        semantic_ops_confirmation_threshold (int, optional):
+            Guards against unexpected processing of a large number of rows by semantic operators.
+            If the number of rows exceeds the threshold, the user will be asked to confirm
+            before the operation proceeds. The default value is 0. Set the value to None
+            to turn off the guard.
+        semantic_ops_threshold_autofail (bool):
+            Guards against unexpected processing of a large number of rows by semantic operators.
+            When set to True, the operation automatically fails without asking for user input.
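+
+        Example (an illustrative sketch mirroring the doctests added to
+        bigframes/operations/semantics.py in this patch):
+
+            >>> import bigframes.pandas as bpd  # doctest: +SKIP
+            >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25  # doctest: +SKIP
+            >>> bpd.options.compute.semantic_ops_threshold_autofail = True  # doctest: +SKIP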
""" maximum_bytes_billed: Optional[int] = None @@ -73,6 +81,8 @@ class ComputeOptions: extra_query_labels: Dict[str, Any] = dataclasses.field( default_factory=dict, init=False ) + semantic_ops_confirmation_threshold: Optional[int] = 0 + semantic_ops_threshold_autofail = False def assign_extra_query_labels(self, **kwargs: Any) -> None: """ diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py index 27f3508ff4..3cb5f3665d 100644 --- a/bigframes/exceptions.py +++ b/bigframes/exceptions.py @@ -59,6 +59,10 @@ class QueryComplexityError(RuntimeError): """Query plan is too complex to execute.""" +class OperationAbortedError(RuntimeError): + """Operation is aborted.""" + + class TimeTravelDisabledWarning(Warning): """A query was reattempted without time travel.""" diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py index 79b92afe4f..6a537db4f3 100644 --- a/bigframes/operations/semantics.py +++ b/bigframes/operations/semantics.py @@ -20,8 +20,8 @@ import numpy as np -import bigframes.core.guid as guid -import bigframes.dtypes as dtypes +from bigframes import dtypes, exceptions +from bigframes.core import guid class Semantics: @@ -53,6 +53,7 @@ def agg( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True + >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") @@ -115,6 +116,15 @@ def agg( self._validate_model(model) columns = self._parse_columns(instruction) + if max_agg_rows <= 1: + raise ValueError( + f"Invalid value for `max_agg_rows`: {max_agg_rows}." + "It must be greater than 1." + ) + + work_estimate = len(self._df) * int(max_agg_rows / (max_agg_rows - 1)) + self._confirm_operation(work_estimate) + df: bigframes.dataframe.DataFrame = self._df.copy() for column in columns: if column not in self._df.columns: @@ -135,12 +145,6 @@ def agg( "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - if max_agg_rows <= 1: - raise ValueError( - f"Invalid value for `max_agg_rows`: {max_agg_rows}." - "It must be greater than 1." - ) - user_instruction = self._format_instruction(instruction, columns) num_cluster = 1 @@ -243,6 +247,7 @@ def cluster_by( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True + >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm >>> model = llm.TextEmbeddingGenerator() @@ -296,6 +301,8 @@ def cluster_by( "It must be greater than 1." 
) + self._confirm_operation(len(self._df)) + df: bigframes.dataframe.DataFrame = self._df.copy() embeddings_df = model.predict(df[column]) @@ -314,6 +321,7 @@ def filter(self, instruction: str, model, ground_with_google_search: bool = Fals >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True + >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") @@ -367,6 +375,8 @@ def filter(self, instruction: str, model, ground_with_google_search: bool = Fals "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) + self._confirm_operation(len(self._df)) + df: bigframes.dataframe.DataFrame = self._df[columns].copy() for column in columns: if df[column].dtype != dtypes.STRING_DTYPE: @@ -403,6 +413,7 @@ def map( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True + >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") @@ -462,6 +473,8 @@ def map( "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) + self._confirm_operation(len(self._df)) + df: bigframes.dataframe.DataFrame = self._df[columns].copy() for column in columns: if df[column].dtype != dtypes.STRING_DTYPE: @@ -490,7 +503,6 @@ def join( other, instruction: str, model, - max_rows: int = 1000, ground_with_google_search: bool = False, ): """ @@ -502,6 +514,7 @@ def join( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True + >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") @@ -561,12 +574,8 @@ def join( "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - joined_table_rows = len(self._df) * len(other) - - if joined_table_rows > max_rows: - raise ValueError( - f"Number of rows that need processing is {joined_table_rows}, which exceeds row limit {max_rows}." 
- ) + work_estimate = len(self._df) * len(other) + self._confirm_operation(work_estimate) left_columns = [] right_columns = [] @@ -645,6 +654,7 @@ def search( >>> import bigframes >>> bigframes.options.experiments.semantic_operators = True + >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm >>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-005") @@ -680,6 +690,8 @@ def search( if search_column not in self._df.columns: raise ValueError(f"Column `{search_column}` not found") + self._confirm_operation(len(self._df)) + import bigframes.ml.llm as llm if not isinstance(model, llm.TextEmbeddingGenerator): @@ -743,6 +755,7 @@ def top_k( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True + >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") @@ -803,6 +816,9 @@ def top_k( "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) + work_estimate = int(len(self._df) * (len(self._df) - 1) / 2) + self._confirm_operation(work_estimate) + df: bigframes.dataframe.DataFrame = self._df[columns].copy() column = columns[0] if df[column].dtype != dtypes.STRING_DTYPE: @@ -940,9 +956,8 @@ def sim_join( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - - >>> import bigframes - >>> bigframes.options.experiments.semantic_operators = True + >>> bpd.options.experiments.semantic_operators = True + >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 >>> import bigframes.ml.llm as llm >>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-005") @@ -1001,6 +1016,9 @@ def sim_join( if top_k < 1: raise ValueError("top_k must be an integer greater than or equal to 1.") + work_estimate = len(self._df) * len(other) + self._confirm_operation(work_estimate) + base_table_embedding_column = guid.generate_guid() base_table = self._attach_embedding( other, right_on, base_table_embedding_column, model @@ -1072,3 +1090,29 @@ def _validate_model(model): if not isinstance(model, GeminiTextGenerator): raise TypeError("Model is not GeminiText Generator") + + @staticmethod + def _confirm_operation(row_count: int): + """Raises OperationAbortedError when the confirmation fails""" + import bigframes + + threshold = bigframes.options.compute.semantic_ops_confirmation_threshold + + if threshold is None or row_count <= threshold: + return + + if bigframes.options.compute.semantic_ops_threshold_autofail: + raise exceptions.OperationAbortedError( + f"Operation was cancelled because your work estimate is {row_count} rows, which exceeds the threshold {threshold} rows." + ) + + # Separate the prompt out. In IDE such VS Code, leaving prompt in the + # input function makes it less visible to the end user. + print(f"This operation will process about {row_count} rows.") + print( + "You can raise the confirmation threshold by setting `bigframes.options.compute.semantic_ops_confirmation_threshold` to a higher value. To completely turn off the confirmation check, set the threshold to `None`." + ) + print("Proceed? 
[Y/n]") + reply = input().casefold() + if reply not in {"y", "yes", ""}: + raise exceptions.OperationAbortedError("Operation was cancelled.") diff --git a/notebooks/experimental/semantic_operators.ipynb b/notebooks/experimental/semantic_operators.ipynb index 374236e152..8a2f083419 100644 --- a/notebooks/experimental/semantic_operators.ipynb +++ b/notebooks/experimental/semantic_operators.ipynb @@ -153,7 +153,43 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# API Syntax" + "*Note*: semantic operators could be expensive over a large set of data. As a result, our team added this option `bigframes.options.compute.sem_ops_confirmation_threshold` at `version 1.31.0` so that the BigQuery Dataframe will ask for your confirmation if the amount of data to be processed is too large. If the amount of rows exceeds your threshold, you will see a prompt for your keyboard input -- 'y' to proceed and 'n' to abort. If you abort the operation, no LLM processing will be done.\n", + "\n", + "The default threshold is 0, which means the operators will always ask for confirmations. You are free to adjust the value as needed. You can also set the threshold to `None` to disable this feature." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", + " bigframes.options.compute.semantic_ops_confirmation_threshold = 100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you would like your operations to fail automatically when the data is too large, set `bigframes.options.compute.semantic_ops_threshold_autofail` to `True`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", + "# bigframes.options.compute.semantic_ops_threshold_autofail = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The API" ] }, { @@ -181,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -239,7 +275,7 @@ "[3 rows x 2 columns]" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -263,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -317,7 +353,7 @@ "[1 rows x 2 columns]" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -351,7 +387,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -409,7 +445,7 @@ "[3 rows x 2 columns]" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -431,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -504,7 +540,7 @@ "[3 rows x 3 columns]" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -531,7 +567,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -548,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -620,7 +656,7 @@ "[4 rows x 2 columns]" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -633,7 +669,7 @@ 
"cell_type": "markdown", "metadata": {}, "source": [ - "!! **Important:** Semantic join can trigger probihitively expensitve operations! This operation first cross joins two dataframes, then invokes semantic filter on each row. That means if you have two dataframes of sizes `M` and `N`, the total amount of queries sent to the LLM is on the scale of `M * N`. Therefore, our team has added a parameter `max_rows`, a threshold that guards against unexpected expensive calls. With this parameter, the operator first calculates the size of your cross-joined data, and compares it with the threshold. If the size exceeds your threshold, the fuction will abort early with a `ValueError`. You can manually set the value of `max_rows` to raise or lower the threshold." + "!! **Important:** Semantic join can trigger probihitively expensitve operations! This operation first cross joins two dataframes, then invokes semantic filter on each row. That means if you have two dataframes of sizes `M` and `N`, the total amount of queries sent to the LLM is on the scale of `M * N`. " ] }, { @@ -654,7 +690,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -670,7 +706,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -754,7 +790,7 @@ "[6 rows x 2 columns]" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -781,7 +817,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -855,7 +891,7 @@ "[7 rows x 1 columns]" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -884,7 +920,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -903,7 +939,7 @@ "Name: Movies, dtype: string" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -936,7 +972,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -952,7 +988,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1009,7 +1045,7 @@ "[2 rows x 1 columns]" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1041,7 +1077,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1105,7 +1141,7 @@ "[5 rows x 1 columns]" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1124,7 +1160,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1188,7 +1224,7 @@ "[2 rows x 2 columns]" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1222,7 +1258,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -1239,7 +1275,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1325,7 +1361,7 @@ "[5 rows x 3 columns]" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1338,7 +1374,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "!! 
**Important** Like semantic join, this operator can also be very expensive. To guard against unexpected processing of large dataset, use the `max_rows` parameter to specify a threshold. " + "!! **Important** Like semantic join, this operator can also be very expensive. To guard against unexpected processing of large dataset, use the `bigframes.options.compute.sem_ops_confirmation_threshold` option to specify a threshold. " ] }, { @@ -1357,7 +1393,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -1373,7 +1409,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -1415,17 +1451,17 @@ " \n", " 0\n", " Smartphone\n", - " 2\n", + " 3\n", " \n", " \n", " 1\n", " Laptop\n", - " 2\n", + " 3\n", " \n", " \n", " 2\n", " Coffee Maker\n", - " 2\n", + " 1\n", " \n", " \n", " 3\n", @@ -1444,16 +1480,16 @@ ], "text/plain": [ " Product Cluster ID\n", - "0 Smartphone 2\n", - "1 Laptop 2\n", - "2 Coffee Maker 2\n", + "0 Smartphone 3\n", + "1 Laptop 3\n", + "2 Coffee Maker 1\n", "3 T-shirt 2\n", "4 Jeans 2\n", "\n", "[5 rows x 2 columns]" ] }, - "execution_count": 24, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1487,7 +1523,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1836,7 +1872,7 @@ "[3000 rows x 6 columns]" ] }, - "execution_count": 25, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1855,16 +1891,16 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "2555" + "2556" ] }, - "execution_count": 26, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1883,16 +1919,16 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "390.61878669276047" + "390.29068857589976" ] }, - "execution_count": 27, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1901,6 +1937,23 @@ "hacker_news_with_texts['text'].str.len().mean()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[Optional] You can raise the confirmation threshold for a smoother experience." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", + " bigframes.options.compute.semantic_ops_confirmation_threshold = 5000" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1910,9 +1963,16 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 30, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This operation will process about 2556 rows. Proceed? [Y/n]\n" + ] + }, { "name": "stderr", "output_type": "stream", @@ -1961,7 +2021,7 @@ " comment\n", " \n", " \n", - " 419\n", + " 420\n", " <NA>\n", " Well last time I got angry down votes for sayi...\n", " drieddust\n", @@ -1970,7 +2030,7 @@ " comment\n", " \n", " \n", - " 812\n", + " 814\n", " <NA>\n", " New iPhone should be announced on September. L...\n", " meerita\n", @@ -1979,7 +2039,7 @@ " comment\n", " \n", " \n", - " 1512\n", + " 1515\n", " <NA>\n", " Why would this take a week? 
i(phone)OS was ori...\n", " TheOtherHobbes\n", @@ -1988,7 +2048,7 @@ " comment\n", " \n", " \n", - " 1559\n", + " 1562\n", " <NA>\n", " &gt;or because Apple drama brings many clicks?...\n", " weberer\n", @@ -2004,22 +2064,22 @@ "text/plain": [ " title text by \\\n", "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", - "419 Well last time I got angry down votes for sayi... drieddust \n", - "812 New iPhone should be announced on September. L... meerita \n", - "1512 Why would this take a week? i(phone)OS was ori... TheOtherHobbes \n", - "1559 >or because Apple drama brings many clicks?... weberer \n", + "420 Well last time I got angry down votes for sayi... drieddust \n", + "814 New iPhone should be announced on September. L... meerita \n", + "1515 Why would this take a week? i(phone)OS was ori... TheOtherHobbes \n", + "1562 >or because Apple drama brings many clicks?... weberer \n", "\n", " score timestamp type \n", "9 2023-04-21 16:45:13+00:00 comment \n", - "419 2021-01-11 19:27:27+00:00 comment \n", - "812 2019-07-30 20:54:42+00:00 comment \n", - "1512 2021-06-08 09:25:24+00:00 comment \n", - "1559 2022-09-05 13:16:02+00:00 comment \n", + "420 2021-01-11 19:27:27+00:00 comment \n", + "814 2019-07-30 20:54:42+00:00 comment \n", + "1515 2021-06-08 09:25:24+00:00 comment \n", + "1562 2022-09-05 13:16:02+00:00 comment \n", "\n", "[5 rows x 6 columns]" ] }, - "execution_count": 28, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -2051,7 +2111,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -2104,7 +2164,7 @@ " Frustrated, but hopeful.\n", " \n", " \n", - " 419\n", + " 420\n", " <NA>\n", " Well last time I got angry down votes for sayi...\n", " drieddust\n", @@ -2114,7 +2174,7 @@ " Frustrated and angry.\n", " \n", " \n", - " 812\n", + " 814\n", " <NA>\n", " New iPhone should be announced on September. L...\n", " meerita\n", @@ -2124,7 +2184,7 @@ " Excited anticipation.\n", " \n", " \n", - " 1512\n", + " 1515\n", " <NA>\n", " Why would this take a week? i(phone)OS was ori...\n", " TheOtherHobbes\n", @@ -2134,7 +2194,7 @@ " Frustrated, critical, obvious.\n", " \n", " \n", - " 1559\n", + " 1562\n", " <NA>\n", " &gt;or because Apple drama brings many clicks?...\n", " weberer\n", @@ -2151,34 +2211,34 @@ "text/plain": [ " title text by \\\n", "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", - "419 Well last time I got angry down votes for sayi... drieddust \n", - "812 New iPhone should be announced on September. L... meerita \n", - "1512 Why would this take a week? i(phone)OS was ori... TheOtherHobbes \n", - "1559 >or because Apple drama brings many clicks?... weberer \n", + "420 Well last time I got angry down votes for sayi... drieddust \n", + "814 New iPhone should be announced on September. L... meerita \n", + "1515 Why would this take a week? i(phone)OS was ori... TheOtherHobbes \n", + "1562 >or because Apple drama brings many clicks?... weberer \n", "\n", " score timestamp type \\\n", "9 2023-04-21 16:45:13+00:00 comment \n", - "419 2021-01-11 19:27:27+00:00 comment \n", - "812 2019-07-30 20:54:42+00:00 comment \n", - "1512 2021-06-08 09:25:24+00:00 comment \n", - "1559 2022-09-05 13:16:02+00:00 comment \n", + "420 2021-01-11 19:27:27+00:00 comment \n", + "814 2019-07-30 20:54:42+00:00 comment \n", + "1515 2021-06-08 09:25:24+00:00 comment \n", + "1562 2022-09-05 13:16:02+00:00 comment \n", "\n", " sentiment \n", "9 Frustrated, but hopeful. 
\n", " \n", - "419 Frustrated and angry. \n", + "420 Frustrated and angry. \n", " \n", - "812 Excited anticipation. \n", + "814 Excited anticipation. \n", " \n", - "1512 Frustrated, critical, obvious. \n", + "1515 Frustrated, critical, obvious. \n", " \n", - "1559 Negative, clickbait, Apple. \n", + "1562 Negative, clickbait, Apple. \n", " \n", "\n", "[5 rows x 7 columns]" ] }, - "execution_count": 29, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -2196,14 +2256,14 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py:3577: UserWarning: Reading cached table from 2024-12-27 21:39:10.129973+00:00 to avoid incompatibilies with previous reads of this table. To read the latest version, set `use_cache=False` or close the current session with Session.close() or bigframes.pandas.close_session().\n", + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py:3577: UserWarning: Reading cached table from 2025-01-03 01:18:29.080474+00:00 to avoid incompatibilies with previous reads of this table. To read the latest version, set `use_cache=False` or close the current session with Session.close() or bigframes.pandas.close_session().\n", " exec(code_obj, self.user_global_ns, self.user_ns)\n" ] }, @@ -2553,7 +2613,7 @@ "[3000 rows x 6 columns]" ] }, - "execution_count": 30, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -2565,9 +2625,16 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 33, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This operation will process about 3000 rows. Proceed? [Y/n]\n" + ] + }, { "name": "stderr", "output_type": "stream", @@ -2643,7 +2710,7 @@ " comment\n", " \n", " \n", - " 208\n", + " 209\n", " <NA>\n", " I like the idea of moving that arrow the way h...\n", " rattray\n", @@ -2652,7 +2719,7 @@ " comment\n", " \n", " \n", - " 227\n", + " 228\n", " <NA>\n", " I don&#x27;t understand why a beginner would s...\n", " wolco\n", @@ -2661,7 +2728,7 @@ " comment\n", " \n", " \n", - " 289\n", + " 290\n", " <NA>\n", " I leaerned more with one minute of this than a...\n", " agumonkey\n", @@ -2670,7 +2737,7 @@ " comment\n", " \n", " \n", - " 302\n", + " 303\n", " <NA>\n", " I've suggested a <i>rationale</i> for the tabo...\n", " mechanical_fish\n", @@ -2679,7 +2746,7 @@ " comment\n", " \n", " \n", - " 311\n", + " 312\n", " <NA>\n", " Do you have any reference for this?<p>I&#x27;m...\n", " banashark\n", @@ -2688,7 +2755,7 @@ " comment\n", " \n", " \n", - " 321\n", + " 322\n", " <NA>\n", " Default search scope is an option in the Finde...\n", " kitsunesoba\n", @@ -2697,7 +2764,7 @@ " comment\n", " \n", " \n", - " 390\n", + " 391\n", " <NA>\n", " Orthogonality and biology aren&#x27;t friends.\n", " agumonkey\n", @@ -2706,7 +2773,7 @@ " comment\n", " \n", " \n", - " 395\n", + " 396\n", " <NA>\n", " I chose some random physics book that was good...\n", " prawn\n", @@ -2715,7 +2782,7 @@ " comment\n", " \n", " \n", - " 423\n", + " 424\n", " <NA>\n", " Seeing this get huge on Twitter. 
It&#x27;s the...\n", " shenanigoat\n", @@ -2724,7 +2791,7 @@ " comment\n", " \n", " \n", - " 427\n", + " 428\n", " <NA>\n", " Looking through the comments there are a numbe...\n", " moomin\n", @@ -2733,7 +2800,7 @@ " comment\n", " \n", " \n", - " 428\n", + " 429\n", " <NA>\n", " Legacy media is a tough business. GBTC is payi...\n", " arcticbull\n", @@ -2742,7 +2809,7 @@ " comment\n", " \n", " \n", - " 435\n", + " 436\n", " <NA>\n", " Same thing if you sell unsafe food, yet we hav...\n", " jabradoodle\n", @@ -2751,7 +2818,7 @@ " comment\n", " \n", " \n", - " 437\n", + " 438\n", " <NA>\n", " There was briefly a thing called HSCSD (&quot;...\n", " LeoPanthera\n", @@ -2760,7 +2827,7 @@ " comment\n", " \n", " \n", - " 445\n", + " 446\n", " <NA>\n", " &gt; This article is a bit comical to read and...\n", " lapcat\n", @@ -2769,7 +2836,7 @@ " comment\n", " \n", " \n", - " 452\n", + " 453\n", " <NA>\n", " Large positions are most likely sold off in sm...\n", " meowkit\n", @@ -2778,7 +2845,7 @@ " comment\n", " \n", " \n", - " 506\n", + " 507\n", " <NA>\n", " A US-based VPN (or really any VPN) is only goi...\n", " RandomBacon\n", @@ -2787,7 +2854,7 @@ " comment\n", " \n", " \n", - " 542\n", + " 543\n", " <NA>\n", " <a href=\"https:&#x2F;&#x2F;codeberg.org&#x2F;A...\n", " ElectronBadger\n", @@ -2796,7 +2863,7 @@ " comment\n", " \n", " \n", - " 564\n", + " 565\n", " <NA>\n", " It’s much harder for people without hands to w...\n", " Aeolun\n", @@ -2805,7 +2872,7 @@ " comment\n", " \n", " \n", - " 611\n", + " 612\n", " <NA>\n", " So by using ADMIN_SL0T instead was it just set...\n", " minitoar\n", @@ -2814,7 +2881,7 @@ " comment\n", " \n", " \n", - " 658\n", + " 660\n", " <NA>\n", " Outstanding!\n", " cafard\n", @@ -2823,7 +2890,7 @@ " comment\n", " \n", " \n", - " 671\n", + " 673\n", " <NA>\n", " On the other hand, something can be said for &...\n", " babby\n", @@ -2842,87 +2909,87 @@ "98 \n", "137 FDA reverses marketing ban on Juul e-cigarettes \n", "188 \n", - "208 \n", - "227 \n", - "289 \n", - "302 \n", - "311 \n", - "321 \n", - "390 \n", - "395 \n", - "423 \n", - "427 \n", + "209 \n", + "228 \n", + "290 \n", + "303 \n", + "312 \n", + "322 \n", + "391 \n", + "396 \n", + "424 \n", "428 \n", - "435 \n", - "437 \n", - "445 \n", - "452 \n", - "506 \n", - "542 \n", - "564 \n", - "611 \n", - "658 \n", - "671 \n", + "429 \n", + "436 \n", + "438 \n", + "446 \n", + "453 \n", + "507 \n", + "543 \n", + "565 \n", + "612 \n", + "660 \n", + "673 \n", "\n", " text by \\\n", "24 GiraffeNecktie \n", "98 i resisted switching to chrome for months beca... catshirt \n", "137 anigbrowl \n", "188 I think it's more than hazing. It may be ... bayesianhorse \n", - "208 I like the idea of moving that arrow the way h... rattray \n", - "227 I don't understand why a beginner would s... wolco \n", - "289 I leaerned more with one minute of this than a... agumonkey \n", - "302 I've suggested a rationale for the tabo... mechanical_fish \n", - "311 Do you have any reference for this?

I'm... banashark \n", - "321 Default search scope is an option in the Finde... kitsunesoba \n", - "390 Orthogonality and biology aren't friends. agumonkey \n", - "395 I chose some random physics book that was good... prawn \n", - "423 Seeing this get huge on Twitter. It's the... shenanigoat \n", - "427 Looking through the comments there are a numbe... moomin \n", - "428 Legacy media is a tough business. GBTC is payi... arcticbull \n", - "435 Same thing if you sell unsafe food, yet we hav... jabradoodle \n", - "437 There was briefly a thing called HSCSD ("... LeoPanthera \n", - "445 > This article is a bit comical to read and... lapcat \n", - "452 Large positions are most likely sold off in sm... meowkit \n", - "506 A US-based VPN (or really any VPN) is only goi... RandomBacon \n", - "542 rationale for the tabo... mechanical_fish \n", + "312 Do you have any reference for this?

I'm... banashark \n", + "322 Default search scope is an option in the Finde... kitsunesoba \n", + "391 Orthogonality and biology aren't friends. agumonkey \n", + "396 I chose some random physics book that was good... prawn \n", + "424 Seeing this get huge on Twitter. It's the... shenanigoat \n", + "428 Looking through the comments there are a numbe... moomin \n", + "429 Legacy media is a tough business. GBTC is payi... arcticbull \n", + "436 Same thing if you sell unsafe food, yet we hav... jabradoodle \n", + "438 There was briefly a thing called HSCSD ("... LeoPanthera \n", + "446 > This article is a bit comical to read and... lapcat \n", + "453 Large positions are most likely sold off in sm... meowkit \n", + "507 A US-based VPN (or really any VPN) is only goi... RandomBacon \n", + "543 2011-04-06 08:02:24+00:00 comment \n", "137 2 2024-06-06 16:42:40+00:00 story \n", "188 2015-06-18 16:42:53+00:00 comment \n", - "208 2015-06-08 02:15:30+00:00 comment \n", - "227 2019-02-03 14:35:43+00:00 comment \n", - "289 2016-07-16 06:19:39+00:00 comment \n", - "302 2008-12-17 04:42:02+00:00 comment \n", - "311 2023-11-13 19:57:00+00:00 comment \n", - "321 2017-08-13 17:15:19+00:00 comment \n", - "390 2016-04-24 16:33:41+00:00 comment \n", - "395 2011-03-27 22:29:51+00:00 comment \n", - "423 2016-01-09 03:04:22+00:00 comment \n", - "427 2024-10-01 14:37:04+00:00 comment \n", - "428 2021-04-16 16:30:33+00:00 comment \n", - "435 2023-08-03 20:47:52+00:00 comment \n", - "437 2019-02-11 19:49:29+00:00 comment \n", - "445 2023-01-02 16:00:49+00:00 comment \n", - "452 2021-01-27 23:22:48+00:00 comment \n", - "506 2019-04-05 00:58:58+00:00 comment \n", - "542 2023-12-13 08:13:15+00:00 comment \n", - "564 2024-05-03 11:58:13+00:00 comment \n", - "611 2021-03-05 16:07:56+00:00 comment \n", - "658 2022-06-09 09:51:54+00:00 comment \n", - "671 2013-08-12 00:31:02+00:00 comment \n", + "209 2015-06-08 02:15:30+00:00 comment \n", + "228 2019-02-03 14:35:43+00:00 comment \n", + "290 2016-07-16 06:19:39+00:00 comment \n", + "303 2008-12-17 04:42:02+00:00 comment \n", + "312 2023-11-13 19:57:00+00:00 comment \n", + "322 2017-08-13 17:15:19+00:00 comment \n", + "391 2016-04-24 16:33:41+00:00 comment \n", + "396 2011-03-27 22:29:51+00:00 comment \n", + "424 2016-01-09 03:04:22+00:00 comment \n", + "428 2024-10-01 14:37:04+00:00 comment \n", + "429 2021-04-16 16:30:33+00:00 comment \n", + "436 2023-08-03 20:47:52+00:00 comment \n", + "438 2019-02-11 19:49:29+00:00 comment \n", + "446 2023-01-02 16:00:49+00:00 comment \n", + "453 2021-01-27 23:22:48+00:00 comment \n", + "507 2019-04-05 00:58:58+00:00 comment \n", + "543 2023-12-13 08:13:15+00:00 comment \n", + "565 2024-05-03 11:58:13+00:00 comment \n", + "612 2021-03-05 16:07:56+00:00 comment \n", + "660 2022-06-09 09:51:54+00:00 comment \n", + "673 2013-08-12 00:31:02+00:00 comment \n", "...\n", "\n", "[123 rows x 6 columns]" ] }, - "execution_count": 31, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } diff --git a/tests/system/large/operations/test_semantics.py b/tests/system/large/operations/test_semantics.py index 7602be2fca..20219ef46e 100644 --- a/tests/system/large/operations/test_semantics.py +++ b/tests/system/large/operations/test_semantics.py @@ -12,22 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from contextlib import nullcontext +from unittest.mock import patch + import pandas as pd import pandas.testing import pytest import bigframes -import bigframes.dataframe as dataframe -import bigframes.dtypes as dtypes +from bigframes import dataframe, dtypes, exceptions + +EXPERIMENT_OPTION = "experiments.semantic_operators" +THRESHOLD_OPTION = "compute.semantic_ops_confirmation_threshold" def test_semantics_experiment_off_raise_error(): - bigframes.options.experiments.semantic_operators = False df = dataframe.DataFrame( {"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]} ) - with pytest.raises(NotImplementedError): + with bigframes.option_context(EXPERIMENT_OPTION, False), pytest.raises( + NotImplementedError + ): df.semantics @@ -44,7 +50,6 @@ def test_semantics_experiment_off_raise_error(): ], ) def test_agg(session, gemini_flash_model, max_agg_rows, cluster_column): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={ "Movies": [ @@ -61,20 +66,66 @@ def test_agg(session, gemini_flash_model, max_agg_rows, cluster_column): session=session, ) instruction = "Find the shared first name of actors in {Movies}. One word answer." - actual_s = df.semantics.agg( - instruction, - model=gemini_flash_model, - max_agg_rows=max_agg_rows, - cluster_column=cluster_column, - ).to_pandas() + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 50, + ): + actual_s = df.semantics.agg( + instruction, + model=gemini_flash_model, + max_agg_rows=max_agg_rows, + cluster_column=cluster_column, + ).to_pandas() expected_s = pd.Series(["Leonardo \n"], dtype=dtypes.STRING_DTYPE) expected_s.name = "Movies" pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False) +@pytest.mark.parametrize( + ("reply"), + [ + pytest.param("y"), + pytest.param( + "n", marks=pytest.mark.xfail(raises=exceptions.OperationAbortedError) + ), + ], +) +def test_agg_with_confirmation(session, gemini_flash_model, reply, monkeypatch): + df = dataframe.DataFrame( + data={ + "Movies": [ + "Titanic", + "The Wolf of Wall Street", + "Killers of the Flower Moon", + "The Revenant", + "Inception", + "Shuttle Island", + "The Great Gatsby", + ], + "Years": [1997, 2013, 2023, 2015, 2010, 2010, 2013], + }, + session=session, + ) + instruction = "Find the shared first name of actors in {Movies}. One word answer." + monkeypatch.setattr("builtins.input", lambda: reply) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 0, + ): + df.semantics.agg( + instruction, + model=gemini_flash_model, + ) + + def test_agg_w_int_column(session, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={ "Movies": [ @@ -86,10 +137,17 @@ def test_agg_w_int_column(session, gemini_flash_model): session=session, ) instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Answer with the year only." 
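+    # bigframes.option_context enables the semantic operator experiment and
+    # sets the row-count confirmation threshold for this block only; see
+    # EXPERIMENT_OPTION and THRESHOLD_OPTION at the top of this file.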
- actual_s = df.semantics.agg( - instruction, - model=gemini_flash_model, - ).to_pandas() + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + actual_s = df.semantics.agg( + instruction, + model=gemini_flash_model, + ).to_pandas() expected_s = pd.Series(["2013 \n"], dtype=dtypes.STRING_DTYPE) expected_s.name = "Years" @@ -117,7 +175,6 @@ def test_agg_w_int_column(session, gemini_flash_model): ], ) def test_agg_invalid_instruction_raise_error(instruction, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={ "Movies": [ @@ -128,7 +185,14 @@ def test_agg_invalid_instruction_raise_error(instruction, gemini_flash_model): "Year": [1997, 2013, 2023], }, ) - df.semantics.agg(instruction, gemini_flash_model) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + df.semantics.agg(instruction, gemini_flash_model) @pytest.mark.parametrize( @@ -145,7 +209,6 @@ def test_agg_invalid_instruction_raise_error(instruction, gemini_flash_model): ], ) def test_agg_invalid_cluster_column_raise_error(gemini_flash_model, cluster_column): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={ "Movies": [ @@ -157,7 +220,14 @@ def test_agg_invalid_cluster_column_raise_error(gemini_flash_model, cluster_colu }, ) instruction = "Find the shared first name of actors in {Movies}. One word answer." - df.semantics.agg(instruction, gemini_flash_model, cluster_column=cluster_column) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + df.semantics.agg(instruction, gemini_flash_model, cluster_column=cluster_column) @pytest.mark.parametrize( @@ -168,7 +238,6 @@ def test_agg_invalid_cluster_column_raise_error(gemini_flash_model, cluster_colu ], ) def test_cluster_by(session, text_embedding_generator, n_clusters): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( ( { @@ -186,28 +255,82 @@ def test_cluster_by(session, text_embedding_generator, n_clusters): session=session, ) output_column = "cluster id" - result = df.semantics.cluster_by( - "Item", - output_column, - text_embedding_generator, - n_clusters=n_clusters, - ) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + result = df.semantics.cluster_by( + "Item", + output_column, + text_embedding_generator, + n_clusters=n_clusters, + ) assert output_column in result # In rare cases, it's possible to have fewer than K clusters due to randomness. 
assert len(result[output_column].unique()) <= n_clusters -def test_cluster_by_invalid_column(session, text_embedding_generator): - bigframes.options.experiments.semantic_operators = True +@pytest.mark.parametrize( + ("reply"), + [ + pytest.param("y"), + pytest.param( + "n", marks=pytest.mark.xfail(raises=exceptions.OperationAbortedError) + ), + ], +) +def test_cluster_by_with_confirmation( + session, text_embedding_generator, reply, monkeypatch +): + df = dataframe.DataFrame( + ( + { + "Item": [ + "Orange", + "Cantaloupe", + "Watermelon", + "Chicken", + "Duck", + "Hen", + "Rooster", + ] + } + ), + session=session, + ) + monkeypatch.setattr("builtins.input", lambda: reply) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 0, + ): + df.semantics.cluster_by( + "Item", + "cluster id", + text_embedding_generator, + n_clusters=2, + ) + +def test_cluster_by_invalid_column(session, text_embedding_generator): df = dataframe.DataFrame( ({"Product": ["Smartphone", "Laptop", "Coffee Maker", "T-shirt", "Jeans"]}), session=session, ) - output_column = "cluster id" - with pytest.raises(ValueError): + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(ValueError): df.semantics.cluster_by( "unknown_column", output_column, @@ -217,15 +340,18 @@ def test_cluster_by_invalid_column(session, text_embedding_generator): def test_cluster_by_invalid_model(session, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True - df = dataframe.DataFrame( ({"Product": ["Smartphone", "Laptop", "Coffee Maker", "T-shirt", "Jeans"]}), session=session, ) - output_column = "cluster id" - with pytest.raises(TypeError): + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(TypeError): df.semantics.cluster_by( "Product", output_column, @@ -235,7 +361,6 @@ def test_cluster_by_invalid_model(session, gemini_flash_model): def test_filter(session, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={ "country": ["USA", "Germany"], @@ -245,9 +370,15 @@ def test_filter(session, gemini_flash_model): session=session, ) - actual_df = df.semantics.filter( - "{city} is the capital of {country} in {year}", gemini_flash_model - ).to_pandas() + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + actual_df = df.semantics.filter( + "{city} is the capital of {country} in {year}", gemini_flash_model + ).to_pandas() expected_df = pd.DataFrame( {"country": ["Germany"], "city": ["Berlin"], "year": [2024]}, index=[1] @@ -257,16 +388,52 @@ def test_filter(session, gemini_flash_model): ) +@pytest.mark.parametrize( + ("reply"), + [ + pytest.param("y"), + pytest.param( + "n", marks=pytest.mark.xfail(raises=exceptions.OperationAbortedError) + ), + ], +) +def test_filter_with_confirmation(session, gemini_flash_model, reply, monkeypatch): + df = dataframe.DataFrame( + data={ + "country": ["USA", "Germany"], + "city": ["Seattle", "Berlin"], + "year": [2023, 2024], + }, + session=session, + ) + monkeypatch.setattr("builtins.input", lambda: reply) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 0, + ): + df.semantics.filter( + "{city} is the capital of {country} in {year}", gemini_flash_model + ) + + def test_filter_single_column_reference(session, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( 
data={"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]}, session=session, ) - actual_df = df.semantics.filter( - "{country} is in Europe", gemini_flash_model - ).to_pandas() + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + actual_df = df.semantics.filter( + "{country} is in Europe", gemini_flash_model + ).to_pandas() expected_df = pd.DataFrame({"country": ["Germany"], "city": ["Berlin"]}, index=[1]) pandas.testing.assert_frame_equal( @@ -295,25 +462,32 @@ def test_filter_single_column_reference(session, gemini_flash_model): ], ) def test_filter_invalid_instruction_raise_error(instruction, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame({"id": [1, 2], "city": ["Seattle", "Berlin"]}) - with pytest.raises(ValueError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(ValueError): df.semantics.filter(instruction, gemini_flash_model) def test_filter_invalid_model_raise_error(): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( {"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]} ) - with pytest.raises(TypeError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(TypeError): df.semantics.filter("{city} is the capital of {country}", None) def test_map(session, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={ "ingredient_1": ["Burger Bun", "Soy Bean"], @@ -323,11 +497,17 @@ def test_map(session, gemini_flash_model): session=session, ) - actual_df = df.semantics.map( - "What is the {gluten-free} food made from {ingredient_1} and {ingredient_2}? One word only.", - "food", - gemini_flash_model, - ).to_pandas() + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + actual_df = df.semantics.map( + "What is the {gluten-free} food made from {ingredient_1} and {ingredient_2}? One word only.", + "food", + gemini_flash_model, + ).to_pandas() # Result sanitation actual_df["food"] = actual_df["food"].str.strip().str.lower() @@ -348,6 +528,39 @@ def test_map(session, gemini_flash_model): ) +@pytest.mark.parametrize( + ("reply"), + [ + pytest.param("y"), + pytest.param( + "n", marks=pytest.mark.xfail(raises=exceptions.OperationAbortedError) + ), + ], +) +def test_map_with_confirmation(session, gemini_flash_model, reply, monkeypatch): + df = dataframe.DataFrame( + data={ + "ingredient_1": ["Burger Bun", "Soy Bean"], + "ingredient_2": ["Beef Patty", "Bittern"], + "gluten-free": [True, True], + }, + session=session, + ) + monkeypatch.setattr("builtins.input", lambda: reply) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 0, + ): + df.semantics.map( + "What is the {gluten-free} food made from {ingredient_1} and {ingredient_2}? 
One word only.", + "food", + gemini_flash_model, + ) + + @pytest.mark.parametrize( "instruction", [ @@ -369,7 +582,6 @@ def test_map(session, gemini_flash_model): ], ) def test_map_invalid_instruction_raise_error(instruction, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={ "id": [1, 2], @@ -378,12 +590,16 @@ def test_map_invalid_instruction_raise_error(instruction, gemini_flash_model): } ) - with pytest.raises(ValueError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(ValueError): df.semantics.map(instruction, "food", gemini_flash_model) def test_map_invalid_model_raise_error(): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={ "ingredient_1": ["Burger Bun", "Soy Bean"], @@ -391,7 +607,12 @@ def test_map_invalid_model_raise_error(): }, ) - with pytest.raises(TypeError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(TypeError): df.semantics.map( "What is the food made from {ingredient_1} and {ingredient_2}? One word only.", "food", @@ -414,7 +635,6 @@ def test_map_invalid_model_raise_error(): ], ) def test_join(instruction, session, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True cities = dataframe.DataFrame( data={ "city": ["Seattle", "Berlin"], @@ -426,11 +646,17 @@ def test_join(instruction, session, gemini_flash_model): session=session, ) - actual_df = cities.semantics.join( - countries, - instruction, - gemini_flash_model, - ).to_pandas() + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + actual_df = cities.semantics.join( + countries, + instruction, + gemini_flash_model, + ).to_pandas() expected_df = pd.DataFrame( { @@ -447,8 +673,42 @@ def test_join(instruction, session, gemini_flash_model): ) +@pytest.mark.parametrize( + ("reply"), + [ + pytest.param("y"), + pytest.param( + "n", marks=pytest.mark.xfail(raises=exceptions.OperationAbortedError) + ), + ], +) +def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch): + cities = dataframe.DataFrame( + data={ + "city": ["Seattle", "Berlin"], + }, + session=session, + ) + countries = dataframe.DataFrame( + data={"country": ["USA", "UK", "Germany"]}, + session=session, + ) + monkeypatch.setattr("builtins.input", lambda: reply) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 0, + ): + cities.semantics.join( + countries, + "{city} is in {country}", + gemini_flash_model, + ) + + def test_self_join(session, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True animals = dataframe.DataFrame( data={ "animal": ["spider", "capybara"], @@ -456,11 +716,17 @@ def test_self_join(session, gemini_flash_model): session=session, ) - actual_df = animals.semantics.join( - animals, - "{left.animal} is heavier than {right.animal}", - gemini_flash_model, - ).to_pandas() + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + actual_df = animals.semantics.join( + animals, + "{left.animal} is heavier than {right.animal}", + gemini_flash_model, + ).to_pandas() expected_df = pd.DataFrame( { @@ -477,25 +743,6 @@ def test_self_join(session, gemini_flash_model): ) -def test_join_data_too_large_raise_error(session, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True - cities = dataframe.DataFrame( - data={ - "city": 
["Seattle", "Berlin"], - }, - session=session, - ) - countries = dataframe.DataFrame( - data={"country": ["USA", "UK", "Germany"]}, - session=session, - ) - - with pytest.raises(ValueError): - cities.semantics.join( - countries, "{city} belongs to {country}", gemini_flash_model, max_rows=1 - ) - - @pytest.mark.parametrize( ("instruction", "error_pattern"), [ @@ -521,7 +768,6 @@ def test_join_data_too_large_raise_error(session, gemini_flash_model): def test_join_invalid_instruction_raise_error( instruction, error_pattern, gemini_flash_model ): - bigframes.options.experiments.semantic_operators = True df1 = dataframe.DataFrame( {"city": ["Seattle", "Berlin"], "country": ["USA", "Germany"]} ) @@ -532,16 +778,25 @@ def test_join_invalid_instruction_raise_error( } ) - with pytest.raises(ValueError, match=error_pattern): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(ValueError, match=error_pattern): df1.semantics.join(df2, instruction, gemini_flash_model) def test_join_invalid_model_raise_error(): - bigframes.options.experiments.semantic_operators = True cities = dataframe.DataFrame({"city": ["Seattle", "Berlin"]}) countries = dataframe.DataFrame({"country": ["USA", "UK", "Germany"]}) - with pytest.raises(TypeError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(TypeError): cities.semantics.join(countries, "{city} is in {country}", None) @@ -553,19 +808,24 @@ def test_join_invalid_model_raise_error(): ], ) def test_search(session, text_embedding_generator, score_column): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={"creatures": ["salmon", "sea urchin", "baboons", "frog", "chimpanzee"]}, session=session, ) - actual_result = df.semantics.search( - "creatures", - "monkey", - top_k=2, - model=text_embedding_generator, - score_column=score_column, - ).to_pandas() + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + actual_result = df.semantics.search( + "creatures", + "monkey", + top_k=2, + model=text_embedding_generator, + score_column=score_column, + ).to_pandas() expected_result = pd.Series( ["baboons", "chimpanzee"], index=[2, 4], name="creatures" @@ -583,38 +843,82 @@ def test_search(session, text_embedding_generator, score_column): assert score_column in actual_result.columns +@pytest.mark.parametrize( + ("reply"), + [ + pytest.param("y"), + pytest.param( + "n", marks=pytest.mark.xfail(raises=exceptions.OperationAbortedError) + ), + ], +) +def test_search_with_confirmation( + session, text_embedding_generator, reply, monkeypatch +): + df = dataframe.DataFrame( + data={"creatures": ["salmon", "sea urchin", "baboons", "frog", "chimpanzee"]}, + session=session, + ) + monkeypatch.setattr("builtins.input", lambda: reply) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 0, + ): + df.semantics.search( + "creatures", + "monkey", + top_k=2, + model=text_embedding_generator, + ) + + def test_search_invalid_column_raises_error(session, text_embedding_generator): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={"creatures": ["salmon", "sea urchin", "baboons", "frog", "chimpanzee"]}, session=session, ) - with pytest.raises(ValueError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(ValueError): df.semantics.search( "whatever", "monkey", top_k=2, 
model=text_embedding_generator ) def test_search_invalid_model_raises_error(session): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={"creatures": ["salmon", "sea urchin", "baboons", "frog", "chimpanzee"]}, session=session, ) - with pytest.raises(TypeError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(TypeError): df.semantics.search("creatures", "monkey", top_k=2, model=None) def test_search_invalid_top_k_raises_error(session, text_embedding_generator): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( data={"creatures": ["salmon", "sea urchin", "baboons", "frog", "chimpanzee"]}, session=session, ) - with pytest.raises(ValueError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(ValueError): df.semantics.search( "creatures", "monkey", top_k=0, model=text_embedding_generator ) @@ -628,7 +932,6 @@ def test_search_invalid_top_k_raises_error(session, text_embedding_generator): ], ) def test_sim_join(session, text_embedding_generator, score_column): - bigframes.options.experiments.semantic_operators = True df1 = dataframe.DataFrame( data={"creatures": ["salmon", "cat"]}, session=session, @@ -638,14 +941,20 @@ def test_sim_join(session, text_embedding_generator, score_column): session=session, ) - actual_result = df1.semantics.sim_join( - df2, - left_on="creatures", - right_on="creatures", - model=text_embedding_generator, - top_k=1, - score_column=score_column, - ).to_pandas() + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + actual_result = df1.semantics.sim_join( + df2, + left_on="creatures", + right_on="creatures", + model=text_embedding_generator, + top_k=1, + score_column=score_column, + ).to_pandas() expected_result = pd.DataFrame( {"creatures": ["salmon", "cat"], "creatures_1": ["tuna", "dog"]} @@ -663,6 +972,43 @@ def test_sim_join(session, text_embedding_generator, score_column): assert score_column in actual_result.columns +@pytest.mark.parametrize( + ("reply"), + [ + pytest.param("y"), + pytest.param( + "n", marks=pytest.mark.xfail(raises=exceptions.OperationAbortedError) + ), + ], +) +def test_sim_join_with_confirmation( + session, text_embedding_generator, reply, monkeypatch +): + df1 = dataframe.DataFrame( + data={"creatures": ["salmon", "cat"]}, + session=session, + ) + df2 = dataframe.DataFrame( + data={"creatures": ["dog", "tuna"]}, + session=session, + ) + monkeypatch.setattr("builtins.input", lambda: reply) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 0, + ): + df1.semantics.sim_join( + df2, + left_on="creatures", + right_on="creatures", + model=text_embedding_generator, + top_k=1, + ) + + @pytest.mark.parametrize( ("left_on", "right_on"), [ @@ -673,7 +1019,6 @@ def test_sim_join(session, text_embedding_generator, score_column): def test_sim_join_invalid_column_raises_error( session, text_embedding_generator, left_on, right_on ): - bigframes.options.experiments.semantic_operators = True df1 = dataframe.DataFrame( data={"creatures": ["salmon", "cat"]}, session=session, @@ -683,14 +1028,18 @@ def test_sim_join_invalid_column_raises_error( session=session, ) - with pytest.raises(ValueError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(ValueError): df1.semantics.sim_join( df2, left_on=left_on, right_on=right_on, 
model=text_embedding_generator ) def test_sim_join_invalid_model_raises_error(session): - bigframes.options.experiments.semantic_operators = True df1 = dataframe.DataFrame( data={"creatures": ["salmon", "cat"]}, session=session, @@ -700,14 +1049,18 @@ def test_sim_join_invalid_model_raises_error(session): session=session, ) - with pytest.raises(TypeError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(TypeError): df1.semantics.sim_join( df2, left_on="creatures", right_on="creatures", model=None ) def test_sim_join_invalid_top_k_raises_error(session, text_embedding_generator): - bigframes.options.experiments.semantic_operators = True df1 = dataframe.DataFrame( data={"creatures": ["salmon", "cat"]}, session=session, @@ -717,7 +1070,12 @@ def test_sim_join_invalid_top_k_raises_error(session, text_embedding_generator): session=session, ) - with pytest.raises(ValueError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(ValueError): df1.semantics.sim_join( df2, left_on="creatures", @@ -728,7 +1086,6 @@ def test_sim_join_invalid_top_k_raises_error(session, text_embedding_generator): def test_sim_join_data_too_large_raises_error(session, text_embedding_generator): - bigframes.options.experiments.semantic_operators = True df1 = dataframe.DataFrame( data={"creatures": ["salmon", "cat"]}, session=session, @@ -738,7 +1095,12 @@ def test_sim_join_data_too_large_raises_error(session, text_embedding_generator) session=session, ) - with pytest.raises(ValueError): + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(ValueError): df1.semantics.sim_join( df2, left_on="creatures", @@ -774,7 +1136,6 @@ def test_sim_join_data_too_large_raises_error(session, text_embedding_generator) ], ) def test_top_k_invalid_instruction_raise_error(instruction, gemini_flash_model): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame( { "Animals": ["Dog", "Cat", "Bird", "Horse"], @@ -782,15 +1143,97 @@ def test_top_k_invalid_instruction_raise_error(instruction, gemini_flash_model): "index": ["a", "b", "c", "d"], } ) - df.semantics.top_k(instruction, model=gemini_flash_model, k=2) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + df.semantics.top_k(instruction, model=gemini_flash_model, k=2) def test_top_k_invalid_k_raise_error(gemini_flash_model): - bigframes.options.experiments.semantic_operators = True df = dataframe.DataFrame({"Animals": ["Dog", "Cat", "Bird", "Horse"]}) - with pytest.raises(ValueError): + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 10, + ), pytest.raises(ValueError): df.semantics.top_k( "{Animals} are more popular as pets", gemini_flash_model, k=0, ) + + +@patch("builtins.input", return_value="") +def test_confirm_operation__below_threshold_do_not_confirm(mock_input): + df = dataframe.DataFrame({}) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 3, + ): + df.semantics._confirm_operation(1) + + mock_input.assert_not_called() + + +@patch("builtins.input", return_value="") +def test_confirm_operation__threshold_is_none_do_not_confirm(mock_input): + df = dataframe.DataFrame({}) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + None, + ): + df.semantics._confirm_operation(100) + + mock_input.assert_not_called() + + +@patch("builtins.input", 
return_value="") +def test_confirm_operation__threshold_autofail_do_not_confirm(mock_input): + df = dataframe.DataFrame({}) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 1, + "compute.semantic_ops_threshold_autofail", + True, + ), pytest.raises(exceptions.OperationAbortedError): + df.semantics._confirm_operation(100) + + mock_input.assert_not_called() + + +@pytest.mark.parametrize( + ("reply", "expectation"), + [ + ("y", nullcontext()), + ("yes", nullcontext()), + ("", nullcontext()), + ("n", pytest.raises(exceptions.OperationAbortedError)), + ("something", pytest.raises(exceptions.OperationAbortedError)), + ], +) +def test_confirm_operation__above_threshold_confirm(reply, expectation, monkeypatch): + monkeypatch.setattr("builtins.input", lambda: reply) + df = dataframe.DataFrame({}) + + with bigframes.option_context( + EXPERIMENT_OPTION, + True, + THRESHOLD_OPTION, + 3, + ), expectation as e: + assert df.semantics._confirm_operation(4) == e From dd4fd2e8bafa73b4b5d99f095943bd9a757cd5b5 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Sat, 4 Jan 2025 01:33:45 +0000 Subject: [PATCH 6/9] docs: remove bq studio link (#1258) * remove bq studio link * remove a box that is added by mistake --------- Co-authored-by: Shuowei Li --- .../generative_ai/bq_dataframes_llm_code_generation.ipynb | 6 ------ notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb | 6 ------ .../bq_dataframes_ml_drug_name_generation.ipynb | 6 ------ notebooks/getting_started/bq_dataframes_template.ipynb | 6 ------ .../getting_started/getting_started_bq_dataframes.ipynb | 8 +------- .../getting_started/ml_fundamentals_bq_dataframes.ipynb | 6 ------ .../remote_function_vertex_claude_model.ipynb | 6 ------ .../visualization/bq_dataframes_covid_line_graphs.ipynb | 6 ------ 8 files changed, 1 insertion(+), 49 deletions(-) diff --git a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb index c7c302e5a9..09e3d9c969 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb @@ -50,12 +50,6 @@ " Open in Vertex AI Workbench\n", " \n", " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - " \n", "" ] }, diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb index b9a9c6723b..7307bb62e5 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb @@ -47,12 +47,6 @@ " Open in Vertex AI Workbench\n", " \n", " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - " \n", "" ] }, diff --git a/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb b/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb index 4c589abf95..d7419deee0 100644 --- a/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb +++ b/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb @@ -49,12 +49,6 @@ " Open in Vertex AI Workbench\n", " \n", " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - " \n", "" ] }, diff --git a/notebooks/getting_started/bq_dataframes_template.ipynb b/notebooks/getting_started/bq_dataframes_template.ipynb index aa12a2ed67..a04c7f7907 100644 --- a/notebooks/getting_started/bq_dataframes_template.ipynb +++ b/notebooks/getting_started/bq_dataframes_template.ipynb @@ -50,12 +50,6 @@ " Open in 
Vertex AI Workbench\n", " \n", " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - " \n", "" ] }, diff --git a/notebooks/getting_started/getting_started_bq_dataframes.ipynb b/notebooks/getting_started/getting_started_bq_dataframes.ipynb index f0cc61d8d2..3a9bb26f57 100644 --- a/notebooks/getting_started/getting_started_bq_dataframes.ipynb +++ b/notebooks/getting_started/getting_started_bq_dataframes.ipynb @@ -50,12 +50,6 @@ " Open in Vertex AI Workbench\n", " \n", " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - " \n", "" ] }, @@ -1798,7 +1792,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.1" + "version": "3.10.15" } }, "nbformat": 4, diff --git a/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb b/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb index d8e277d6a8..e3c01058ea 100644 --- a/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb +++ b/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb @@ -50,12 +50,6 @@ " Open in Vertex AI Workbench\n", " \n", " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - " \n", "" ] }, diff --git a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb index c450f563c2..f5a88b3066 100644 --- a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb +++ b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb @@ -19,12 +19,6 @@ " View on GitHub\n", " \n", " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - " \n", "" ] }, diff --git a/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb b/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb index 9e44a0a5c1..66a35d0046 100644 --- a/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb +++ b/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb @@ -44,12 +44,6 @@ " View on GitHub\n", " \n", " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - " \n", "" ] }, From b483932c3db7baa3ae16a5bb03be35d396c14b7e Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Sat, 4 Jan 2025 01:07:10 -0800 Subject: [PATCH 7/9] chore: fix wordings of Gemini max_retries (#1244) --- bigframes/ml/llm.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 2427009cf1..d42138b006 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -986,9 +986,8 @@ def predict( The default is `False`. max_retries (int, default 0): - Max number of retry rounds if any rows failed in the prediction. Each round need to make progress (has succeeded rows) to continue the next retry round. - Each round will append newly succeeded rows. When the max retry rounds is reached, the remaining failed rows will be appended to the end of the result. - + Max number of retries if the prediction for any rows failed. Each try needs to make progress (i.e. has successfully predicted rows) to continue the retry. + Each retry will append newly succeeded rows. When the max retries are reached, the remaining rows (the ones without successful predictions) will be appended to the end of the result. Returns: bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. 
""" @@ -1034,11 +1033,15 @@ def predict( for _ in range(max_retries + 1): df = self._bqml_model.generate_text(df_fail, options) - df_succ = df[df[_ML_GENERATE_TEXT_STATUS].str.len() == 0] - df_fail = df[df[_ML_GENERATE_TEXT_STATUS].str.len() > 0] + success = df[_ML_GENERATE_TEXT_STATUS].str.len() == 0 + df_succ = df[success] + df_fail = df[~success] if df_succ.empty: - warnings.warn("Can't make any progress, stop retrying.", RuntimeWarning) + if max_retries > 0: + warnings.warn( + "Can't make any progress, stop retrying.", RuntimeWarning + ) break df_result = ( From a2ed989fac789b0debacc0ec8a044b473cc6112c Mon Sep 17 00:00:00 2001 From: Jiaxun Wu <35040939+jiaxunwu@users.noreply.github.com> Date: Sat, 4 Jan 2025 18:03:17 -0800 Subject: [PATCH 8/9] docs: Update semantic_operators.ipynb (#1260) --- .../experimental/semantic_operators.ipynb | 6198 +++++++++-------- 1 file changed, 3187 insertions(+), 3011 deletions(-) diff --git a/notebooks/experimental/semantic_operators.ipynb b/notebooks/experimental/semantic_operators.ipynb index 8a2f083419..f9c7f67358 100644 --- a/notebooks/experimental/semantic_operators.ipynb +++ b/notebooks/experimental/semantic_operators.ipynb @@ -1,3032 +1,3208 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Copyright 2024 Google LLC\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# BigQuery DataFrames AI (semantic) Operator Demo" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The BigQuery DataFrames team implements AI operators inspired by the \"Lotus\" paper: https://arxiv.org/pdf/2407.11418.\n", - "\n", - "This notebook gives you a hands-on preview of AI operator APIs powered by LLM. You can open this notebook on Google Colab [here](https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/experimental/semantic_operators.ipynb). \n", - "\n", - "The notebook has two sections. The first section introduces the API syntax with examples, with the aim to get you familiar with how AI operators work. The second section applies AI operators on a large real-world dataset. You will also find some performance statistics there." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Preparation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, import the BigQuery DataFrames modules." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import bigframes\n", - "import bigframes.pandas as bpd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make sure the BigFrames version is at least `1.23.0`" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from packaging.version import Version\n", - "\n", - "assert Version(bigframes.__version__) >= Version(\"1.23.0\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Turn on the semantic operator experiment. You will see a warning sign saying that these operators are still under experiments. If you don't turn on the experiment before using the operators, you will get `NotImplemenetedError`s." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/_config/experiment_options.py:34: UserWarning: Semantic operators are still under experiments, and are subject to change in the future.\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "bigframes.options.experiments.semantic_operators = True" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Optional: turn off the display of progress bar so that only the operation results will be printed out" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "bpd.options.display.progress_bar = None" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create LLM instances. They will be passed in as parameters for each semantic operator." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:258: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", - " return global_session.get_global_session()\n" - ] - } - ], - "source": [ - "from bigframes.ml import llm\n", - "gemini_model = llm.GeminiTextGenerator(model_name=\"gemini-1.5-flash-001\")\n", - "text_embedding_model = llm.TextEmbeddingGenerator(model_name=\"text-embedding-005\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "*Note*: semantic operators could be expensive over a large set of data. As a result, our team added this option `bigframes.options.compute.sem_ops_confirmation_threshold` at `version 1.31.0` so that the BigQuery Dataframe will ask for your confirmation if the amount of data to be processed is too large. If the amount of rows exceeds your threshold, you will see a prompt for your keyboard input -- 'y' to proceed and 'n' to abort. If you abort the operation, no LLM processing will be done.\n", - "\n", - "The default threshold is 0, which means the operators will always ask for confirmations. You are free to adjust the value as needed. You can also set the threshold to `None` to disable this feature." 
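The confirmation threshold described in the note above is what the `_confirm_operation` tests in PATCH 5/9 exercise. A simplified sketch of that gate, with hypothetical names standing in for the real bigframes options and internals:

```python
# Sketch of the confirmation gate exercised by the PATCH 5/9 tests.
# Names here are simplified stand-ins, not the real bigframes config.
class OperationAbortedError(Exception):
    pass

def confirm_operation(row_count, threshold, autofail=False):
    if threshold is None or row_count <= threshold:
        return  # threshold disabled or not exceeded: no prompt
    if autofail:  # semantic_ops_threshold_autofail behavior
        raise OperationAbortedError("Operation was cancelled.")
    reply = input(f"Process about {row_count} rows? [Y/n] ")
    if reply.casefold() not in ("y", "yes", ""):
        raise OperationAbortedError("Operation was cancelled.")
```

Replies of `y`, `yes`, or an empty string proceed; anything else aborts, matching the parametrized expectations in the tests.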
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", - " bigframes.options.compute.semantic_ops_confirmation_threshold = 100" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you would like your operations to fail automatically when the data is too large, set `bigframes.options.compute.semantic_ops_threshold_autofail` to `True`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", - "# bigframes.options.compute.semantic_ops_threshold_autofail = True" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The API" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You will learn about each semantic operator by trying some examples." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Semantic Filtering" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Semantic filtering allows you to filter your dataframe based on the instruction (i.e. prompt) you provided. \n", - "\n", - "First, create a dataframe:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "

\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countrycity
0USASeattle
1GermanyBerlin
2JapanKyoto
\n", - "

3 rows × 2 columns

\n", - "
[3 rows x 2 columns in total]" + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UYeZd_I8iouP" + }, + "outputs": [], + "source": [ + "# Copyright 2024 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rWJnGj2ViouP" + }, + "source": [ + "# BigFrames AI (semantic) Operator Tutorial\n", + "\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", + " View on GitHub\n", + " \n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mgOrr256iouQ" + }, + "source": [ + "This notebook provides a hands-on preview of AI operator APIs powered by the Gemini model.\n", + "\n", + "The notebook is divided into two sections. The first section introduces the API syntax with examples, aiming to familiarize you with how AI operators work. The second section applies AI operators to a large real-world dataset and presents performance statistics.\n", + "\n", + "This work is inspired by [this paper](https://arxiv.org/pdf/2407.11418) and powered by BigQuery ML and Vertex AI." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ymVbJV2iouQ" + }, + "source": [ + "# Preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vvVzFzo3iouQ" + }, + "source": [ + "First, import the BigFrames modules.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Jb9glT2ziouQ" + }, + "outputs": [], + "source": [ + "import bigframes\n", + "import bigframes.pandas as bpd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xQiCWj7OiouQ" + }, + "source": [ + "Make sure the BigFrames version is at least `1.23.0`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LTPpI8IpiouQ" + }, + "outputs": [], + "source": [ + "from packaging.version import Version\n", + "\n", + "assert Version(bigframes.__version__) >= Version(\"1.23.0\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agxLmtlbiouR" + }, + "source": [ + "Turn on the semantic operator experiment. You will see a warning sign saying that these operators are still under experiments. If you don't turn on the experiment before using the operators, you will get `NotImplemenetedError`s." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1wXqdDr8iouR" + }, + "outputs": [], + "source": [ + "bigframes.options.experiments.semantic_operators = True" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Specify your GCP project and location." ], - "text/plain": [ - " country city\n", - "0 USA Seattle\n", - "1 Germany Berlin\n", - "2 Japan Kyoto\n", - "\n", - "[3 rows x 2 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = bpd.DataFrame({'country': ['USA', 'Germany', 'Japan'], 'city': ['Seattle', 'Berlin', 'Kyoto']})\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, filter this dataframe by keeping only the rows where the value in `city` column is the capital of the value in `country` column. The column references could be \"escaped\" by using a pair of braces in your instruction. In this example, your instruction should be like this:\n", - "```\n", - "The {city} is the capital of the {country}.\n", - "```\n", - "\n", - "Note that this is not a Python f-string, so you shouldn't prefix your instruction with an `f`." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countrycity
1GermanyBerlin
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" + "metadata": { + "id": "W8TPUvnsqxhv" + } + }, + { + "cell_type": "code", + "source": [ + "bpd.options.bigquery.project = 'YOUR_PROJECT_ID'\n", + "bpd.options.bigquery.location = 'US'" ], - "text/plain": [ - " country city\n", - "1 Germany Berlin\n", - "\n", - "[1 rows x 2 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.semantics.filter(\"The {city} is the capital of the {country}\", model=gemini_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The filter operator extracts the information from the referenced column to enrich your instruction with context. The instruction is then sent for the designated model for evaluation. For filtering operations, the LLM is asked to return only `True` and `False` for each row, and the operator removes the rows accordingly." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Semantic Mapping" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Semantic mapping allows to you to combine values from multiple columns into a single output based your instruction. \n", - "\n", - "Here is an example:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ingredient_1ingredient_2
0BunBeef Patty
1Soy BeanBittern
2SausageLong Bread
\n", - "

3 rows × 2 columns

\n", - "
[3 rows x 2 columns in total]" + "metadata": { + "id": "vCkraKOeqJFl" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "n95MFlS0iouR" + }, + "source": [ + "**Optional**: turn off the display of progress bar so that only the operation results will be printed out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5r6ahx7MiouR" + }, + "outputs": [], + "source": [ + "# bpd.options.display.progress_bar = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "93iYvp7niouR" + }, + "source": [ + "Create LLM instances. They will be passed in as parameters for each semantic operator.\n", + "\n", + "This tutorial uses the \"gemini-1.5-flash-001\" model for text generation and \"text-embedding-005\" for embedding. While these are recommended, you can choose [other Vertex AI LLM models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models) based on your needs and availability. Ensure you have [sufficient quota](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas) for your chosen models and adjust it if necessary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tHkymaLNiouR" + }, + "outputs": [], + "source": [ + "from bigframes.ml import llm\n", + "gemini_model = llm.GeminiTextGenerator(model_name=\"gemini-1.5-flash-001\")\n", + "text_embedding_model = llm.TextEmbeddingGenerator(model_name=\"text-embedding-005\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mbFDcvnPiouR" + }, + "source": [ + "**Note**: semantic operators could be expensive over a large set of data. As a result, our team added this option `bigframes.options.compute.sem_ops_confirmation_threshold` at `version 1.31.0` so that the BigFrames will ask for your confirmation if the amount of data to be processed is too large. If the amount of rows exceeds your threshold, you will see a prompt for your keyboard input -- 'y' to proceed and 'n' to abort. If you abort the operation, no LLM processing will be done.\n", + "\n", + "The default threshold is 0, which means the operators will always ask for confirmations. You are free to adjust the value as needed. You can also set the threshold to `None` to disable this feature." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F4dZm4b7iouR" + }, + "outputs": [], + "source": [ + "if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", + " bigframes.options.compute.semantic_ops_confirmation_threshold = 1000" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_dEA3G9RiouR" + }, + "source": [ + "If you would like your operations to fail automatically when the data is too large, set `bigframes.options.compute.semantic_ops_threshold_autofail` to `True`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BoUK-cpbiouS" + }, + "outputs": [], + "source": [ + "# if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", + "# bigframes.options.compute.semantic_ops_threshold_autofail = True" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hQft3o3OiouS" + }, + "source": [ + "# API Samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dt5Kl-QGiouS" + }, + "source": [ + "You will learn about each semantic operator by trying some examples." 
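Before the per-operator examples that follow, it may help to see the option-scoping idiom the updated tests in PATCH 5/9 use throughout. A sketch, assuming the dotted option paths implied by `bigframes.options.experiments.semantic_operators` and `bigframes.options.compute.semantic_ops_confirmation_threshold` (the tests wrap these in their own `EXPERIMENT_OPTION` / `THRESHOLD_OPTION` constants):

```python
# Sketch of the option-scoping idiom used throughout the updated tests.
# The dotted paths are inferred from the options referenced elsewhere
# in this series; verify them against the bigframes config module.
import bigframes

with bigframes.option_context(
    "experiments.semantic_operators", True,
    "compute.semantic_ops_confirmation_threshold", 10,
):
    pass  # semantic operator calls here run with both options scoped
```

Scoping the options with a context manager keeps the experiment flag and threshold from leaking between tests, which is why the patch replaces the module-level `bigframes.options.experiments.semantic_operators = True` assignments.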
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J7XAT459iouS" + }, + "source": [ + "## Semantic Filtering" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9d5HUIvliouS" + }, + "source": [ + "Semantic filtering allows you to filter your dataframe based on the instruction (i.e. prompt) you provided.\n", + "\n", + "First, create a dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 190 + }, + "id": "NDpCRGd_iouS", + "outputId": "5048c935-06d3-4ef1-ad87-72e14a30b1b7" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " country city\n", + "0 USA Seattle\n", + "1 Germany Berlin\n", + "2 Japan Kyoto\n", + "\n", + "[3 rows x 2 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countrycity
0USASeattle
1GermanyBerlin
2JapanKyoto
\n", + "

3 rows × 2 columns

\n", + "
[3 rows x 2 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 10 + } ], - "text/plain": [ - " ingredient_1 ingredient_2\n", - "0 Bun Beef Patty\n", - "1 Soy Bean Bittern\n", - "2 Sausage Long Bread\n", - "\n", - "[3 rows x 2 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = bpd.DataFrame({\n", - " \"ingredient_1\": [\"Bun\", \"Soy Bean\", \"Sausage\"], \n", - " \"ingredient_2\": [\"Beef Patty\", \"Bittern\", \"Long Bread\"]\n", - " })\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, you ask LLM what kind of food can be made from the two ingredients in each row. The column reference syntax in your instruction stays the same. In addition, you need to specify the column name by setting the `output_column` parameter to hold the mapping results." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ingredient_1ingredient_2food
0BunBeef PattyBurger
1Soy BeanBitternTofu
2SausageLong BreadHotdog
\n", - "

3 rows × 3 columns

\n", - "
[3 rows x 3 columns in total]" + "source": [ + "df = bpd.DataFrame({'country': ['USA', 'Germany', 'Japan'], 'city': ['Seattle', 'Berlin', 'Kyoto']})\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6AXmT7sniouS" + }, + "source": [ + "Now, filter this dataframe by keeping only the rows where the value in `city` column is the capital of the value in `country` column. The column references could be \"escaped\" by using a pair of braces in your instruction. In this example, your instruction should be like this:\n", + "```\n", + "The {city} is the capital of the {country}.\n", + "```\n", + "\n", + "Note that this is not a Python f-string, so you shouldn't prefix your instruction with an `f`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 127 + }, + "id": "ipW3Z_l4iouS", + "outputId": "ad447459-225a-419c-d4c8-fedac4a9ed0f" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " country city\n", + "1 Germany Berlin\n", + "\n", + "[1 rows x 2 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countrycity
1GermanyBerlin
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 11 + } ], - "text/plain": [ - " ingredient_1 ingredient_2 food\n", - "0 Bun Beef Patty Burger \n", - "\n", - "1 Soy Bean Bittern Tofu \n", - "\n", - "2 Sausage Long Bread Hotdog \n", - "\n", - "\n", - "[3 rows x 3 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.semantics.map(\"What is the food made from {ingredient_1} and {ingredient_2}? One word only.\", output_column=\"food\", model=gemini_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Semantic Joining" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Semantic joining can join two dataframes based on the instruction you provided. \n", - "\n", - "First, you prepare two dataframes:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "cities = bpd.DataFrame({'city': ['Seattle', 'Ottawa', 'Berlin', 'Shanghai', 'New Delhi']})\n", - "continents = bpd.DataFrame({'continent': ['North America', 'Africa', 'Asia']})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You want to join the `cities` with `continents` to form a new dataframe such that, in each row the city from the `cities` data frame is in the continent from the `continents` dataframe. You could re-use the aforementioned column reference syntax:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
citycontinent
0SeattleNorth America
1OttawaNorth America
2ShanghaiAsia
3New DelhiAsia
\n", - "

4 rows × 2 columns

\n", - "
[4 rows x 2 columns in total]" + "source": [ + "df.semantics.filter(\"The {city} is the capital of the {country}\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "swKvgfm1iouS" + }, + "source": [ + "The filter operator extracts the information from the referenced column to enrich your instruction with context. The instruction is then sent for the designated model for evaluation. For filtering operations, the LLM is asked to return only `True` and `False` for each row, and the operator removes the rows accordingly." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r_2AAGGoiouS" + }, + "source": [ + "## Semantic Mapping" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vT6skC57iouS" + }, + "source": [ + "Semantic mapping allows to you to combine values from multiple columns into a single output based your instruction.\n", + "\n", + "Here is an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 190 + }, + "id": "BQ7xeUK3iouS", + "outputId": "33dcb742-77ed-4bea-8dbc-1cf775102a25" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " ingredient_1 ingredient_2\n", + "0 Bun Beef Patty\n", + "1 Soy Bean Bittern\n", + "2 Sausage Long Bread\n", + "\n", + "[3 rows x 2 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ingredient_1ingredient_2
0BunBeef Patty
1Soy BeanBittern
2SausageLong Bread
\n", + "

3 rows × 2 columns

\n", + "
[3 rows x 2 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 12 + } ], - "text/plain": [ - " city continent\n", - "0 Seattle North America\n", - "1 Ottawa North America\n", - "2 Shanghai Asia\n", - "3 New Delhi Asia\n", - "\n", - "[4 rows x 2 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cities.semantics.join(continents, \"{city} is in {continent}\", model=gemini_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "!! **Important:** Semantic join can trigger probihitively expensitve operations! This operation first cross joins two dataframes, then invokes semantic filter on each row. That means if you have two dataframes of sizes `M` and `N`, the total amount of queries sent to the LLM is on the scale of `M * N`. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Self Joins" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This self-join example is for demonstrating a special case: what happens when the joining columns exist in both data frames? It turns out that you need to provide extra information in your column references: by attaching \"left.\" and \"right.\" prefixes to your column names. \n", - "\n", - "Create an example data frame:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "animals = bpd.DataFrame({'animal': ['cow', 'cat', 'spider', 'elephant']})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You want to compare the weights of these animals, and output all the pairs where the animal on the left is heavier than the animal on the right. In this case, you use `left.animal` and `right.animal` to differentiate the data sources:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
animal_leftanimal_right
0cowcat
1cowspider
2catspider
3elephantcow
4elephantcat
5elephantspider
\n", - "

6 rows × 2 columns

\n", - "
[6 rows x 2 columns in total]" + "source": [ + "df = bpd.DataFrame({\n", + " \"ingredient_1\": [\"Bun\", \"Soy Bean\", \"Sausage\"],\n", + " \"ingredient_2\": [\"Beef Patty\", \"Bittern\", \"Long Bread\"]\n", + " })\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFObP2aFiouS" + }, + "source": [ + "Now, you ask LLM what kind of food can be made from the two ingredients in each row. The column reference syntax in your instruction stays the same. In addition, you need to specify the column name by setting the `output_column` parameter to hold the mapping results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 190 + }, + "id": "PpL24AQFiouS", + "outputId": "e7aff038-bf4b-4833-def8-fe2648e8885b" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " ingredient_1 ingredient_2 food\n", + "0 Bun Beef Patty Burger \n", + "\n", + "1 Soy Bean Bittern Tofu \n", + "\n", + "2 Sausage Long Bread Hotdog \n", + "\n", + "\n", + "[3 rows x 3 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ingredient_1ingredient_2food
0BunBeef PattyBurger
1Soy BeanBitternTofu
2SausageLong BreadHotdog
\n", + "

3 rows × 3 columns

\n", + "
[3 rows x 3 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 13 + } ], - "text/plain": [ - " animal_left animal_right\n", - "0 cow cat\n", - "1 cow spider\n", - "2 cat spider\n", - "3 elephant cow\n", - "4 elephant cat\n", - "5 elephant spider\n", - "\n", - "[6 rows x 2 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "animals.semantics.join(animals, \"{left.animal} generally weighs heavier than {right.animal}\", model=gemini_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Semantic Aggregation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Semantic aggregation merges all the values in a column into one. At this moment you can only aggregate a single column in each oeprator call.\n", - "\n", - "Here is an example:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Movies
0Titanic
1The Wolf of Wall Street
2Killers of the Flower Moon
3The Revenant
4Inception
5Shuttle Island
6The Great Gatsby
\n", - "

7 rows Ă— 1 columns

\n", - "
[7 rows x 1 columns in total]" + "source": [ + "df.semantics.map(\"What is the food made from {ingredient_1} and {ingredient_2}? One word only.\", output_column=\"food\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "70WTZZfdiouS" + }, + "source": [ + "## Semantic Joining" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u93uieRaiouS" + }, + "source": [ + "Semantic joining can join two dataframes based on the instruction you provided.\n", + "\n", + "First, you prepare two dataframes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dffIGEUEiouS" + }, + "outputs": [], + "source": [ + "cities = bpd.DataFrame({'city': ['Seattle', 'Ottawa', 'Berlin', 'Shanghai', 'New Delhi']})\n", + "continents = bpd.DataFrame({'continent': ['North America', 'Africa', 'Asia']})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hz0X-0RtiouS" + }, + "source": [ + "You want to join the `cities` with `continents` to form a new dataframe such that, in each row the city from the `cities` data frame is in the continent from the `continents` dataframe. You could re-use the aforementioned column reference syntax:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + }, + "id": "WPIOHEwCiouT", + "outputId": "976586c3-b5db-4088-a46a-44dfbf822ecb" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " city continent\n", + "0 Seattle North America\n", + "1 Ottawa North America\n", + "2 Shanghai Asia\n", + "3 New Delhi Asia\n", + "\n", + "[4 rows x 2 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
citycontinent
0SeattleNorth America
1OttawaNorth America
2ShanghaiAsia
3New DelhiAsia
\n", + "

4 rows × 2 columns

\n", + "
[4 rows x 2 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 15 + } ], - "text/plain": [ - " Movies\n", - "0 Titanic\n", - "1 The Wolf of Wall Street\n", - "2 Killers of the Flower Moon\n", - "3 The Revenant\n", - "4 Inception\n", - "5 Shuttle Island\n", - "6 The Great Gatsby\n", - "\n", - "[7 rows x 1 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = bpd.DataFrame({\n", - " \"Movies\": [\n", - " \"Titanic\",\n", - " \"The Wolf of Wall Street\",\n", - " \"Killers of the Flower Moon\",\n", - " \"The Revenant\",\n", - " \"Inception\",\n", - " \"Shuttle Island\",\n", - " \"The Great Gatsby\",\n", - " ],\n", - "})\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You ask LLM to find the oldest movie:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "0 Titanic \n", - "\n", - "Name: Movies, dtype: string" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agg_df = df.semantics.agg(\"Find the oldest movie from {Movies}. Reply with only the movie title\", model=gemini_model)\n", - "agg_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Instead of going through each row one by one, this operator first batches rows to get many aggregation results. It then repeatly batches those results for aggregation, until there is only one value left. You could set the batch size with `max_agg_rows` parameter, which defaults to 10." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Semantic Top K" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Semantic Top K selects the top K values based on your instruction. Here is an example:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "df = bpd.DataFrame({\"Animals\": [\"Corgi\", \"Orange Cat\", \"Parrot\", \"Tarantula\"]})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You want to find the top two most popular pets:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
[2 rows x 1 columns in total]" + "source": [ + "cities.semantics.join(continents, \"{city} is in {continent}\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4Qc97GMWiouT" + }, + "source": [ + "!! **Important:** Semantic join can trigger probihitively expensitve operations! This operation first cross joins two dataframes, then invokes semantic filter on each row. That means if you have two dataframes of sizes `M` and `N`, the total amount of queries sent to the LLM is on the scale of `M * N`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MUEJXT1IiouT" + }, + "source": [ + "### Self Joins" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QvX-nCogiouT" + }, + "source": [ + "This self-join example is for demonstrating a special case: what happens when the joining columns exist in both data frames? It turns out that you need to provide extra information in your column references: by attaching \"left.\" and \"right.\" prefixes to your column names.\n", + "\n", + "Create an example data frame:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OIGz5sqxiouW" + }, + "outputs": [], + "source": [ + "animals = bpd.DataFrame({'animal': ['cow', 'cat', 'spider', 'elephant']})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VmJbuWNniouX" + }, + "source": [ + "You want to compare the weights of these animals, and output all the pairs where the animal on the left is heavier than the animal on the right. In this case, you use `left.animal` and `right.animal` to differentiate the data sources:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 284 + }, + "id": "UHfggdhBiouX", + "outputId": "a439e3aa-1382-4244-951f-127dc8da0fe3" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " animal_left animal_right\n", + "0 cow cat\n", + "1 cow spider\n", + "2 cat spider\n", + "3 elephant cow\n", + "4 elephant cat\n", + "5 elephant spider\n", + "\n", + "[6 rows x 2 columns]" + ], + "text/html": [ + "
[6 rows x 2 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 17 + } ], - "text/plain": [ - " Animals\n", - "0 Corgi\n", - "1 Orange Cat\n", - "\n", - "[2 rows x 1 columns]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.semantics.top_k(\"{Animals} are more popular as pets\", model=gemini_model, k=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Under the hood, the semantic top K operator performs pair-wise comparisons with LLM. The top K results are returned in the order of their indices instead of their ranks." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Semantic Search" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Semantic search searches the most similar values to your query within a single column. Here is an example:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
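+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Because only a single column can be referenced per aggregation call, one workaround is to concatenate the columns of interest into one string column first (a sketch; `col_a` and `col_b` are hypothetical column names):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical columns col_a and col_b, combined so that a single-column\n",
+    "# aggregation instruction can see both values.\n",
+    "df['combined'] = df['col_a'] + ' ' + df['col_b']\n",
+    "df.semantics.agg('Summarize the rows from {combined}.', model=gemini_model)"
+   ]
+  },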
[5 rows x 1 columns in total]" + "source": [ + "animals.semantics.join(animals, \"{left.animal} generally weighs heavier than {right.animal}\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KONR7ywqiouX" + }, + "source": [ + "## Semantic Aggregation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I8iNRogoiouX" + }, + "source": [ + "Semantic aggregation merges all the values in a column into one. At this moment you can only aggregate a single column in each oeprator call.\n", + "\n", + "Here is an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 315 + }, + "id": "9tsem17aiouX", + "outputId": "1db5fa6e-b59d-41f5-9c13-db2c9ed0415b" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Movies\n", + "0 Titanic\n", + "1 The Wolf of Wall Street\n", + "2 Killers of the Flower Moon\n", + "3 The Revenant\n", + "4 Inception\n", + "5 Shuttle Island\n", + "6 The Great Gatsby\n", + "\n", + "[7 rows x 1 columns]" + ], + "text/html": [ + "
[7 rows x 1 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 18 + } ], - "text/plain": [ - " creatures\n", - "0 salmon\n", - "1 sea urchin\n", - "2 baboons\n", - "3 frog\n", - "4 chimpanzee\n", - "\n", - "[5 rows x 1 columns]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = bpd.DataFrame({\"creatures\": [\"salmon\", \"sea urchin\", \"baboons\", \"frog\", \"chimpanzee\"]})\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You want to get the top 2 creatures that are most similar to \"monkey\":" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n", - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n", - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
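+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The number of reduction rounds implied by this batching can be worked out in plain Python (an illustration of the description above, not the library's internal code):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def n_rounds(n_rows: int, max_agg_rows: int = 10) -> int:\n",
+    "    # Rounds of batched aggregation until a single value remains.\n",
+    "    rounds = 0\n",
+    "    while n_rows > 1:\n",
+    "        n_rows = -(-n_rows // max_agg_rows)  # ceil: one result per batch\n",
+    "        rounds += 1\n",
+    "    return rounds\n",
+    "\n",
+    "print(n_rounds(7))     # 1: all seven titles fit in one batch of ten\n",
+    "print(n_rounds(7, 3))  # 2: 7 -> 3 -> 1"
+   ]
+  },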
[2 rows x 2 columns in total]" + "source": [ + "df = bpd.DataFrame({\n", + " \"Movies\": [\n", + " \"Titanic\",\n", + " \"The Wolf of Wall Street\",\n", + " \"Killers of the Flower Moon\",\n", + " \"The Revenant\",\n", + " \"Inception\",\n", + " \"Shuttle Island\",\n", + " \"The Great Gatsby\",\n", + " ],\n", + "})\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uA9XpV0aiouX" + }, + "source": [ + "You ask LLM to find the oldest movie:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KzYoX3mRiouX", + "outputId": "1ac50d7b-dfa7-4c16-8daf-aeb03b6df7a5" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 Titanic \n", + "\n", + "Name: Movies, dtype: string" + ] + }, + "metadata": {}, + "execution_count": 19 + } ], - "text/plain": [ - " creatures similarity score\n", - "2 baboons 0.708434\n", - "4 chimpanzee 0.635844\n", - "\n", - "[2 rows x 2 columns]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.semantics.search(\"creatures\", query=\"monkey\", top_k = 2, model = text_embedding_model, score_column='similarity score')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Notice that you are using a text embedding model this time. This model generates embedding vectors for both your query as well as the values in the search space. The operator then uses BigQuery's built-in VECTOR_SEARCH function to find the nearest neighbors of your query.\n", - "\n", - "In addition, `score_column` is an optional parameter for storing the distances between the results and your query. If not set, the score column won't be attached to the result." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Semantic Similarity Join" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "When you want to perform multiple similarity queries in the same value space, you could use similarity join to simplify your call. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "df1 = bpd.DataFrame({'animal': ['monkey', 'spider', 'salmon', 'giraffe', 'sparrow']})\n", - "df2 = bpd.DataFrame({'animal': ['scorpion', 'baboon', 'owl', 'elephant', 'tuna']})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this example, you want to pick the most related animal from `df2` for each value in `df1`." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n", - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
[5 rows x 3 columns in total]" + "source": [ + "agg_df = df.semantics.agg(\"Find the oldest movie from {Movies}. Reply with only the movie title\", model=gemini_model)\n", + "agg_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "drvn75qJiouX" + }, + "source": [ + "Instead of going through each row one by one, this operator first batches rows to get many aggregation results. It then repeatly batches those results for aggregation, until there is only one value left. You could set the batch size with `max_agg_rows` parameter, which defaults to 10." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kU7BsyTyiouX" + }, + "source": [ + "## Semantic Top K" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s9QePXEoiouX" + }, + "source": [ + "Semantic Top K selects the top K values based on your instruction. Here is an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bMQqtyZ2iouX" + }, + "outputs": [], + "source": [ + "df = bpd.DataFrame({\"Animals\": [\"Corgi\", \"Orange Cat\", \"Parrot\", \"Tarantula\"]})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KiljGBSCiouX" + }, + "source": [ + "You want to find the top two most popular pets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "OZv5WUGIiouX", + "outputId": "ae1cee27-cc31-455e-c4ac-c0a9a5cf4ca5" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Animals\n", + "0 Corgi\n", + "1 Orange Cat\n", + "\n", + "[2 rows x 1 columns]" + ], + "text/html": [ + "
[2 rows x 1 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 21 + } ], - "text/plain": [ - " animal animal_1 distance\n", - "0 monkey baboon 0.620521\n", - "1 spider scorpion 0.728024\n", - "2 salmon tuna 0.782141\n", - "3 giraffe elephant 0.7135\n", - "4 sparrow owl 0.810864\n", - "\n", - "[5 rows x 3 columns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1.semantics.sim_join(df2, left_on='animal', right_on='animal', top_k=1, model=text_embedding_model, score_column='distance')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "!! **Important** Like semantic join, this operator can also be very expensive. To guard against unexpected processing of large dataset, use the `bigframes.options.compute.sem_ops_confirmation_threshold` option to specify a threshold. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Semantic Cluster" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Semantic Cluster group similar values together. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "df = bpd.DataFrame({'Product': ['Smartphone', 'Laptop', 'Coffee Maker', 'T-shirt', 'Jeans']})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You want to cluster these products into 3 groups:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n", - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
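+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you need the two winners ranked against each other, that is up to you afterwards; the operator only tells you membership in the top K (a sketch, reusing the `df` and `gemini_model` above):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "top2 = df.semantics.top_k('{Animals} are more popular as pets', model=gemini_model, k=2)\n",
+    "# The result keeps the original index order (0: Corgi, 1: Orange Cat); it\n",
+    "# does not say which of the two ranked higher.\n",
+    "print(top2.index)"
+   ]
+  },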
[5 rows x 2 columns in total]" + "source": [ + "df.semantics.top_k(\"{Animals} are more popular as pets\", model=gemini_model, k=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dC8fyu3aiouX" + }, + "source": [ + "Under the hood, the semantic top K operator performs pair-wise comparisons with LLM. The top K results are returned in the order of their indices instead of their ranks." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sIszJ0zPiouX" + }, + "source": [ + "## Semantic Search" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e4ojHRKAiouX" + }, + "source": [ + "Semantic search searches the most similar values to your query within a single column. Here is an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 253 + }, + "id": "gnQSIZ5SiouX", + "outputId": "dd6e1ecb-1bad-4a7c-8065-e56c697d0863" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " creatures\n", + "0 salmon\n", + "1 sea urchin\n", + "2 baboons\n", + "3 frog\n", + "4 chimpanzee\n", + "\n", + "[5 rows x 1 columns]" + ], + "text/html": [ + "
[5 rows x 1 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 22 + } ], - "text/plain": [ - " Product Cluster ID\n", - "0 Smartphone 3\n", - "1 Laptop 3\n", - "2 Coffee Maker 1\n", - "3 T-shirt 2\n", - "4 Jeans 2\n", - "\n", - "[5 rows x 2 columns]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.semantics.cluster_by(column='Product', output_column='Cluster ID', model=text_embedding_model, n_clusters=3)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This operator uses the the embedding model to generate vectors for each value, and then the KMeans algorithm for clustering." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Performance Analyses" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this section, you will use BigQuery's public data of hacker news to perform some heavy work. We recommend you to check the code without executing them in order to save your time and money. The execution results are attached after each cell for your reference.\n", - "\n", - "First, load 3K rows from the table:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[3000 rows x 6 columns in total]" + "source": [ + "df = bpd.DataFrame({\"creatures\": [\"salmon\", \"sea urchin\", \"baboons\", \"frog\", \"chimpanzee\"]})\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5apfIaZMiouX" + }, + "source": [ + "You want to get the top 2 creatures that are most similar to \"monkey\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 159 + }, + "id": "CkAuFgPYiouY", + "outputId": "723c7604-f53c-43d7-c754-4c91ec198dff" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " creatures similarity score\n", + "2 baboons 0.708434\n", + "4 chimpanzee 0.635844\n", + "\n", + "[2 rows x 2 columns]" + ], + "text/html": [ + "
[2 rows x 2 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 23 + } ], - "text/plain": [ - " title \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", - "5 \n", - "6 The Impending NY Tech Apocalypse: Here's What ... \n", - "7 \n", - "8 Eureca beta is live. A place for your business... \n", - "9 \n", - "10 \n", - "11 \n", - "12 \n", - "13 \n", - "14 \n", - "15 Discord vs. IRC Rough Notes \n", - "16 \n", - "17 \n", - "18 \n", - "19 \n", - "20 \n", - "21 Oh dear: new Yahoo anti-spoofing measures brea... \n", - "22 How Much Warmer Was Your City in 2016? \n", - "23 \n", - "24 Working Best at Coffee Shops \n", - "\n", - " text by score \\\n", - "0 Well, most people aren't alcoholics, so I... slipframe \n", - "1 No, you don't really need a smartp... vetinari \n", - "2 It's for the late Paul Allen RIP. Should&... lsr_ssri \n", - "3 Yup they are dangerous. Be careful Donald Trump. Sven7 \n", - "4 Sure, it's totally reasonable. Just point... nicoburns \n", - "5 I wonder how long before special forces start ... autisticcurio \n", - "6 gaoprea 3 \n", - "7 Where would you relocate to? I'm assuming that... pavel_lishin \n", - "8 ricardos 1 \n", - "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", - "10 I guess I don’t see the relevance. Vegans eat ... stevula \n", - "11 I remember watching the American news media go... fareesh \n", - "12 This article is incorrectly using the current ... stale2002 \n", - "13 In the firm I made my internship, we have to u... iserlohnmage \n", - "14 The main reason it requires unsafe is for memo... comex \n", - "15 todsacerdoti 48 \n", - "16 you have to auth again when you use apple pay. empath75 \n", - "17 It goes consumer grade, automotive, military, ... moftz \n", - "18 I don't have a link handy but the differe... KennyBlanken \n", - "19 > I don't think the use case you menti... colanderman \n", - "20 I think you need to watch it again, because yo... vladimirralev \n", - "21 joshreads 1 \n", - "22 smb06 1 \n", - "23 Except that they clearly never tried to incent... 
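+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Because `score_column` is optional, the same search can be issued without it; the result then carries only the matched values (a sketch):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Same query as above, minus the optional score column.\n",
+    "df.semantics.search('creatures', query='monkey', top_k=2, model=text_embedding_model)"
+   ]
+  },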
aenis \n", - "24 GiraffeNecktie 249 \n", - "\n", - " timestamp type \n", - "0 2021-06-26 02:37:56+00:00 comment \n", - "1 2023-04-19 15:56:34+00:00 comment \n", - "2 2018-10-16 01:07:55+00:00 comment \n", - "3 2015-08-10 16:05:54+00:00 comment \n", - "4 2020-10-05 11:20:51+00:00 comment \n", - "5 2020-09-01 15:38:50+00:00 comment \n", - "6 2011-09-27 22:43:27+00:00 story \n", - "7 2011-09-16 19:02:01+00:00 comment \n", - "8 2012-10-15 13:09:32+00:00 story \n", - "9 2023-04-21 16:45:13+00:00 comment \n", - "10 2023-01-19 20:05:54+00:00 comment \n", - "11 2019-06-17 19:49:17+00:00 comment \n", - "12 2018-03-18 18:57:21+00:00 comment \n", - "13 2019-10-22 10:41:01+00:00 comment \n", - "14 2017-05-05 20:45:37+00:00 comment \n", - "15 2024-07-12 18:39:52+00:00 story \n", - "16 2017-09-12 18:58:20+00:00 comment \n", - "17 2021-04-13 01:24:03+00:00 comment \n", - "18 2022-05-13 16:08:38+00:00 comment \n", - "19 2017-09-28 05:16:06+00:00 comment \n", - "20 2018-12-07 11:25:52+00:00 comment \n", - "21 2014-04-08 13:29:50+00:00 story \n", - "22 2017-02-16 23:26:34+00:00 story \n", - "23 2022-01-31 17:08:57+00:00 comment \n", - "24 2011-04-19 14:25:17+00:00 story \n", - "...\n", - "\n", - "[3000 rows x 6 columns]" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hacker_news = bpd.read_gbq(\"bigquery-public-data.hacker_news.full\")[['title', 'text', 'by', 'score', 'timestamp', 'type']].head(3000)\n", - "hacker_news" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then, keep only the rows that have text content:" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2556" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hacker_news_with_texts = hacker_news[hacker_news['text'].isnull() == False]\n", - "len(hacker_news_with_texts)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can get an idea of the input token length by calculating the average string length." - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "390.29068857589976" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hacker_news_with_texts['text'].str.len().mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "[Optional] You can raise the confirmation threshold for a smoother experience." - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n", - " bigframes.options.compute.semantic_ops_confirmation_threshold = 5000" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now it's LLM's turn. You want to keep only the rows whose texts are talking about iPhone. This will take several minutes to finish." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "This operation will process about 2556 rows. Proceed? [Y/n]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. 
This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
[5 rows x 6 columns in total]" + "source": [ + "df.semantics.search(\"creatures\", query=\"monkey\", top_k = 2, model = text_embedding_model, score_column='similarity score')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GDZeVzFTiouY" + }, + "source": [ + "Note that you are using a text embedding model this time. This model generates embedding vectors for both your query as well as the values in the search space. The operator then uses BigQuery's built-in VECTOR_SEARCH function to find the nearest neighbors of your query.\n", + "\n", + "In addition, `score_column` is an optional parameter for storing the distances between the results and your query. If not set, the score column won't be attached to the result." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EXNutIXqiouY" + }, + "source": [ + "## Semantic Similarity Join" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BhWrhQMjiouY" + }, + "source": [ + "When you want to perform multiple similarity queries in the same value space, you could use similarity join to simplify your call. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cUc7-8O6iouY" + }, + "outputs": [], + "source": [ + "df1 = bpd.DataFrame({'animal': ['monkey', 'spider', 'salmon', 'giraffe', 'sparrow']})\n", + "df2 = bpd.DataFrame({'animal': ['scorpion', 'baboon', 'owl', 'elephant', 'tuna']})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k96WerOviouY" + }, + "source": [ + "In this example, you want to pick the most related animal from `df2` for each value in `df1`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 253 + }, + "id": "wPV5EkfpiouY", + "outputId": "4be1211d-0353-4b94-8c27-ebd568e8e104" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " animal animal_1 distance\n", + "0 monkey baboon 0.620521\n", + "1 spider scorpion 0.728024\n", + "2 salmon tuna 0.782141\n", + "3 giraffe elephant 0.7135\n", + "4 sparrow owl 0.810864\n", + "\n", + "[5 rows x 3 columns]" + ], + "text/html": [ + "
[5 rows x 3 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 25 + } ], - "text/plain": [ - " title text by \\\n", - "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", - "420 Well last time I got angry down votes for sayi... drieddust \n", - "814 New iPhone should be announced on September. L... meerita \n", - "1515 Why would this take a week? i(phone)OS was ori... TheOtherHobbes \n", - "1562 >or because Apple drama brings many clicks?... weberer \n", - "\n", - " score timestamp type \n", - "9 2023-04-21 16:45:13+00:00 comment \n", - "420 2021-01-11 19:27:27+00:00 comment \n", - "814 2019-07-30 20:54:42+00:00 comment \n", - "1515 2021-06-08 09:25:24+00:00 comment \n", - "1562 2022-09-05 13:16:02+00:00 comment \n", - "\n", - "[5 rows x 6 columns]" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "iphone_comments=hacker_news_with_texts.semantics.filter(\"The {text} is mainly focused on iPhone\", gemini_model)\n", - "iphone_comments" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The performance of the semantic operators depends on the length of your input as well as your quota. Here are our benchmarks for running the previous operation over data of different sizes.\n", - "\n", - "* 800 Rows -> 1m 21.3s\n", - "* 2550 Rows -> 5m 9s\n", - "* 8500 Rows -> 16m 34.4s\n", - "\n", - "These numbers can give you a general idea of how fast the operators run." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, use LLM to summarize the sentiments towards iPhone:" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
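+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To guard expensive operators like `sim_join`, you can set the confirmation threshold up front, following the version-guarded pattern this notebook uses later (a sketch, assuming `packaging.version.Version` is available as in the earlier setup):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import bigframes\n",
+    "from packaging.version import Version\n",
+    "\n",
+    "# Ask for confirmation before a semantic operator processes more rows than this.\n",
+    "if Version(bigframes.__version__) >= Version('1.31.0'):\n",
+    "    bigframes.options.compute.semantic_ops_confirmation_threshold = 5000"
+   ]
+  },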
[5 rows x 7 columns in total]" + "source": [ + "df1.semantics.sim_join(df2, left_on='animal', right_on='animal', top_k=1, model=text_embedding_model, score_column='distance')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GplzD7v0iouY" + }, + "source": [ + "!! **Important** Like semantic join, this operator can also be very expensive. To guard against unexpected processing of large dataset, use the `bigframes.options.compute.sem_ops_confirmation_threshold` option to specify a threshold." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uG6FyMH_iouY" + }, + "source": [ + "## Semantic Cluster" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uIh3ViNciouY" + }, + "source": [ + "Semantic Cluster group similar values together. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jyQ_aT9qiouY" + }, + "outputs": [], + "source": [ + "df = bpd.DataFrame({'Product': ['Smartphone', 'Laptop', 'Coffee Maker', 'T-shirt', 'Jeans']})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K3IMIFrtiouY" + }, + "source": [ + "You want to cluster these products into 3 groups:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 253 + }, + "id": "0Tc0DqXJiouY", + "outputId": "1c8b6e28-713c-4666-e623-3b2c42c50b30" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Product Cluster ID\n", + "0 Smartphone 1\n", + "1 Laptop 1\n", + "2 Coffee Maker 1\n", + "3 T-shirt 1\n", + "4 Jeans 1\n", + "\n", + "[5 rows x 2 columns]" + ], + "text/html": [ + "
[5 rows x 2 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 27 + } ], - "text/plain": [ - " title text by \\\n", - "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", - "420 Well last time I got angry down votes for sayi... drieddust \n", - "814 New iPhone should be announced on September. L... meerita \n", - "1515 Why would this take a week? i(phone)OS was ori... TheOtherHobbes \n", - "1562 >or because Apple drama brings many clicks?... weberer \n", - "\n", - " score timestamp type \\\n", - "9 2023-04-21 16:45:13+00:00 comment \n", - "420 2021-01-11 19:27:27+00:00 comment \n", - "814 2019-07-30 20:54:42+00:00 comment \n", - "1515 2021-06-08 09:25:24+00:00 comment \n", - "1562 2022-09-05 13:16:02+00:00 comment \n", - "\n", - " sentiment \n", - "9 Frustrated, but hopeful. \n", - " \n", - "420 Frustrated and angry. \n", - " \n", - "814 Excited anticipation. \n", - " \n", - "1515 Frustrated, critical, obvious. \n", - " \n", - "1562 Negative, clickbait, Apple. \n", - " \n", - "\n", - "[5 rows x 7 columns]" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "iphone_comments.semantics.map(\"Summarize the sentiment of the {text}. Your answer should have at most 3 words\", output_column=\"sentiment\", model=gemini_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is another example: count the number of rows whose authors have animals in their names." - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py:3577: UserWarning: Reading cached table from 2025-01-03 01:18:29.080474+00:00 to avoid incompatibilies with previous reads of this table. To read the latest version, set `use_cache=False` or close the current session with Session.close() or bigframes.pandas.close_session().\n", - " exec(code_obj, self.user_global_ns, self.user_ns)\n" - ] - }, - { - "data": { - "text/html": [ - "
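+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "When experimenting, an even smaller cap bounds cost further, since semantic operators only see the rows that survive it (a sketch):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# head() caps the sample before any semantic operator runs.\n",
+    "sample = bpd.read_gbq('bigquery-public-data.hacker_news.full')[['title', 'text', 'by', 'score', 'timestamp', 'type']].head(100)"
+   ]
+  },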
[3000 rows x 6 columns in total]" + "source": [ + "df.semantics.cluster_by(column='Product', output_column='Cluster ID', model=text_embedding_model, n_clusters=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zWIzYX3niouY" + }, + "source": [ + "This operator uses the the embedding model to generate vectors for each value, and then the KMeans algorithm for clustering." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hgj8GoQhiouY" + }, + "source": [ + "# Performance Analyses" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EZomL0BciouY" + }, + "source": [ + "In this section, you will use BigQuery's public data of hacker news to perform some heavy work. We recommend you to check the code without executing them in order to save your time and money. The execution results are attached after each cell for your reference.\n", + "\n", + "First, load 3k rows from the table:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 880 + }, + "id": "wRR0SrcSiouY", + "outputId": "3b25f3a3-09c7-4396-9107-4aa4cdb4b963" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " title \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "5 \n", + "6 The Impending NY Tech Apocalypse: Here's What ... \n", + "7 \n", + "8 Eureca beta is live. A place for your business... \n", + "9 \n", + "10 \n", + "11 \n", + "12 \n", + "13 \n", + "14 \n", + "15 Discord vs. IRC Rough Notes \n", + "16 \n", + "17 \n", + "18 \n", + "19 \n", + "20 \n", + "21 Oh dear: new Yahoo anti-spoofing measures brea... \n", + "22 How Much Warmer Was Your City in 2016? \n", + "23 \n", + "24 Working Best at Coffee Shops \n", + "\n", + " text by score \\\n", + "0 Well, most people aren't alcoholics, so I... slipframe \n", + "1 No, you don't really need a smartp... vetinari \n", + "2 It's for the late Paul Allen RIP. Should&... lsr_ssri \n", + "3 Yup they are dangerous. Be careful Donald Trump. Sven7 \n", + "4 Sure, it's totally reasonable. Just point... nicoburns \n", + "5 I wonder how long before special forces start ... autisticcurio \n", + "6 gaoprea 3 \n", + "7 Where would you relocate to? I'm assuming that... pavel_lishin \n", + "8 ricardos 1 \n", + "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", + "10 I guess I don’t see the relevance. Vegans eat ... stevula \n", + "11 I remember watching the American news media go... fareesh \n", + "12 This article is incorrectly using the current ... stale2002 \n", + "13 In the firm I made my internship, we have to u... iserlohnmage \n", + "14 The main reason it requires unsafe is for memo... comex \n", + "15 todsacerdoti 48 \n", + "16 you have to auth again when you use apple pay. empath75 \n", + "17 It goes consumer grade, automotive, military, ... moftz \n", + "18 I don't have a link handy but the differe... KennyBlanken \n", + "19 > I don't think the use case you menti... colanderman \n", + "20 I think you need to watch it again, because yo... vladimirralev \n", + "21 joshreads 1 \n", + "22 smb06 1 \n", + "23 Except that they clearly never tried to incent... 
aenis \n", + "24 GiraffeNecktie 249 \n", + "\n", + " timestamp type \n", + "0 2021-06-26 02:37:56+00:00 comment \n", + "1 2023-04-19 15:56:34+00:00 comment \n", + "2 2018-10-16 01:07:55+00:00 comment \n", + "3 2015-08-10 16:05:54+00:00 comment \n", + "4 2020-10-05 11:20:51+00:00 comment \n", + "5 2020-09-01 15:38:50+00:00 comment \n", + "6 2011-09-27 22:43:27+00:00 story \n", + "7 2011-09-16 19:02:01+00:00 comment \n", + "8 2012-10-15 13:09:32+00:00 story \n", + "9 2023-04-21 16:45:13+00:00 comment \n", + "10 2023-01-19 20:05:54+00:00 comment \n", + "11 2019-06-17 19:49:17+00:00 comment \n", + "12 2018-03-18 18:57:21+00:00 comment \n", + "13 2019-10-22 10:41:01+00:00 comment \n", + "14 2017-05-05 20:45:37+00:00 comment \n", + "15 2024-07-12 18:39:52+00:00 story \n", + "16 2017-09-12 18:58:20+00:00 comment \n", + "17 2021-04-13 01:24:03+00:00 comment \n", + "18 2022-05-13 16:08:38+00:00 comment \n", + "19 2017-09-28 05:16:06+00:00 comment \n", + "20 2018-12-07 11:25:52+00:00 comment \n", + "21 2014-04-08 13:29:50+00:00 story \n", + "22 2017-02-16 23:26:34+00:00 story \n", + "23 2022-01-31 17:08:57+00:00 comment \n", + "24 2011-04-19 14:25:17+00:00 story \n", + "...\n", + "\n", + "[3000 rows x 6 columns]" + ], + "text/html": [ + "
[3000 rows x 6 columns in total]" + ] + }, + "metadata": {}, + "execution_count": 9 + } ], - "text/plain": [ - " title \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", - "5 \n", - "6 The Impending NY Tech Apocalypse: Here's What ... \n", - "7 \n", - "8 Eureca beta is live. A place for your business... \n", - "9 \n", - "10 \n", - "11 \n", - "12 \n", - "13 \n", - "14 \n", - "15 Discord vs. IRC Rough Notes \n", - "16 \n", - "17 \n", - "18 \n", - "19 \n", - "20 \n", - "21 Oh dear: new Yahoo anti-spoofing measures brea... \n", - "22 How Much Warmer Was Your City in 2016? \n", - "23 \n", - "24 Working Best at Coffee Shops \n", - "\n", - " text by score \\\n", - "0 Well, most people aren't alcoholics, so I... slipframe \n", - "1 No, you don't really need a smartp... vetinari \n", - "2 It's for the late Paul Allen RIP. Should&... lsr_ssri \n", - "3 Yup they are dangerous. Be careful Donald Trump. Sven7 \n", - "4 Sure, it's totally reasonable. Just point... nicoburns \n", - "5 I wonder how long before special forces start ... autisticcurio \n", - "6 gaoprea 3 \n", - "7 Where would you relocate to? I'm assuming that... pavel_lishin \n", - "8 ricardos 1 \n", - "9 It doesn’t work on Safari, and WebKit based br... archiewood \n", - "10 I guess I don’t see the relevance. Vegans eat ... stevula \n", - "11 I remember watching the American news media go... fareesh \n", - "12 This article is incorrectly using the current ... stale2002 \n", - "13 In the firm I made my internship, we have to u... iserlohnmage \n", - "14 The main reason it requires unsafe is for memo... comex \n", - "15 todsacerdoti 48 \n", - "16 you have to auth again when you use apple pay. empath75 \n", - "17 It goes consumer grade, automotive, military, ... moftz \n", - "18 I don't have a link handy but the differe... KennyBlanken \n", - "19 > I don't think the use case you menti... colanderman \n", - "20 I think you need to watch it again, because yo... vladimirralev \n", - "21 joshreads 1 \n", - "22 smb06 1 \n", - "23 Except that they clearly never tried to incent... 
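+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The same preprocessing reads a little more idiomatically with `notna()` (a sketch, assuming it mirrors the pandas API as the other methods here do):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hacker_news_with_texts = hacker_news[hacker_news['text'].notna()]\n",
+    "print(len(hacker_news_with_texts))                      # rows that have text\n",
+    "print(hacker_news_with_texts['text'].str.len().mean())  # rough input-length estimate"
+   ]
+  },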
aenis \n", - "24 GiraffeNecktie 249 \n", - "\n", - " timestamp type \n", - "0 2021-06-26 02:37:56+00:00 comment \n", - "1 2023-04-19 15:56:34+00:00 comment \n", - "2 2018-10-16 01:07:55+00:00 comment \n", - "3 2015-08-10 16:05:54+00:00 comment \n", - "4 2020-10-05 11:20:51+00:00 comment \n", - "5 2020-09-01 15:38:50+00:00 comment \n", - "6 2011-09-27 22:43:27+00:00 story \n", - "7 2011-09-16 19:02:01+00:00 comment \n", - "8 2012-10-15 13:09:32+00:00 story \n", - "9 2023-04-21 16:45:13+00:00 comment \n", - "10 2023-01-19 20:05:54+00:00 comment \n", - "11 2019-06-17 19:49:17+00:00 comment \n", - "12 2018-03-18 18:57:21+00:00 comment \n", - "13 2019-10-22 10:41:01+00:00 comment \n", - "14 2017-05-05 20:45:37+00:00 comment \n", - "15 2024-07-12 18:39:52+00:00 story \n", - "16 2017-09-12 18:58:20+00:00 comment \n", - "17 2021-04-13 01:24:03+00:00 comment \n", - "18 2022-05-13 16:08:38+00:00 comment \n", - "19 2017-09-28 05:16:06+00:00 comment \n", - "20 2018-12-07 11:25:52+00:00 comment \n", - "21 2014-04-08 13:29:50+00:00 story \n", - "22 2017-02-16 23:26:34+00:00 story \n", - "23 2022-01-31 17:08:57+00:00 comment \n", - "24 2011-04-19 14:25:17+00:00 story \n", - "...\n", - "\n", - "[3000 rows x 6 columns]" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hacker_news = bpd.read_gbq(\"bigquery-public-data.hacker_news.full\")[['title', 'text', 'by', 'score', 'timestamp', 'type']].head(3000)\n", - "hacker_news" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "This operation will process about 3000 rows. Proceed? [Y/n]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:109: PreviewWarning: Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
|   | title | text | by | score | timestamp | type |
|---|-------|------|----|-------|-----------|------|
| 24 | Working Best at Coffee Shops | <NA> | GiraffeNecktie | 249 | 2011-04-19 14:25:17+00:00 | story |
| 98 | <NA> | i resisted switching to chrome for months beca... | catshirt | <NA> | 2011-04-06 08:02:24+00:00 | comment |
| 137 | FDA reverses marketing ban on Juul e-cigarettes | <NA> | anigbrowl | 2 | 2024-06-06 16:42:40+00:00 | story |
| 188 | <NA> | I think it's more than hazing. It may be ... | bayesianhorse | <NA> | 2015-06-18 16:42:53+00:00 | comment |
| 209 | <NA> | I like the idea of moving that arrow the way h... | rattray | <NA> | 2015-06-08 02:15:30+00:00 | comment |
| 228 | <NA> | I don't understand why a beginner would s... | wolco | <NA> | 2019-02-03 14:35:43+00:00 | comment |
| 290 | <NA> | I leaerned more with one minute of this than a... | agumonkey | <NA> | 2016-07-16 06:19:39+00:00 | comment |
| 303 | <NA> | I've suggested a <i>rationale</i> for the tabo... | mechanical_fish | <NA> | 2008-12-17 04:42:02+00:00 | comment |
| 312 | <NA> | Do you have any reference for this?<p>I'm... | banashark | <NA> | 2023-11-13 19:57:00+00:00 | comment |
| 322 | <NA> | Default search scope is an option in the Finde... | kitsunesoba | <NA> | 2017-08-13 17:15:19+00:00 | comment |
| 391 | <NA> | Orthogonality and biology aren't friends. | agumonkey | <NA> | 2016-04-24 16:33:41+00:00 | comment |
| 396 | <NA> | I chose some random physics book that was good... | prawn | <NA> | 2011-03-27 22:29:51+00:00 | comment |
| 424 | <NA> | Seeing this get huge on Twitter. It's the... | shenanigoat | <NA> | 2016-01-09 03:04:22+00:00 | comment |
| 428 | <NA> | Looking through the comments there are a numbe... | moomin | <NA> | 2024-10-01 14:37:04+00:00 | comment |
| 429 | <NA> | Legacy media is a tough business. GBTC is payi... | arcticbull | <NA> | 2021-04-16 16:30:33+00:00 | comment |
| 436 | <NA> | Same thing if you sell unsafe food, yet we hav... | jabradoodle | <NA> | 2023-08-03 20:47:52+00:00 | comment |
| 438 | <NA> | There was briefly a thing called HSCSD ("... | LeoPanthera | <NA> | 2019-02-11 19:49:29+00:00 | comment |
| 446 | <NA> | > This article is a bit comical to read and... | lapcat | <NA> | 2023-01-02 16:00:49+00:00 | comment |
| 453 | <NA> | Large positions are most likely sold off in sm... | meowkit | <NA> | 2021-01-27 23:22:48+00:00 | comment |
| 507 | <NA> | A US-based VPN (or really any VPN) is only goi... | RandomBacon | <NA> | 2019-04-05 00:58:58+00:00 | comment |
| 543 | <NA> | <a href="https://codeberg.org/A... | ElectronBadger | <NA> | 2023-12-13 08:13:15+00:00 | comment |
| 565 | <NA> | It’s much harder for people without hands to w... | Aeolun | <NA> | 2024-05-03 11:58:13+00:00 | comment |
| 612 | <NA> | So by using ADMIN_SL0T instead was it just set... | minitoar | <NA> | 2021-03-05 16:07:56+00:00 | comment |
| 660 | <NA> | Outstanding! | cafard | <NA> | 2022-06-09 09:51:54+00:00 | comment |
| 673 | <NA> | On the other hand, something can be said for &... | babby | <NA> | 2013-08-12 00:31:02+00:00 | comment |
\n", - "

25 rows Ă— 6 columns

\n", - "
[123 rows x 6 columns in total]

- Removed: a duplicate text/plain rendering of the same 123-row preview.

In the updated notebook, the 3000-row sample above is loaded with:

hacker_news = bpd.read_gbq("bigquery-public-data.hacker_news.full")[['title', 'text', 'by', 'score', 'timestamp', 'type']].head(3000)
hacker_news

Then, keep only the rows that have text content:

hacker_news_with_texts = hacker_news[hacker_news['text'].isnull() == False]
len(hacker_news_with_texts)

2556

You can get an idea of the input token length by calculating the average string length:

hacker_news_with_texts['text'].str.len().mean()

390.05125195618155
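A rough way (not part of the notebook) to turn that average character length into an approximate token budget is the common ~4-characters-per-token rule of thumb; the exact ratio is an assumption that depends on the model's tokenizer:

# Hypothetical estimate only: assumes roughly 4 characters per token.
avg_chars = hacker_news_with_texts['text'].str.len().mean()
tokens_per_row = avg_chars / 4                          # ~98 tokens per comment
total_tokens = tokens_per_row * len(hacker_news_with_texts)
print(f"~{tokens_per_row:.0f} tokens/row, ~{total_tokens:,.0f} tokens in total")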
**Optional**: You can raise the confirmation threshold for a smoother experience:

if Version(bigframes.__version__) >= Version("1.31.0"):
    bigframes.options.compute.semantic_ops_confirmation_threshold = 5000

Now it's the LLM's turn. Keep only the rows whose text is mainly about the iPhone; this will take several minutes to finish:

iphone_comments = hacker_news_with_texts.semantics.filter("The {text} is mainly focused on iPhone", gemini_model)
iphone_comments
|   | title | text | by | score | timestamp | type |
|---|-------|------|----|-------|-----------|------|
| 9 | <NA> | It doesn’t work on Safari, and WebKit based br... | archiewood | <NA> | 2023-04-21 16:45:13+00:00 | comment |
| 420 | <NA> | Well last time I got angry down votes for sayi... | drieddust | <NA> | 2021-01-11 19:27:27+00:00 | comment |
| 815 | <NA> | New iPhone should be announced on September. L... | meerita | <NA> | 2019-07-30 20:54:42+00:00 | comment |
| 1516 | <NA> | Why would this take a week? i(phone)OS was ori... | TheOtherHobbes | <NA> | 2021-06-08 09:25:24+00:00 | comment |
| 1563 | <NA> | >or because Apple drama brings many clicks?... | weberer | <NA> | 2022-09-05 13:16:02+00:00 | comment |
\n", + "

5 rows Ă— 6 columns

\n", + "
[5 rows x 6 columns in total]

The performance of the semantic operators depends on the length of your input as well as your quota. With [the default 200 requests per minute](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas), our benchmarks for the previous operation over data of different sizes are:

* 800 rows -> ~4m
* 2550 rows -> ~13m
* 8500 rows -> ~40m

These numbers can give you a general idea of how fast the operators run.
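Those figures are consistent with simple request-budget arithmetic; a minimal sketch, assuming one model request per row and the requests-per-minute quota as the only bottleneck (actual runtime also varies with input length):

# Back-of-the-envelope ETA: rows / quota (requests per minute).
def estimate_minutes(rows: int, quota_rpm: int = 200) -> float:
    return rows / quota_rpm

for rows in (800, 2550, 8500):
    print(f"{rows} rows -> ~{estimate_minutes(rows):.0f}m")
# Prints ~4m, ~13m, ~42m — close to the benchmark figures above.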
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
|   | title | text | by | score | timestamp | type | sentiment |
|---|-------|------|----|-------|-----------|------|-----------|
| 9 | <NA> | It doesn’t work on Safari, and WebKit based br... | archiewood | <NA> | 2023-04-21 16:45:13+00:00 | comment | Frustrated, but hopeful. |
| 420 | <NA> | Well last time I got angry down votes for sayi... | drieddust | <NA> | 2021-01-11 19:27:27+00:00 | comment | Frustrated and angry. |
| 815 | <NA> | New iPhone should be announced on September. L... | meerita | <NA> | 2019-07-30 20:54:42+00:00 | comment | Excited anticipation. |
| 1516 | <NA> | Why would this take a week? i(phone)OS was ori... | TheOtherHobbes | <NA> | 2021-06-08 09:25:24+00:00 | comment | Frustrated, critical, obvious. |
| 1563 | <NA> | >or because Apple drama brings many clicks?... | weberer | <NA> | 2022-09-05 13:16:02+00:00 | comment | Negative, clickbait, Apple. |
\n", + "

5 rows Ă— 7 columns

\n", + "
[5 rows x 7 columns in total]
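A possible follow-up, sketched here rather than taken from the notebook: assign the mapped result to a variable and tally the short sentiment labels. This assumes `Series.value_counts` behaves as it does in pandas:

# Sketch only: the notebook does not assign the mapped result.
iphone_sentiment = iphone_comments.semantics.map(
    "Summarize the sentiment of the {text}. Your answer should have at most 3 words",
    output_column="sentiment",
    model=gemini_model,
)
iphone_sentiment["sentiment"].value_counts()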
Here is another example: count the number of rows whose authors have animal names. First, re-read the full 3000-row sample; the filter below lists the matching rows, and a counting sketch follows its output.
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
|   | title | text | by | score | timestamp | type |
|---|-------|------|----|-------|-----------|------|
| 0 | <NA> | Well, most people aren't alcoholics, so I... | slipframe | <NA> | 2021-06-26 02:37:56+00:00 | comment |
| 1 | <NA> | No, you don't really <i>need</i> a smartp... | vetinari | <NA> | 2023-04-19 15:56:34+00:00 | comment |
| 2 | <NA> | It's for the late Paul Allen RIP. Should&... | lsr_ssri | <NA> | 2018-10-16 01:07:55+00:00 | comment |
| 3 | <NA> | Yup they are dangerous. Be careful Donald Trump. | Sven7 | <NA> | 2015-08-10 16:05:54+00:00 | comment |
| 4 | <NA> | Sure, it's totally reasonable. Just point... | nicoburns | <NA> | 2020-10-05 11:20:51+00:00 | comment |
| 5 | <NA> | I wonder how long before special forces start ... | autisticcurio | <NA> | 2020-09-01 15:38:50+00:00 | comment |
| 6 | The Impending NY Tech Apocalypse: Here's What ... | <NA> | gaoprea | 3 | 2011-09-27 22:43:27+00:00 | story |
| 7 | <NA> | Where would you relocate to? I'm assuming that... | pavel_lishin | <NA> | 2011-09-16 19:02:01+00:00 | comment |
| 8 | Eureca beta is live. A place for your business... | <NA> | ricardos | 1 | 2012-10-15 13:09:32+00:00 | story |
| 9 | <NA> | It doesn’t work on Safari, and WebKit based br... | archiewood | <NA> | 2023-04-21 16:45:13+00:00 | comment |
| 10 | <NA> | I guess I don’t see the relevance. Vegans eat ... | stevula | <NA> | 2023-01-19 20:05:54+00:00 | comment |
| 11 | <NA> | I remember watching the American news media go... | fareesh | <NA> | 2019-06-17 19:49:17+00:00 | comment |
| 12 | <NA> | This article is incorrectly using the current ... | stale2002 | <NA> | 2018-03-18 18:57:21+00:00 | comment |
| 13 | <NA> | In the firm I made my internship, we have to u... | iserlohnmage | <NA> | 2019-10-22 10:41:01+00:00 | comment |
| 14 | <NA> | The main reason it requires unsafe is for memo... | comex | <NA> | 2017-05-05 20:45:37+00:00 | comment |
| 15 | Discord vs. IRC Rough Notes | <NA> | todsacerdoti | 48 | 2024-07-12 18:39:52+00:00 | story |
| 16 | <NA> | you have to auth again when you use apple pay. | empath75 | <NA> | 2017-09-12 18:58:20+00:00 | comment |
| 17 | <NA> | It goes consumer grade, automotive, military, ... | moftz | <NA> | 2021-04-13 01:24:03+00:00 | comment |
| 18 | <NA> | I don't have a link handy but the differe... | KennyBlanken | <NA> | 2022-05-13 16:08:38+00:00 | comment |
| 19 | <NA> | > I don't think the use case you menti... | colanderman | <NA> | 2017-09-28 05:16:06+00:00 | comment |
| 20 | <NA> | I think you need to watch it again, because yo... | vladimirralev | <NA> | 2018-12-07 11:25:52+00:00 | comment |
| 21 | Oh dear: new Yahoo anti-spoofing measures brea... | <NA> | joshreads | 1 | 2014-04-08 13:29:50+00:00 | story |
| 22 | How Much Warmer Was Your City in 2016? | <NA> | smb06 | 1 | 2017-02-16 23:26:34+00:00 | story |
| 23 | <NA> | Except that they clearly never tried to incent... | aenis | <NA> | 2022-01-31 17:08:57+00:00 | comment |
| 24 | Working Best at Coffee Shops | <NA> | GiraffeNecktie | 249 | 2011-04-19 14:25:17+00:00 | story |
\n", + "

25 rows Ă— 6 columns

\n", + "
[3000 rows x 6 columns in total]

hacker_news.semantics.filter("{by} contains animal name", model=gemini_model)
|   | title | text | by | score | timestamp | type |
|---|-------|------|----|-------|-----------|------|
| 24 | Working Best at Coffee Shops | <NA> | GiraffeNecktie | 249 | 2011-04-19 14:25:17+00:00 | story |
| 98 | <NA> | i resisted switching to chrome for months beca... | catshirt | <NA> | 2011-04-06 08:02:24+00:00 | comment |
| 137 | FDA reverses marketing ban on Juul e-cigarettes | <NA> | anigbrowl | 2 | 2024-06-06 16:42:40+00:00 | story |
| 188 | <NA> | I think it's more than hazing. It may be ... | bayesianhorse | <NA> | 2015-06-18 16:42:53+00:00 | comment |
| 209 | <NA> | I like the idea of moving that arrow the way h... | rattray | <NA> | 2015-06-08 02:15:30+00:00 | comment |
| 228 | <NA> | I don't understand why a beginner would s... | wolco | <NA> | 2019-02-03 14:35:43+00:00 | comment |
| 290 | <NA> | I leaerned more with one minute of this than a... | agumonkey | <NA> | 2016-07-16 06:19:39+00:00 | comment |
| 303 | <NA> | I've suggested a <i>rationale</i> for the tabo... | mechanical_fish | <NA> | 2008-12-17 04:42:02+00:00 | comment |
| 312 | <NA> | Do you have any reference for this?<p>I'm... | banashark | <NA> | 2023-11-13 19:57:00+00:00 | comment |
| 322 | <NA> | Default search scope is an option in the Finde... | kitsunesoba | <NA> | 2017-08-13 17:15:19+00:00 | comment |
| 391 | <NA> | Orthogonality and biology aren't friends. | agumonkey | <NA> | 2016-04-24 16:33:41+00:00 | comment |
| 396 | <NA> | I chose some random physics book that was good... | prawn | <NA> | 2011-03-27 22:29:51+00:00 | comment |
| 424 | <NA> | Seeing this get huge on Twitter. It's the... | shenanigoat | <NA> | 2016-01-09 03:04:22+00:00 | comment |
| 428 | <NA> | Looking through the comments there are a numbe... | moomin | <NA> | 2024-10-01 14:37:04+00:00 | comment |
| 429 | <NA> | Legacy media is a tough business. GBTC is payi... | arcticbull | <NA> | 2021-04-16 16:30:33+00:00 | comment |
| 436 | <NA> | Same thing if you sell unsafe food, yet we hav... | jabradoodle | <NA> | 2023-08-03 20:47:52+00:00 | comment |
| 438 | <NA> | There was briefly a thing called HSCSD ("... | LeoPanthera | <NA> | 2019-02-11 19:49:29+00:00 | comment |
| 446 | <NA> | > This article is a bit comical to read and... | lapcat | <NA> | 2023-01-02 16:00:49+00:00 | comment |
| 453 | <NA> | Large positions are most likely sold off in sm... | meowkit | <NA> | 2021-01-27 23:22:48+00:00 | comment |
| 507 | <NA> | A US-based VPN (or really any VPN) is only goi... | RandomBacon | <NA> | 2019-04-05 00:58:58+00:00 | comment |
| 543 | <NA> | <a href="https://codeberg.org/A... | ElectronBadger | <NA> | 2023-12-13 08:13:15+00:00 | comment |
| 565 | <NA> | It’s much harder for people without hands to w... | Aeolun | <NA> | 2024-05-03 11:58:13+00:00 | comment |
| 612 | <NA> | So by using ADMIN_SL0T instead was it just set... | minitoar | <NA> | 2021-03-05 16:07:56+00:00 | comment |
| 660 | <NA> | Outstanding! | cafard | <NA> | 2022-06-09 09:51:54+00:00 | comment |
| 673 | <NA> | On the other hand, something can be said for &... | babby | <NA> | 2013-08-12 00:31:02+00:00 | comment |
\n", + "

25 rows Ă— 6 columns

\n", + "
- Removed: the previous revision's copy of the animal-name filter cell, its
- duplicate text/plain preview, and the old closing markdown cell ("Here are our
- performance numbers: 3000 rows -> 6m 9.2s, 10000 rows -> 26m 42.4s").

Here are the runtime numbers with 500 requests per minute [raised quota](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas):

* 3000 rows -> ~6m
* 10000 rows -> ~26m

The notebook closes with its metadata: the Python 3.11.9 kernelspec and language_info are unchanged, a Colab section is added ("provenance": [], "include_colab_link": true), and "nbformat_minor" moves from 2 to 0.

From bc5f9461880113c120309c9076df4348c5abd335 Mon Sep 17 00:00:00 2001
From: "release-please[bot]"
 <55107282+release-please[bot]@users.noreply.github.com>
Date: Mon, 6 Jan 2025 10:30:37 -0800
Subject: [PATCH 9/9] chore(main): release 1.31.0 (#1255)

Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
---
 CHANGELOG.md                              | 20 ++++++++++++++++++++
 bigframes/version.py                      |  2 +-
 third_party/bigframes_vendored/version.py |  2 +-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ce5aed9dd..ff5ce11006 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,26 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [1.31.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.30.0...v1.31.0) (2025-01-05)
+
+
+### Features
+
+* Implement confirmation threshold for semantic operators ([#1251](https://github.com/googleapis/python-bigquery-dataframes/issues/1251)) ([5ba4511](https://github.com/googleapis/python-bigquery-dataframes/commit/5ba4511ad85cf02f0e5ad4e33ea3826b19527293))
+
+
+### Bug Fixes
+
+* Raise if trying to change `ordering_mode` after session has started ([#1252](https://github.com/googleapis/python-bigquery-dataframes/issues/1252)) ([8cfaae8](https://github.com/googleapis/python-bigquery-dataframes/commit/8cfaae8718f3c4c6739b7155a02ef13dbed73425))
+* Reduce the number of labels added to query jobs ([#1245](https://github.com/googleapis/python-bigquery-dataframes/issues/1245)) ([fdcdc18](https://github.com/googleapis/python-bigquery-dataframes/commit/fdcdc189e5fcae9de68bf8fb3872136f55be36cb))
+
+
+### Documentation
+
+* Remove bq studio link ([#1258](https://github.com/googleapis/python-bigquery-dataframes/issues/1258)) ([dd4fd2e](https://github.com/googleapis/python-bigquery-dataframes/commit/dd4fd2e8bafa73b4b5d99f095943bd9a757cd5b5))
+* Update bigframes.pandas.DatetimeMethods docstrings ([#1246](https://github.com/googleapis/python-bigquery-dataframes/issues/1246)) ([10f08da](https://github.com/googleapis/python-bigquery-dataframes/commit/10f08daec6034aafe48096be56683c953accc79a))
+* Update semantic_operators.ipynb ([#1260](https://github.com/googleapis/python-bigquery-dataframes/issues/1260)) ([a2ed989](https://github.com/googleapis/python-bigquery-dataframes/commit/a2ed989fac789b0debacc0ec8a044b473cc6112c))
+
 ## [1.30.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.29.0...v1.30.0) (2024-12-30)

diff --git a/bigframes/version.py b/bigframes/version.py
index 74c8363d7d..7b6d1f2153 100644
--- a/bigframes/version.py
+++ b/bigframes/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "1.30.0"
+__version__ = "1.31.0"

diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py
index 74c8363d7d..7b6d1f2153 100644
--- a/third_party/bigframes_vendored/version.py
+++ b/third_party/bigframes_vendored/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "1.30.0"
+__version__ = "1.31.0"