From e8532b1d999d26ea1ebdd30efb8f2c0a93a6a28d Mon Sep 17 00:00:00 2001 From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com> Date: Thu, 16 Nov 2023 10:08:27 -0800 Subject: [PATCH 01/26] fix: polish the llm+kmeans notebook (#208) --- .../bq_dataframes_llm_code_generation.ipynb | 2 +- .../bq_dataframes_llm_kmeans.ipynb | 1181 +++++++++++++++-- 2 files changed, 1057 insertions(+), 126 deletions(-) diff --git a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb index 0f113b84c6..0a41447a53 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb @@ -34,7 +34,7 @@ "\n", "\n", " \n", diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb index 46c4955288..ae03813639 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb @@ -31,7 +31,7 @@ "
\n", - " \n", + " \n", " \"Colab Run in Colab\n", " \n", "
\n", "\n", " \n", @@ -118,14 +118,10 @@ "\n", "2. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).\n", "\n", - "3. [Click here](https://console.cloud.google.com/flows/enableapi?apiid=bigquery.googleapis.com,bigqueryconnection.googleapis.com,run.googleapis.com,artifactregistry.googleapis.com,cloudbuild.googleapis.com,cloudresourcemanager.googleapis.com) to enable the following APIs:\n", + "3. [Click here](https://console.cloud.google.com/flows/enableapi?apiid=bigquery.googleapis.com,bigqueryconnection.googleapis.com,aiplatform.googleapis.com) to enable the following APIs:\n", "\n", " * BigQuery API\n", " * BigQuery Connection API\n", - " * Cloud Run API\n", - " * Artifact Registry API\n", - " * Cloud Build API\n", - " * Cloud Resource Manager API\n", " * Vertex AI API\n", "\n", "4. If you are running this notebook locally, install the [Cloud SDK](https://cloud.google.com/sdk)." @@ -143,9 +139,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updated property [core/project].\n" + ] + } + ], "source": [ "# set your project ID below\n", "PROJECT_ID = \"\" # @param {type:\"string\"}\n", @@ -166,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -232,87 +236,6 @@ "# auth.authenticate_user()" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you want to reset the location of the created DataFrame or Series objects, reset the session by executing `bf.close_session()`. After that, you can reuse `bf.options.bigquery.location` to specify another location." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Connect to Vertex AI\n", - "\n", - "In order to use PaLM2TextGenerator, we will need to set up a [cloud resource connection](https://cloud.google.com/bigquery/docs/create-cloud-resource-connection)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from google.cloud import bigquery_connection_v1 as bq_connection\n", - "\n", - "CONN_NAME = \"bqdf-llm\"\n", - "\n", - "client = bq_connection.ConnectionServiceClient()\n", - "new_conn_parent = f\"projects/{PROJECT_ID}/locations/{REGION}\"\n", - "exists_conn_parent = f\"projects/{PROJECT_ID}/locations/{REGION}/connections/{CONN_NAME}\"\n", - "cloud_resource_properties = bq_connection.CloudResourceProperties({})\n", - "\n", - "try:\n", - " request = client.get_connection(\n", - " request=bq_connection.GetConnectionRequest(name=exists_conn_parent)\n", - " )\n", - " CONN_SERVICE_ACCOUNT = f\"serviceAccount:{request.cloud_resource.service_account_id}\"\n", - "except Exception:\n", - " connection = bq_connection.types.Connection(\n", - " {\"friendly_name\": CONN_NAME, \"cloud_resource\": cloud_resource_properties}\n", - " )\n", - " request = bq_connection.CreateConnectionRequest(\n", - " {\n", - " \"parent\": new_conn_parent,\n", - " \"connection_id\": CONN_NAME,\n", - " \"connection\": connection,\n", - " }\n", - " )\n", - " response = client.create_connection(request)\n", - " CONN_SERVICE_ACCOUNT = (\n", - " f\"serviceAccount:{response.cloud_resource.service_account_id}\"\n", - " )\n", - "print(CONN_SERVICE_ACCOUNT)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set permissions for the service account\n", - "\n", - "The resource connection service account requires certain project-level permissions:\n", - " - `roles/aiplatform.user` and `roles/bigquery.connectionUser`: These roles are required for the connection to create a model definition using the LLM model in Vertex AI ([documentation](https://cloud.google.com/bigquery/docs/generate-text#give_the_service_account_access)).\n", - " - `roles/run.invoker`: This role is required for the connection to have read-only access to Cloud Run services that back custom/remote functions ([documentation](https://cloud.google.com/bigquery/docs/remote-functions#grant_permission_on_function)).\n", - "\n", - "Set these permissions by running the following `gcloud` commands:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!gcloud projects add-iam-policy-binding {PROJECT_ID} --condition=None --no-user-output-enabled --member={CONN_SERVICE_ACCOUNT} --role='roles/bigquery.connectionUser'\n", - "!gcloud projects add-iam-policy-binding {PROJECT_ID} --condition=None --no-user-output-enabled --member={CONN_SERVICE_ACCOUNT} --role='roles/aiplatform.user'\n", - "!gcloud projects add-iam-policy-binding {PROJECT_ID} --condition=None --no-user-output-enabled --member={CONN_SERVICE_ACCOUNT} --role='roles/run.invoker'" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -336,12 +259,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Project Setup" + "BigQuery DataFrames setup" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "id": "R7STCS8xB5d2" }, @@ -353,6 +276,14 @@ "bf.options.bigquery.location = REGION" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to reset the location of the created DataFrame or Series objects, reset the session by executing `bf.close_session()`. After that, you can reuse `bf.options.bigquery.location` to specify another location." 
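The session setup above boils down to pointing the `gcloud` CLI and the BigQuery DataFrames session at the same project and location. A minimal sketch of the pattern that sentence describes, assuming placeholder region values rather than anything this diff pins down:

```python
# Sketch of the session-location pattern the notebook describes; "US" and "EU"
# are placeholder values, and bf.close_session() is the reset hook named above.
import bigframes.pandas as bf

REGION = "US"  # location used for this session
bf.options.bigquery.location = REGION

# ... create DataFrames; the location is now fixed for this session ...

# To target a different location later, close the session and set it again.
bf.close_session()
bf.options.bigquery.location = "EU"  # hypothetical second location
```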
+ ] + }, { "attachments": {}, "cell_type": "markdown", @@ -365,7 +296,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "id": "zDSwoBo1CU3G" }, @@ -376,11 +307,101 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "id": "tYDoaKgJChiq" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job 9f096761-e3b5-4d58-a9f7-485ced67afca is DONE. 2.3 GB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job ee8fecb1-2e30-407d-9e2e-9e76061da9e7 is DONE. 2.3 GB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "
\n", - " \n", + " \n", " \"Colab Run in Colab\n", " \n", "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
consumer_complaint_narrative
0I signed a contract as a condition of employme...
1First, I want to disclose that XXXX and XXXX b...
2Frequent calls from Focused Receivables Manage...
3I recently contacted Enhanced Recovery Company...
4This began when I subscribed to XXXX XXXX inte...
\n", + "

5 rows × 1 columns

\n", + "[5 rows x 1 columns in total]" + ], + "text/plain": [ + " consumer_complaint_narrative\n", + "0 I signed a contract as a condition of employme...\n", + "1 First, I want to disclose that XXXX and XXXX b...\n", + "2 Frequent calls from Focused Receivables Manage...\n", + "3 I recently contacted Enhanced Recovery Company...\n", + "4 This began when I subscribed to XXXX XXXX inte...\n", + "\n", + "[5 rows x 1 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "issues_df = input_df[[\"consumer_complaint_narrative\"]].dropna()\n", "issues_df.head(n=5) # View the first five complaints" @@ -391,12 +412,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Download 10000 complaints to use with PaLM2TextEmbeddingGenerator" + "Downsample DataFrame to 10,000 records for model training." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "id": "OltYSUEcsSOW" }, @@ -418,11 +439,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "id": "li38q8FzDDMu" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job 52d2e961-7896-497c-8b03-ab7374737679 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "from bigframes.ml.llm import PaLM2TextEmbeddingGenerator\n", "\n", @@ -431,11 +465,125 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": { "id": "cOuSOQ5FDewD" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job d093d51a-8eda-442f-80cd-568cb76e00b3 is DONE. 10.6 MB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 6419df65-3e96-41a7-a7b5-3d058e18763a is DONE. 80.0 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 917f09ea-c468-4363-a856-b1091e5f775f is DONE. 80.0 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 5c9679e7-192c-40b5-a14b-edc0fa113eaa is DONE. 61.5 MB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
text_embedding
422[-0.012013785541057587, 0.003669967409223318, ...
616[-0.014948881231248379, -0.04672442376613617, ...
833[-0.01951478235423565, -0.027120858430862427, ...
1370[-0.03140445053577423, -0.048797041177749634, ...
1430[-0.02244548313319683, -0.03336532413959503, 0...
\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" + ], + "text/plain": [ + " text_embedding\n", + "422 [-0.012013785541057587, 0.003669967409223318, ...\n", + "616 [-0.014948881231248379, -0.04672442376613617, ...\n", + "833 [-0.01951478235423565, -0.027120858430862427, ...\n", + "1370 [-0.03140445053577423, -0.048797041177749634, ...\n", + "1430 [-0.02244548313319683, -0.03336532413959503, 0...\n", + "\n", + "[5 rows x 1 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Will take ~3 minutes to compute the embeddings\n", "predicted_embeddings = model.predict(downsampled_issues_df)\n", @@ -445,14 +593,263 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": { "id": "4H_etYfsEOFP" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job ce9cb0f9-4b0d-40a1-81f3-d6e60dd6c684 is DONE. 160.0 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job aa692a30-5706-46ad-8029-faf2fac66234 is DONE. 72.2 MB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
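The embedding step itself is only two calls; a sketch of the shape, assuming the model picks up the session's default BigQuery connection as this polished version of the notebook intends:

```python
# predict() takes a DataFrame with a single text column and returns a
# DataFrame whose `text_embedding` column holds one float array per row.
from bigframes.ml.llm import PaLM2TextEmbeddingGenerator

model = PaLM2TextEmbeddingGenerator()
predicted_embeddings = model.predict(downsampled_issues_df)
predicted_embeddings.head()
```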
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
consumer_complaint_narrativetext_embedding
2580664Hello, my name is XXXX XXXX, and I am writing ...[0.0003211698785889894, -0.01816680282354355, ...
1806973This is XXXX XXXX and I am submitting this com...[-0.009485247544944286, -0.025846892967820168,...
2055053XXXX XXXX XXXX, XXXX. ( address : XXXX XXXX XX...[-0.010950954630970955, -0.0249345600605011, 0...
2515231When I reinvestigated my credit report, I real...[-0.009660656563937664, -0.05793113633990288, ...
2633049Checking my credit report XX/XX/2018 with all ...[-0.0022159104701131582, -0.03330004960298538,...
3117273I contacted TransUnion and spoke a credit rep ...[-0.015955328941345215, -0.006488671060651541,...
698814XXXX XXXX XXXX. makes daily calls to me cell c...[0.005397460889071226, -0.01276913657784462, 0...
267826Can we please reopen Case : XXXX? \n", + "\n", + "Wells Farg...[0.004065403249114752, -0.0005381882656365633,...
54019My rights under 15 USC 1681 have been violated...[0.013823015615344048, -0.02010691538453102, 0...
141050To whom it may concern : My personal informati...[0.008104532025754452, -0.01856449618935585, 0...
2962076I have had a CashApp account since last year, ...[-0.0003019514260813594, -0.03750108182430267,...
2481105that some of the information was erroneous. Th...[-0.014868081547319889, -0.0443895161151886, -...
431562I have disputed the referenced accounts to the...[-0.0020524838473647833, -0.04830990731716156,...
1953029On, XX/XX/22, I attempted to complete a transa...[-0.01599179394543171, -0.0074900356121361256,...
2395979Subject : XXXX XXXX XXXX compensation, refund,...[-0.0035950862802565098, -0.014652969315648079...
455524I paid off my mortgage on XX/XX/2019. The comp...[-0.01100730150938034, -0.03495829552412033, 0...
2155924This kind of account is placed as a charged of...[-0.028635455295443535, -0.028604287654161453,...
1069497This is one of many issues I have had with Wel...[0.008871790021657944, -0.028502725064754486, ...
3181689I have disputed this account with MONTEREY FIN...[-0.004721717908978462, -0.03673810139298439, ...
274268Lender is not updating my loan status in the V...[-0.009221495129168034, -0.0289347805082798, 0...
1671305XXXX is a peer to peer lending conmpany that u...[-0.02911308966577053, -0.01850792020559311, -...
886026( DISPUTE CODE - XXXX ) My personal informatio...[-0.007220877334475517, -0.016615957021713257,...
1044431I filed a complaint against PNC this year and ...[0.002848619595170021, -0.035117778927087784, ...
1938481I applied for a modification and was approved....[-0.03114932030439377, -0.0421406552195549, 0....
1987834Ive been Disputting my XXXX XXXX I opened this...[-0.009406660683453083, -0.020967338234186172,...
\n", + "

25 rows × 2 columns

\n", + "
[10000 rows x 2 columns in total]" + ], + "text/plain": [ + " consumer_complaint_narrative \\\n", + "2580664 Hello, my name is XXXX XXXX, and I am writing ... \n", + "1806973 This is XXXX XXXX and I am submitting this com... \n", + "2055053 XXXX XXXX XXXX, XXXX. ( address : XXXX XXXX XX... \n", + "2515231 When I reinvestigated my credit report, I real... \n", + "2633049 Checking my credit report XX/XX/2018 with all ... \n", + "3117273 I contacted TransUnion and spoke a credit rep ... \n", + "698814 XXXX XXXX XXXX. makes daily calls to me cell c... \n", + "267826 Can we please reopen Case : XXXX? \n", + "\n", + "Wells Farg... \n", + "54019 My rights under 15 USC 1681 have been violated... \n", + "141050 To whom it may concern : My personal informati... \n", + "2962076 I have had a CashApp account since last year, ... \n", + "2481105 that some of the information was erroneous. Th... \n", + "431562 I have disputed the referenced accounts to the... \n", + "1953029 On, XX/XX/22, I attempted to complete a transa... \n", + "2395979 Subject : XXXX XXXX XXXX compensation, refund,... \n", + "455524 I paid off my mortgage on XX/XX/2019. The comp... \n", + "2155924 This kind of account is placed as a charged of... \n", + "1069497 This is one of many issues I have had with Wel... \n", + "3181689 I have disputed this account with MONTEREY FIN... \n", + "274268 Lender is not updating my loan status in the V... \n", + "1671305 XXXX is a peer to peer lending conmpany that u... \n", + "886026 ( DISPUTE CODE - XXXX ) My personal informatio... \n", + "1044431 I filed a complaint against PNC this year and ... \n", + "1938481 I applied for a modification and was approved.... \n", + "1987834 Ive been Disputting my XXXX XXXX I opened this... \n", + "\n", + " text_embedding \n", + "2580664 [0.0003211698785889894, -0.01816680282354355, ... \n", + "1806973 [-0.009485247544944286, -0.025846892967820168,... \n", + "2055053 [-0.010950954630970955, -0.0249345600605011, 0... \n", + "2515231 [-0.009660656563937664, -0.05793113633990288, ... \n", + "2633049 [-0.0022159104701131582, -0.03330004960298538,... \n", + "3117273 [-0.015955328941345215, -0.006488671060651541,... \n", + "698814 [0.005397460889071226, -0.01276913657784462, 0... \n", + "267826 [0.004065403249114752, -0.0005381882656365633,... \n", + "54019 [0.013823015615344048, -0.02010691538453102, 0... \n", + "141050 [0.008104532025754452, -0.01856449618935585, 0... \n", + "2962076 [-0.0003019514260813594, -0.03750108182430267,... \n", + "2481105 [-0.014868081547319889, -0.0443895161151886, -... \n", + "431562 [-0.0020524838473647833, -0.04830990731716156,... \n", + "1953029 [-0.01599179394543171, -0.0074900356121361256,... \n", + "2395979 [-0.0035950862802565098, -0.014652969315648079... \n", + "455524 [-0.01100730150938034, -0.03495829552412033, 0... \n", + "2155924 [-0.028635455295443535, -0.028604287654161453,... \n", + "1069497 [0.008871790021657944, -0.028502725064754486, ... \n", + "3181689 [-0.004721717908978462, -0.03673810139298439, ... \n", + "274268 [-0.009221495129168034, -0.0289347805082798, 0... \n", + "1671305 [-0.02911308966577053, -0.01850792020559311, -... \n", + "886026 [-0.007220877334475517, -0.016615957021713257,... \n", + "1044431 [0.002848619595170021, -0.035117778927087784, ... \n", + "1938481 [-0.03114932030439377, -0.0421406552195549, 0.... \n", + "1987834 [-0.009406660683453083, -0.020967338234186172,... 
\n", + "...\n", + "\n", + "[10000 rows x 2 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Join the complaints with their embeddings in the same DataFrame\n", - "combined_df = downsampled_issues_df.join(predicted_embeddings)" + "combined_df = downsampled_issues_df.join(predicted_embeddings, how=\"left\")\n", + "combined_df" ] }, { @@ -470,12 +867,12 @@ "id": "OUZ3NNbzo1Tb" }, "source": [ - "## Step 2: KMeans clustering" + "## Step 2: Create k-means model and predict clusters" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": { "id": "AhNTnEC5FRz2" }, @@ -496,14 +893,152 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": { "id": "6poSxh-fGJF7" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job 65eb317d-59f1-4d10-acd1-4b7f3778114c is DONE. 61.7 MB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 156e445e-cc01-4b30-84cc-ac1c98a69b81 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 5befc212-f4a3-4e33-b1b2-01e809acdcbd is DONE. 61.9 MB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job bd271178-8b8d-45dc-ac57-7f0194d0daac is DONE. 80.0 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job bbfb9cca-622d-4bf5-9fc0-6d9a85287d41 is DONE. 80.0 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job a5f30b32-9fb0-42b4-b426-d8484f008bdb is DONE. 160.0 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CENTROID_ID
4222
6163
8335
13707
14303
\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" + ], + "text/plain": [ + " CENTROID_ID\n", + "422 2\n", + "616 3\n", + "833 5\n", + "1370 7\n", + "1430 3\n", + "\n", + "[5 rows x 1 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Use KMeans clustering to calculate our groups. Will take ~3 minutes.\n", - "cluster_model.fit(combined_df[[\"text_embedding\"]])\n", + "cluster_model.fit(combined_df[\"text_embedding\"])\n", "clustered_result = cluster_model.predict(combined_df[[\"text_embedding\"]])\n", "# Notice the CENTROID_ID column, which is the ID number of the group that\n", "# each complaint belongs to.\n", @@ -512,12 +1047,123 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job 7a41196e-ea67-44ac-95a7-7dce620d6d21 is DONE. 320.0 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 8008b482-1a0d-461f-a215-4676d9d918dc is DONE. 72.4 MB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
consumer_complaint_narrativetext_embeddingCENTROID_ID
2580664Hello, my name is XXXX XXXX, and I am writing ...[0.0003211698785889894, -0.01816680282354355, ...2
1806973This is XXXX XXXX and I am submitting this com...[-0.009485247544944286, -0.025846892967820168,...5
2055053XXXX XXXX XXXX, XXXX. ( address : XXXX XXXX XX...[-0.010950954630970955, -0.0249345600605011, 0...3
2515231When I reinvestigated my credit report, I real...[-0.009660656563937664, -0.05793113633990288, ...5
2633049Checking my credit report XX/XX/2018 with all ...[-0.0022159104701131582, -0.03330004960298538,...3
\n", + "

5 rows × 3 columns

\n", + "
[5 rows x 3 columns in total]" + ], + "text/plain": [ + " consumer_complaint_narrative \\\n", + "2580664 Hello, my name is XXXX XXXX, and I am writing ... \n", + "1806973 This is XXXX XXXX and I am submitting this com... \n", + "2055053 XXXX XXXX XXXX, XXXX. ( address : XXXX XXXX XX... \n", + "2515231 When I reinvestigated my credit report, I real... \n", + "2633049 Checking my credit report XX/XX/2018 with all ... \n", + "\n", + " text_embedding CENTROID_ID \n", + "2580664 [0.0003211698785889894, -0.01816680282354355, ... 2 \n", + "1806973 [-0.009485247544944286, -0.025846892967820168,... 5 \n", + "2055053 [-0.010950954630970955, -0.0249345600605011, 0... 3 \n", + "2515231 [-0.009660656563937664, -0.05793113633990288, ... 5 \n", + "2633049 [-0.0022159104701131582, -0.03330004960298538,... 3 \n", + "\n", + "[5 rows x 3 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Join the group number to the complaints and their text embeddings\n", - "combined_clustered_result = combined_df.join(clustered_result)" + "combined_clustered_result = combined_df.join(clustered_result)\n", + "\n", + "combined_clustered_result.head(n=5)" ] }, { @@ -535,7 +1181,7 @@ "id": "21rNsFMHo8hO" }, "source": [ - "## Step 3: Summarize the complaints" + "## Step 3: Use PaLM2 LLM model to summarize complaint clusters" ] }, { @@ -548,11 +1194,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": { "id": "2E7wXM_jGqo6" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job 50c7c0dd-94a2-494e-a37f-6a838a518f6c is DONE. 11.0 MB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job d96c847f-c292-4804-bd05-fd643c41c7a5 is DONE. 11.0 MB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Using bigframes, with syntax identical to pandas,\n", "# filter out the first and second groups\n", @@ -569,11 +1240,100 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": { "id": "ZNDiueI9IP5e" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "comment list 1:\n", + "1. XXXX is a peer to peer lending conmpany that uses borrowers crypto to collateralize loans from investors ( like myself ). I've been investing with them for almost XXXX years and currently have {$240000.00} tied up in lending products with XXXX. \n", + "As of XXXX days ago we received an email saying all business operations have been ceased and no withdrawals or deposits will be allowed. They said they'll update customers within 10 days, but no one can reach anyone at the company to find out any more details as they are not answering calls nor returning emails. It also appears the company has scrubbed its XXXX page and the XXXX pages of top executives. \n", + "\n", + "All collateral and client 's investment funds are supposedly held at or processed through XXXX XXXX XXXX ( registered SEC company ). XXXX XXXX keeps telling us to contact XXXX and won't give us any information, so we have no way to find out what's happening with our funds/collateral or if everything is gone. We have a XXXX channel up where people are gathering evidence, documentation, etc. This is probably the best place to start to get a broad view of what's happening. Details below. 
\n", + "\n", + "XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX CONST LLC ( Business ID : XXXX ) FoXXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX 'Cease of Operations ' email received by all investors XXXX XX/XX/2022 at XXXX : \" Dear XXXX Users, Given the collapses of several cryptocurrencies so far this year and the rapidly deteriorating market conditions that have been prompting heavy withdrawals across all XXXX lending and XXXX exchange platforms recently, we are sad to inform you that we are unable to continue to operate our business as usual. As such, we are limiting our business activities, including pausing user withdrawals as allowed under our Terms of XXXX. \n", + "No deposit or investment request will be processed at this time. \n", + "\n", + "Our team is working diligently towards our objective of maximizing value for all of our Users, and our top priority continues to be to protect your interests. As we explore all options available to us, we will provide updates to you as we go. \n", + "\n", + "We hope to communicate with you within the next XXXX business days on the next steps to address the situation. We appreciate your patience in this trying time. \n", + "\n", + "Sincerely yoursXXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX\n", + "2. Submitted XX/XX/XXXX\n", + "Typed XX/XX/XXXX:\n", + "\n", + "XX/XX/XXXX\n", + "XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX, XXXX XXXX\n", + "PH:. XXXX\n", + "PH: XXXX\n", + "EM:\n", + "XXXX\n", + "XXXX\n", + "XXXX XXXX \n", + "XXXX XXXX\n", + "Date of Birth XX/XX/XXXX\n", + "SS#: XXXX\n", + "TO:\n", + "* Consumer Financial Protection Brueau\n", + "* Department of Veteran Affairs, Office of the Inspector General\n", + "My name is XXXX XXXX XXXX, I've received more than one email from Discover Card in my XXXX XXXX, past emails from Discover Card were unautherized deletions.\n", + "From: Discover Card XXXX\n", + "To: You XXXX\n", + "Date: XX/XX/XXXX, XXXX XXXX XXXX From: Discover Card XXXX>\n", + "To Recipient \n", + "Date Mon, XX/XX/XXXX XXXX XXXX\n", + "I dont and havent ever had a Discover Checking, Savings, Business Accounts nor Loans of any kind through any Bank called Discover. The 1st time I was contacted by Discover Card I resided alone from XX/XX/XXXX to XX/XX/XXXXat XXXX XXXX XXXX at XXXX XXXX XXXX XXXX XXXX in XXXX, XXXX years prior to me moving here to XXXX, XXXX in XX/XX/XXXX. When \n", + "\n", + "\n", + "Discover Card had 1st contacted me in XXXX, XXXX it was associated with my XXXX XXXX XXXX website related online Merchants Account. Not once have I ever applied for or had any Website Merchant Accounts here in XXXX; I only applied for online online Merchant Accounts associated with my XXXX related Accounts I purchased while residing in XXXX, XXXX. Some of my website related information was stolen both in XXXX, XXXX and here in XXXX along with my other property that hasn't been returned to me. 
I don't and haven't ever had any XXXX XXXX related Agreements,Contracts or Credit Cards offered to Veterans associated with ones businesses. Nor have I ever applied for or had a Business License or Business Permit in any City or State inspite of my diverse interest. Not once have I ever allowed another be it an Paralegal, Payee, Attorney, Employers, Landlords, Veteran Organizations including Vocational Rehabilitation Programs, XXXX( XXXX XXXX XXXX, XXXX XXXX, Entertainment Companies, Banks, Celebrity Personal Assistant Agencies or Celebs, Shelters, Charities, HUD, Housing Arthority, Department of Veteran Affairs, Military, Law Enforcement or anyone else nor their employess to sign any business related Agreements or Contracts on my behalf; not even my family members or friends. \n", + "None of my XXXX XXXX attempts were associated with my Employers, Department of Veteran Affairs,Vocational Rehabilitation Programs Military, Landlords, HUD( Housing Authority),Friends, Family nor did I ever sign related Agreements or Contracts with them. Not once had I ever provided anyone the passwords to be able to sign into my accounts rather were aware of my accounts or not. Yes, my desktop computer that was stolen along with my other property XX/XX/XXXX was registered with my Online Merchant Account. I had paid for my Merchant related Accounts through my same XXXX XXXX XXXX Account I purchased both of my XXXX XXXX XXXX related accounts through. That was 1st once during the Summer of XX/XX/XXXX and 2nd my related website months later, while I resided in XXXX XXXX and I worked for XXXX. I never offered nor did I ever sign any business Contracts or Agreements with XXXX nor my Landlord or their staff associted with any of my online websites or Merchant Accounts. My XXXX XXXX XXXX Compensation was deposited into both of my XXXX XXXX XXXX Accounts at that time. My account was changed during the Summer of XX/XX/XXXXbecause of theft of my Bank Card. None of my Checking,Savings, past Credit Cards or Business related were shared accounts in which others were allowed to \n", + "use to make purchases. I had written checks from my XXXX XXXX XXXX account to pay for my XXXX XXXX XXXX XXXX on the XXXX XXXX here in XXXX in XX/XX/XXXX before it's name changed to XXXX XXXX. Prior to me using my same account open a Checking account in person at XXXX XXXX before it's name was changed to XXXX XXXX. Where my XXXX XXXX XXXX XXXX has been deposited since that time. I had used my XXXX XXXX Checking to pay for my XXXX XXXX XXXX XXXX both before theft of my property XX/XX/XXXX and that was also prior to the theft of my property from my XXXX XXXX XXXX XXXX in XX/XX/XXXX.\n", + "I've stated this many times:\n", + "I paid for my 1st XXXX XXXX XXXX Membership while employed at XXXX using my XXXX XXXX XXXX account XXXX my XXXX XXXX XXXX XXXX was also deposited. That was changed to XXXX because I didn't receive my 1st XXXX XXXX XXXX Card the bank sent to XXXX XXXX residence on XXXX XXXX in XX/XX/XXXX while I was there. In which both my XXXX salary and XXXX XXXX XXXX XXXX were deposited into my account, no money from XXXX XXXX nor anyone else that was at that residence was given to nor were any of my children there. Nor did XXXX or any other person at that residence ever give me my missing Bank Card not even after I moved out and stayed a month at XXXX XXXX XXXX using my replacement card to pay for my Hotel room. 
Which is the same account I used to pay for XXXX XXXX Membership, XXXX XXXX XXXX, XXXX XXXX Membership fees, and various online Merchant Account activation related fees.\n", + "* XXXX XXXX XXXX.\n", + "XXXX XXXX XXXX XXXX. Membership\n", + "\n", + "# XXXX\n", + "* XXXX XXXX Membership\n", + "# XXXX\n", + "* Total Merchant Services XXXX and XXXX.\n", + "* XXXX XXXX XXXX XXXX XXXX\n", + "* XXXX XXXX changed my $XXXX a month fees to my XXXX XXXX XXXX account #XXXX.\n", + "XX/XX/XXXX - XX/XX/XXXX XXXX XXXX, XXXX.\n", + "\n", + "Rep: XXXX XXXX XXXX, Fl \n", + "XXXX\n", + "XXXX Website \n", + "XXXX\n", + "Software and website owner, I performed Internet advertising and marketing, to promote this software and website. I worked and XXXX from my home XXXX XXXX XXXX XXXX XXXX , XXXX. I purchased XXXX XXXX XXXX-Software Electronic Book CD and was given a website to promote the software on the internet. The XXXX was given a copy of my website owner certificate document submitted to me when I purchased the software marketing program as well copies of my other school transcripts in addition to XXXX XXXX XXXX for example. XXXX, represented the first initials of my children's names. I wasn't ever paid and I'm still owed the money. Nor did my marketing program have anything to do with any schools, college nor university programs nor did I ever offer or sign any agreement to include it such. Nor did my XXXX XXXX XXXX have anything to do with any other employers, Department of Family and Children, Military, Veteran Organizations or Food Stamp programs, Section 8 nor Indianapolis Housing Authority for example; only me.\n", + "Thank you,\n", + "XXXX XXXX\n", + "3. ACCORDING TO 15 U.S. CODE 6803-DISCLOSURE OF INSTITUTION PRIVACY POLICY, AND ACCORDING TO U.S. CODE 6802- OBLIGATIONS WITH RESPECT TO DISCLOSURES OF PERSONAL INFORMATION. ( b ) OPT OUT ( 1 ) IN GENERAL A FINANCIAL INSTITUTION MAY NOT DISCLOSE NONPUBLIC PERSONAL INFORMATION TO A NONAFFILIATED THIRD PARTY ( TRANSUNION, XXXX, AND XXXX. ) UNLESS- ( A ) SUCH FINANCIAL INSTITUTION CLEARLY AND CONSPICUOUSLY DISCLOSES TO THE CONSUMER, IN WRITING OR IN ELECTRONIC FORM OR OTHER FORM PERMITTED BY THE REGULATIONS PRESCRIBED UNDER SECTION 6804 OF THIS TITLE. ALSO ACCORDING TO THE \" XXXX ACT '', FINANCIAL INSTITUTIONS MUST TELL THEIR CUSTOMERS ABOUT THEIR INFORMATION-SHARING PRACTICES AND EXPLAIN TO CUSTOMERS THEIR RIGHT TO \" OPT OUT '' IF THEY DON'T WANT THEIR INFORMATION SHARED WITH CERTAIN THIRD PARTIES. UNDER THE FDCPA, A COLLECTOR MUST PROVIDE YOU WITH INFORMATION ABOUT THE DEBT IN ITS INITIAL COMMUNICATION OR WITHIN FIVE DAYS AFTER THE INITIAL COMMUNICATION. ALSO, THE FDCPA STATES, \" YOU CAN NOT ATTEMPT TO COLLECT AN DEBT WHILE A PERSON ( THE CONSUMER ) SUPRESS VALIDATION. TRANSUNION, XXXX, XXXX, AND THE ACCOUNTS LISTED BELOW HAVE CLEARLY VIOLATED MY RIGHTS : XXXX ACCOUNT # XXXX, XXXX XXXX XXXX ACCOUNT # XXXXXXXX XXXX XXXX XXXX XXXX ACCOUNT # XXXXXXXX XXXX XXXX XXXX ACCOUNT # XXXX, XXXX XXXX XXXX XXXX ACCOUNT # XXXX, AND XXXX ACCOUNT # XXXX. FAILURE TO RESPOND SATISFACTORILY WITH DELETIONS OF ALL THE ABOVE ACCOUNTS WILL RESULT IN LEGAL ACTIONS BEING TAKEN AGAINST, TRANSUNION, XXXX, XXXX, WHICH I'LL BE SEEKING A {$1000.00} PER VIOLATION FOR DEFAMATION OF CHARACTER ( PER SE ) NEGLIGENT ENABLEMENT OF IDENTITY FRAUD. 15 USC 1681 VIOLATIONS FOR WILLFUL NONCOMPLIANCE-616 CIVIL LIABILITY FOR WILLFUL NONCOPLIANCE. THIS IS THE THIRD TIME I'VE SUBMITTED A COMPLAINT, AND THE REPONSE I GET IS \" YOU CAN NOT LOCATE MY CREDIT REPORT! 
'' THIS IS CLEARLY NEGLIGENCE.\n", + "4. I do not know how this works, but I need it done or somehow corrected. My name is XXXX XXXX, XXXX XXXX XXXX XXXX TN XXXXMy SS XXXX DOB XXXX. I had some issues with my income being affected by the COVID-19PANDEMICSHUTDOWN. I was under the 1 CARESAct, Pub. L. 116-136, section 4021, codified at FCRAsection 623 ( a ) ( 1 ) ( F ) ( i ) ( I ), 15 U.S.C.1681s- 2 ( a ) ( 1 ) ( F ) ( i ) ( I ). I am requesting some accommodations so I care to protect the integrity of my credit file. US DEPT OF ED / XXXX # XXXX, # XXXX accounts are reporting on XXXX, XXXX The was 30,60, 90 DAYS LATEsince requested assistance due to the pandemic. I found a few accounts that I have never done any business with these companies and the accounts do not belong on my report : XXXX XXXX # XXXX, XXXX XXXX XXXX XXXX # XXXX. \n", + "\n", + "I have some issues with the misspelling of my name, my correct spelling is XXXX XXXX. Please remove any other variation of my name they are not correct. The following addresses do not belong to me please delete them : XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXXSC, XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX\n", + "5. I want to know if this is even legal?! How can they disclose information without knowing its a correct email?!\n", + "\n", + "comment list 2:\n", + "1. Hello, my name is XXXX XXXX, and I am writing to delete the following information in my file. The items I need deleted are listed in the report. I am a victim of identity theft and did not make the charge. I ask that the items be deleted to correct my credit report. I reported the theft of my identity to the Federal Trade Commission and I also have enclosed copies of the Federal Trade Commissions Identity Theft Affidavit. Please delete the items as soon as possible. The accounts are being reported currently open and the accounts need to be closed. \n", + "XXXX account number XXXX opened on XX/XX/2022 for the amount {$530.00} XXXX XXXX XXXX account number XXXX opened on XX/XX/2022 for the amount of {$140.00} The accounts are being reported currently open and need to be closed immediately. \n", + "Based on, 15 U.S. Code 1681c2 a consumer reporting agency shall block the reporting of any information in the file of a consumer that the consumer identifies as information that resulted from an alleged identity theft, not later than 4 business days after the date of receipt. This account should not be furnished on my consumer report. As a consumer I am demanding the deletion of the accounts listed IMMEDIATELY.\n", + "2. To whom it may concern : My personal information was breach in the internet as result accounts had been open in my name, I was advise to fill out an Id theft report to help me deal with this situation, I have listed each one of the accounts that do not belong to me. This is my second request to remove unverified items in my report, but XXXX keep rposting these account with out providing any type of original document as the FCRA provide, you need to provide me with original documents or remove these account immediately.\n", + "3. Ive been Disputting my XXXX XXXX I opened this account and someone got my information and used my card, I contacted XXXX over and over, they removed the negative reporting from my XXXX report but still reporting it negative on my XXXX and Expean this is very unfair to me because Im a victim of identity theft\n", + "4. 
Today, XX/XX/2021, I received three items in the mail, one envelope containing an unsolicited debit card from Navy Federal credit Union and the other two, with a letter each describing The Important Rights on two accounts should these accounts become delinquent under New York law. \n", + "\n", + "First of all, I never applied for these accounts with Navy Federal, not have I authorized anyone to do so on my behalf. I immediately contacted Navy Federal via phone and was told I was most likely a victim of identity theft and that I should monitor my credit and use a credit monitoring service. I was also asked for my email and mailing information in order to receive a letter from them regarding this issue. \n", + "\n", + "My main concern is having someone using my identity to illegally open bank accounts and commit fraud, destroying my credit and finances in the process. This bank is in another state from where I reside. I have not lived in Virginia nor do I intend to do so in the foreseeable future.\n", + "5. My personal information ( including my SSN, Drivers License Info, Addresses, and more ) was stolen from a hacking, and Equifax did n't tell the public about the hack until more than a month after the hacking. During this time, three Equifax executives were caught inside trading. It really shows how Equifax cares about other people!\n", + "\n" + ] + } + ], "source": [ "# Build plain-text prompts to send to PaLM 2. Use only 5 complaints from each group.\n", "prompt1 = 'comment list 1:\\n'\n", @@ -592,11 +1352,100 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": { "id": "BfHGJLirzSvH" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Please highlight the most obvious difference betweenthe two lists of comments:\n", + "comment list 1:\n", + "1. XXXX is a peer to peer lending conmpany that uses borrowers crypto to collateralize loans from investors ( like myself ). I've been investing with them for almost XXXX years and currently have {$240000.00} tied up in lending products with XXXX. \n", + "As of XXXX days ago we received an email saying all business operations have been ceased and no withdrawals or deposits will be allowed. They said they'll update customers within 10 days, but no one can reach anyone at the company to find out any more details as they are not answering calls nor returning emails. It also appears the company has scrubbed its XXXX page and the XXXX pages of top executives. \n", + "\n", + "All collateral and client 's investment funds are supposedly held at or processed through XXXX XXXX XXXX ( registered SEC company ). XXXX XXXX keeps telling us to contact XXXX and won't give us any information, so we have no way to find out what's happening with our funds/collateral or if everything is gone. We have a XXXX channel up where people are gathering evidence, documentation, etc. This is probably the best place to start to get a broad view of what's happening. Details below. 
\n", + "\n", + "XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX CONST LLC ( Business ID : XXXX ) FoXXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX 'Cease of Operations ' email received by all investors XXXX XX/XX/2022 at XXXX : \" Dear XXXX Users, Given the collapses of several cryptocurrencies so far this year and the rapidly deteriorating market conditions that have been prompting heavy withdrawals across all XXXX lending and XXXX exchange platforms recently, we are sad to inform you that we are unable to continue to operate our business as usual. As such, we are limiting our business activities, including pausing user withdrawals as allowed under our Terms of XXXX. \n", + "No deposit or investment request will be processed at this time. \n", + "\n", + "Our team is working diligently towards our objective of maximizing value for all of our Users, and our top priority continues to be to protect your interests. As we explore all options available to us, we will provide updates to you as we go. \n", + "\n", + "We hope to communicate with you within the next XXXX business days on the next steps to address the situation. We appreciate your patience in this trying time. \n", + "\n", + "Sincerely yoursXXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX\n", + "2. Submitted XX/XX/XXXX\n", + "Typed XX/XX/XXXX:\n", + "\n", + "XX/XX/XXXX\n", + "XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX, XXXX XXXX\n", + "PH:. XXXX\n", + "PH: XXXX\n", + "EM:\n", + "XXXX\n", + "XXXX\n", + "XXXX XXXX \n", + "XXXX XXXX\n", + "Date of Birth XX/XX/XXXX\n", + "SS#: XXXX\n", + "TO:\n", + "* Consumer Financial Protection Brueau\n", + "* Department of Veteran Affairs, Office of the Inspector General\n", + "My name is XXXX XXXX XXXX, I've received more than one email from Discover Card in my XXXX XXXX, past emails from Discover Card were unautherized deletions.\n", + "From: Discover Card XXXX\n", + "To: You XXXX\n", + "Date: XX/XX/XXXX, XXXX XXXX XXXX From: Discover Card XXXX>\n", + "To Recipient \n", + "Date Mon, XX/XX/XXXX XXXX XXXX\n", + "I dont and havent ever had a Discover Checking, Savings, Business Accounts nor Loans of any kind through any Bank called Discover. The 1st time I was contacted by Discover Card I resided alone from XX/XX/XXXX to XX/XX/XXXXat XXXX XXXX XXXX at XXXX XXXX XXXX XXXX XXXX in XXXX, XXXX years prior to me moving here to XXXX, XXXX in XX/XX/XXXX. When \n", + "\n", + "\n", + "Discover Card had 1st contacted me in XXXX, XXXX it was associated with my XXXX XXXX XXXX website related online Merchants Account. Not once have I ever applied for or had any Website Merchant Accounts here in XXXX; I only applied for online online Merchant Accounts associated with my XXXX related Accounts I purchased while residing in XXXX, XXXX. Some of my website related information was stolen both in XXXX, XXXX and here in XXXX along with my other property that hasn't been returned to me. 
I don't and haven't ever had any XXXX XXXX related Agreements,Contracts or Credit Cards offered to Veterans associated with ones businesses. Nor have I ever applied for or had a Business License or Business Permit in any City or State inspite of my diverse interest. Not once have I ever allowed another be it an Paralegal, Payee, Attorney, Employers, Landlords, Veteran Organizations including Vocational Rehabilitation Programs, XXXX( XXXX XXXX XXXX, XXXX XXXX, Entertainment Companies, Banks, Celebrity Personal Assistant Agencies or Celebs, Shelters, Charities, HUD, Housing Arthority, Department of Veteran Affairs, Military, Law Enforcement or anyone else nor their employess to sign any business related Agreements or Contracts on my behalf; not even my family members or friends. \n", + "None of my XXXX XXXX attempts were associated with my Employers, Department of Veteran Affairs,Vocational Rehabilitation Programs Military, Landlords, HUD( Housing Authority),Friends, Family nor did I ever sign related Agreements or Contracts with them. Not once had I ever provided anyone the passwords to be able to sign into my accounts rather were aware of my accounts or not. Yes, my desktop computer that was stolen along with my other property XX/XX/XXXX was registered with my Online Merchant Account. I had paid for my Merchant related Accounts through my same XXXX XXXX XXXX Account I purchased both of my XXXX XXXX XXXX related accounts through. That was 1st once during the Summer of XX/XX/XXXX and 2nd my related website months later, while I resided in XXXX XXXX and I worked for XXXX. I never offered nor did I ever sign any business Contracts or Agreements with XXXX nor my Landlord or their staff associted with any of my online websites or Merchant Accounts. My XXXX XXXX XXXX Compensation was deposited into both of my XXXX XXXX XXXX Accounts at that time. My account was changed during the Summer of XX/XX/XXXXbecause of theft of my Bank Card. None of my Checking,Savings, past Credit Cards or Business related were shared accounts in which others were allowed to \n", + "use to make purchases. I had written checks from my XXXX XXXX XXXX account to pay for my XXXX XXXX XXXX XXXX on the XXXX XXXX here in XXXX in XX/XX/XXXX before it's name changed to XXXX XXXX. Prior to me using my same account open a Checking account in person at XXXX XXXX before it's name was changed to XXXX XXXX. Where my XXXX XXXX XXXX XXXX has been deposited since that time. I had used my XXXX XXXX Checking to pay for my XXXX XXXX XXXX XXXX both before theft of my property XX/XX/XXXX and that was also prior to the theft of my property from my XXXX XXXX XXXX XXXX in XX/XX/XXXX.\n", + "I've stated this many times:\n", + "I paid for my 1st XXXX XXXX XXXX Membership while employed at XXXX using my XXXX XXXX XXXX account XXXX my XXXX XXXX XXXX XXXX was also deposited. That was changed to XXXX because I didn't receive my 1st XXXX XXXX XXXX Card the bank sent to XXXX XXXX residence on XXXX XXXX in XX/XX/XXXX while I was there. In which both my XXXX salary and XXXX XXXX XXXX XXXX were deposited into my account, no money from XXXX XXXX nor anyone else that was at that residence was given to nor were any of my children there. Nor did XXXX or any other person at that residence ever give me my missing Bank Card not even after I moved out and stayed a month at XXXX XXXX XXXX using my replacement card to pay for my Hotel room. 
Which is the same account I used to pay for XXXX XXXX Membership, XXXX XXXX XXXX, XXXX XXXX Membership fees, and various online Merchant Account activation related fees.\n", + "* XXXX XXXX XXXX.\n", + "XXXX XXXX XXXX XXXX. Membership\n", + "\n", + "# XXXX\n", + "* XXXX XXXX Membership\n", + "# XXXX\n", + "* Total Merchant Services XXXX and XXXX.\n", + "* XXXX XXXX XXXX XXXX XXXX\n", + "* XXXX XXXX changed my $XXXX a month fees to my XXXX XXXX XXXX account #XXXX.\n", + "XX/XX/XXXX - XX/XX/XXXX XXXX XXXX, XXXX.\n", + "\n", + "Rep: XXXX XXXX XXXX, Fl \n", + "XXXX\n", + "XXXX Website \n", + "XXXX\n", + "Software and website owner, I performed Internet advertising and marketing, to promote this software and website. I worked and XXXX from my home XXXX XXXX XXXX XXXX XXXX , XXXX. I purchased XXXX XXXX XXXX-Software Electronic Book CD and was given a website to promote the software on the internet. The XXXX was given a copy of my website owner certificate document submitted to me when I purchased the software marketing program as well copies of my other school transcripts in addition to XXXX XXXX XXXX for example. XXXX, represented the first initials of my children's names. I wasn't ever paid and I'm still owed the money. Nor did my marketing program have anything to do with any schools, college nor university programs nor did I ever offer or sign any agreement to include it such. Nor did my XXXX XXXX XXXX have anything to do with any other employers, Department of Family and Children, Military, Veteran Organizations or Food Stamp programs, Section 8 nor Indianapolis Housing Authority for example; only me.\n", + "Thank you,\n", + "XXXX XXXX\n", + "3. ACCORDING TO 15 U.S. CODE 6803-DISCLOSURE OF INSTITUTION PRIVACY POLICY, AND ACCORDING TO U.S. CODE 6802- OBLIGATIONS WITH RESPECT TO DISCLOSURES OF PERSONAL INFORMATION. ( b ) OPT OUT ( 1 ) IN GENERAL A FINANCIAL INSTITUTION MAY NOT DISCLOSE NONPUBLIC PERSONAL INFORMATION TO A NONAFFILIATED THIRD PARTY ( TRANSUNION, XXXX, AND XXXX. ) UNLESS- ( A ) SUCH FINANCIAL INSTITUTION CLEARLY AND CONSPICUOUSLY DISCLOSES TO THE CONSUMER, IN WRITING OR IN ELECTRONIC FORM OR OTHER FORM PERMITTED BY THE REGULATIONS PRESCRIBED UNDER SECTION 6804 OF THIS TITLE. ALSO ACCORDING TO THE \" XXXX ACT '', FINANCIAL INSTITUTIONS MUST TELL THEIR CUSTOMERS ABOUT THEIR INFORMATION-SHARING PRACTICES AND EXPLAIN TO CUSTOMERS THEIR RIGHT TO \" OPT OUT '' IF THEY DON'T WANT THEIR INFORMATION SHARED WITH CERTAIN THIRD PARTIES. UNDER THE FDCPA, A COLLECTOR MUST PROVIDE YOU WITH INFORMATION ABOUT THE DEBT IN ITS INITIAL COMMUNICATION OR WITHIN FIVE DAYS AFTER THE INITIAL COMMUNICATION. ALSO, THE FDCPA STATES, \" YOU CAN NOT ATTEMPT TO COLLECT AN DEBT WHILE A PERSON ( THE CONSUMER ) SUPRESS VALIDATION. TRANSUNION, XXXX, XXXX, AND THE ACCOUNTS LISTED BELOW HAVE CLEARLY VIOLATED MY RIGHTS : XXXX ACCOUNT # XXXX, XXXX XXXX XXXX ACCOUNT # XXXXXXXX XXXX XXXX XXXX XXXX ACCOUNT # XXXXXXXX XXXX XXXX XXXX ACCOUNT # XXXX, XXXX XXXX XXXX XXXX ACCOUNT # XXXX, AND XXXX ACCOUNT # XXXX. FAILURE TO RESPOND SATISFACTORILY WITH DELETIONS OF ALL THE ABOVE ACCOUNTS WILL RESULT IN LEGAL ACTIONS BEING TAKEN AGAINST, TRANSUNION, XXXX, XXXX, WHICH I'LL BE SEEKING A {$1000.00} PER VIOLATION FOR DEFAMATION OF CHARACTER ( PER SE ) NEGLIGENT ENABLEMENT OF IDENTITY FRAUD. 15 USC 1681 VIOLATIONS FOR WILLFUL NONCOMPLIANCE-616 CIVIL LIABILITY FOR WILLFUL NONCOPLIANCE. THIS IS THE THIRD TIME I'VE SUBMITTED A COMPLAINT, AND THE REPONSE I GET IS \" YOU CAN NOT LOCATE MY CREDIT REPORT! 
'' THIS IS CLEARLY NEGLIGENCE.\n", + "4. I do not know how this works, but I need it done or somehow corrected. My name is XXXX XXXX, XXXX XXXX XXXX XXXX TN XXXXMy SS XXXX DOB XXXX. I had some issues with my income being affected by the COVID-19PANDEMICSHUTDOWN. I was under the 1 CARESAct, Pub. L. 116-136, section 4021, codified at FCRAsection 623 ( a ) ( 1 ) ( F ) ( i ) ( I ), 15 U.S.C.1681s- 2 ( a ) ( 1 ) ( F ) ( i ) ( I ). I am requesting some accommodations so I care to protect the integrity of my credit file. US DEPT OF ED / XXXX # XXXX, # XXXX accounts are reporting on XXXX, XXXX The was 30,60, 90 DAYS LATEsince requested assistance due to the pandemic. I found a few accounts that I have never done any business with these companies and the accounts do not belong on my report : XXXX XXXX # XXXX, XXXX XXXX XXXX XXXX # XXXX. \n", + "\n", + "I have some issues with the misspelling of my name, my correct spelling is XXXX XXXX. Please remove any other variation of my name they are not correct. The following addresses do not belong to me please delete them : XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXXSC, XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX\n", + "5. I want to know if this is even legal?! How can they disclose information without knowing its a correct email?!\n", + "comment list 2:\n", + "1. Hello, my name is XXXX XXXX, and I am writing to delete the following information in my file. The items I need deleted are listed in the report. I am a victim of identity theft and did not make the charge. I ask that the items be deleted to correct my credit report. I reported the theft of my identity to the Federal Trade Commission and I also have enclosed copies of the Federal Trade Commissions Identity Theft Affidavit. Please delete the items as soon as possible. The accounts are being reported currently open and the accounts need to be closed. \n", + "XXXX account number XXXX opened on XX/XX/2022 for the amount {$530.00} XXXX XXXX XXXX account number XXXX opened on XX/XX/2022 for the amount of {$140.00} The accounts are being reported currently open and need to be closed immediately. \n", + "Based on, 15 U.S. Code 1681c2 a consumer reporting agency shall block the reporting of any information in the file of a consumer that the consumer identifies as information that resulted from an alleged identity theft, not later than 4 business days after the date of receipt. This account should not be furnished on my consumer report. As a consumer I am demanding the deletion of the accounts listed IMMEDIATELY.\n", + "2. To whom it may concern : My personal information was breach in the internet as result accounts had been open in my name, I was advise to fill out an Id theft report to help me deal with this situation, I have listed each one of the accounts that do not belong to me. This is my second request to remove unverified items in my report, but XXXX keep rposting these account with out providing any type of original document as the FCRA provide, you need to provide me with original documents or remove these account immediately.\n", + "3. Ive been Disputting my XXXX XXXX I opened this account and someone got my information and used my card, I contacted XXXX over and over, they removed the negative reporting from my XXXX report but still reporting it negative on my XXXX and Expean this is very unfair to me because Im a victim of identity theft\n", + "4. 
Today, XX/XX/2021, I received three items in the mail, one envelope containing an unsolicited debit card from Navy Federal credit Union and the other two, with a letter each describing The Important Rights on two accounts should these accounts become delinquent under New York law. \n",
+ "\n",
+ "First of all, I never applied for these accounts with Navy Federal, not have I authorized anyone to do so on my behalf. I immediately contacted Navy Federal via phone and was told I was most likely a victim of identity theft and that I should monitor my credit and use a credit monitoring service. I was also asked for my email and mailing information in order to receive a letter from them regarding this issue. \n",
+ "\n",
+ "My main concern is having someone using my identity to illegally open bank accounts and commit fraud, destroying my credit and finances in the process. This bank is in another state from where I reside. I have not lived in Virginia nor do I intend to do so in the foreseeable future.\n",
+ "5. My personal information ( including my SSN, Drivers License Info, Addresses, and more ) was stolen from a hacking, and Equifax did n't tell the public about the hack until more than a month after the hacking. During this time, three Equifax executives were caught inside trading. It really shows how Equifax cares about other people!\n",
+ "\n"
+ ]
+ }
+ ],
 "source": [
 "# The plain English request we will make of PaLM 2\n",
 "prompt = (\n",
@@ -616,22 +1465,42 @@
 },
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
 "metadata": {
 "id": "mL5P0_3X04dE"
 },
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Query job 66e3af22-91cb-400a-92c3-69e7cd12ee01 is DONE. 0 Bytes processed. Open Job"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
 "source": [
 "from bigframes.ml.llm import PaLM2TextGenerator\n",
 "\n",
- "session = bf.get_global_session()\n",
- "connection = f\"{PROJECT_ID}.{REGION}.{CONN_NAME}\"\n",
- "q_a_model = PaLM2TextGenerator(session=session, connection_name=connection)"
+ "q_a_model = PaLM2TextGenerator()"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 39,
 "metadata": {
 "id": "ICWHsqAW1FNk"
 },
@@ -643,11 +1512,58 @@
 },
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 40,
 "metadata": {
 "id": "gB7e1LXU1pst"
 },
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Query job 653add17-29be-408c-8882-064217f8556e is DONE. 0 Bytes processed. Open Job"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Query job 8fd16954-853a-45fd-80bc-65b1242429e2 is DONE. 8 Bytes processed. Open Job"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Query job d9929bcb-26ce-4844-b68e-f4a980b90ede is DONE. 171 Bytes processed. 
Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "' The first comment list is about people complaining about companies or services, while the second comment list is about people reporting identity theft or fraud.'" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Send the request for PaLM 2 to generate a response to our prompt\n", "major_difference = q_a_model.predict(df)\n", @@ -662,6 +1578,21 @@ "source": [ "We now see PaLM2TextGenerator's characterization of the different comment groups. Thanks for using BigQuery DataFrames!" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Summary and next steps\n", + "\n", +<<<<<<< HEAD + "You've used the ML and LLM capabilities of BigQuery DataFrames to help analyze and understand a large dataset of unstructured feedback.\n", +======= + "You've used BigQuery DataFrames' integration with LLM models (`bigframes.ml.llm`) to generate code samples, and have tranformed LLM output by creating and using a custom function in BigQuery DataFrames.\n", +>>>>>>> origin/lmm-kmeans-notebook + "\n", + "Learn more about BigQuery DataFrames in the [documentation](https://cloud.google.com/python/docs/reference/bigframes/latest) and find more sample notebooks in the [GitHub repo](https://github.com/googleapis/python-bigquery-dataframes/tree/main/notebooks)." + ] } ], "metadata": { @@ -682,7 +1613,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.13" } }, "nbformat": 4, From 416171a70d91d4a6b71622ba72685147ab7d6186 Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Thu, 16 Nov 2023 11:04:18 -0800 Subject: [PATCH 02/26] feat!: model.predict returns all the columns (#204) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- bigframes/ml/cluster.py | 4 +- bigframes/ml/decomposition.py | 9 +- bigframes/ml/ensemble.py | 49 +- bigframes/ml/forecasting.py | 9 +- bigframes/ml/imported.py | 22 +- bigframes/ml/linear_model.py | 24 +- bigframes/ml/llm.py | 17 +- .../getting_started/ml_fundamentals.ipynb | 3586 ++++++++++------- .../sklearn_linear_regression.ipynb | 1192 +++--- tests/system/large/ml/test_cluster.py | 4 +- tests/system/large/ml/test_ensemble.py | 2 +- tests/system/large/ml/test_pipeline.py | 4 +- tests/system/small/ml/test_cluster.py | 4 +- tests/system/small/ml/test_ensemble.py | 20 +- tests/system/small/ml/test_forecasting.py | 4 +- tests/system/small/ml/test_imported.py | 8 +- tests/system/small/ml/test_linear_model.py | 8 +- tests/system/small/ml/test_llm.py | 22 +- .../sklearn/cluster/_kmeans.py | 16 +- .../sklearn/linear_model/_base.py | 6 +- .../bigframes_vendored/xgboost/sklearn.py | 2 +- 21 files changed, 2737 insertions(+), 2275 deletions(-) diff --git a/bigframes/ml/cluster.py b/bigframes/ml/cluster.py index 772b90f666..c9f52ba0b6 100644 --- a/bigframes/ml/cluster.py +++ b/bigframes/ml/cluster.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import cast, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union from google.cloud import bigquery @@ -92,7 +92,7 @@ def predict( (X,) = utils.convert_to_dataframe(X) - return cast(bpd.DataFrame, self._bqml_model.predict(X)[["CENTROID_ID"]]) + return self._bqml_model.predict(X) def to_gbq(self, model_name: str, replace: bool = False) -> KMeans: """Save the model to BigQuery. diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index 8e6be6d28c..7cda7a6993 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import cast, List, Optional, Union +from typing import List, Optional, Union from google.cloud import bigquery @@ -106,12 +106,7 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: (X,) = utils.convert_to_dataframe(X) - return cast( - bpd.DataFrame, - self._bqml_model.predict(X)[ - ["principal_component_" + str(i + 1) for i in range(self.n_components)] - ], - ) + return self._bqml_model.predict(X) def to_gbq(self, model_name: str, replace: bool = False) -> PCA: """Save the model to BigQuery. 
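To make the breaking change concrete before the remaining per-estimator hunks: after this patch, every `predict` in `bigframes.ml` returns the input columns together with the prediction columns, rather than only the prediction or label columns. The sketch below is illustrative and not part of the diff; it assumes a configured BigQuery DataFrames session, borrows the public penguins table and feature columns used by the notebooks in this series, and relies on `CENTROID_ID` being the prediction column that the old `cluster.py` code selected above.

```python
import bigframes.pandas as bpd
from bigframes.ml.cluster import KMeans

# Load the public penguins table and keep a few numeric feature columns.
df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins").dropna()
features = df[["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"]]

model = KMeans(n_clusters=3)
model.fit(features)

predictions = model.predict(features)
# Before this patch: predictions held only the "CENTROID_ID" column.
# After this patch: predictions also carries the input feature columns, so
# callers that depended on the old narrow shape should select it explicitly:
centroid_ids = predictions[["CENTROID_ID"]]
```

The same explicit column selection migrates callers of the ensemble, forecasting, imported, linear, and LLM predictors changed in the hunks below (for example, keeping only the label columns, or only the generated-text column for `PaLM2TextGenerator`).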
diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index 19ca8608ff..fcb3fe5343 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import cast, Dict, List, Literal, Optional, Union +from typing import Dict, List, Literal, Optional, Union from google.cloud import bigquery @@ -168,16 +168,7 @@ def predict( raise RuntimeError("A model must be fitted before predict") (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, @@ -328,19 +319,9 @@ def _fit( def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") - (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, @@ -486,19 +467,9 @@ def predict( ) -> bpd.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") - (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, @@ -661,19 +632,9 @@ def predict( ) -> bpd.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") - (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index 8e309d5e73..cf23854fa0 100644 --- a/bigframes/ml/forecasting.py +++ b/bigframes/ml/forecasting.py @@ -16,7 +16,7 @@ from __future__ import annotations -from typing import cast, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union from google.cloud import bigquery @@ -24,8 +24,6 @@ from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd -_PREDICT_OUTPUT_COLUMNS = ["forecast_timestamp", "forecast_value"] - class ARIMAPlus(base.SupervisedTrainablePredictor): """Time Series ARIMA Plus model.""" @@ -100,10 +98,7 @@ def predict(self, X=None) -> bpd.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") - return cast( - bpd.DataFrame, - self._bqml_model.forecast()[_PREDICT_OUTPUT_COLUMNS], - ) + return self._bqml_model.forecast() def score( self, diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index fb8aa98bef..f6afc9aa38 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -78,16 +78,7 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def to_gbq(self, model_name: str, replace: bool = False) -> TensorFlowModel: """Save the model to BigQuery. 
@@ -161,16 +152,7 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def to_gbq(self, model_name: str, replace: bool = False) -> ONNXModel: """Save the model to BigQuery. diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index f11879500b..433d9fbc38 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import cast, Dict, List, Literal, Optional, Union +from typing import Dict, List, Literal, Optional, Union from google.cloud import bigquery @@ -145,16 +145,7 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, @@ -267,16 +258,7 @@ def predict( (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 3cfc28e61f..93e2ba825f 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -149,7 +149,8 @@ def predict( Returns: - bigframes.dataframe.DataFrame: Output DataFrame with only 1 column as the output text results.""" + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. + """ # Params reference: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models if temperature < 0.0 or temperature > 1.0: @@ -181,11 +182,7 @@ def predict( "top_p": top_p, "flatten_json_output": True, } - df = self._bqml_model.generate_text(X, options) - return cast( - bpd.DataFrame, - df[[_TEXT_GENERATE_RESULT_COLUMN]], - ) + return self._bqml_model.generate_text(X, options) class PaLM2TextEmbeddingGenerator(base.Predictor): @@ -269,7 +266,7 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: Input DataFrame, which needs to contain a column with name "content". Only the column will be used as input. Content can include preamble, questions, suggestions, instructions, or examples. Returns: - bigframes.dataframe.DataFrame: Output DataFrame with only 1 column as the output embedding results + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. 
""" # Params reference: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models @@ -287,8 +284,4 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: options = { "flatten_json_output": True, } - df = self._bqml_model.generate_text_embedding(X, options) - return cast( - bpd.DataFrame, - df[[_EMBED_TEXT_RESULT_COLUMN]], - ) + return self._bqml_model.generate_text_embedding(X, options) diff --git a/notebooks/getting_started/ml_fundamentals.ipynb b/notebooks/getting_started/ml_fundamentals.ipynb index 2f566dd704..165bd90f31 100644 --- a/notebooks/getting_started/ml_fundamentals.ipynb +++ b/notebooks/getting_started/ml_fundamentals.ipynb @@ -14,46 +14,16 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 1, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0c8a8bc0b4d64448aef68d6a98fae666", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job 28e903c6-e874-4b99-8f53-0755e0b0c188 is RUNNING. " ] }, "metadata": {}, @@ -61,13 +31,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9680fd748e0546b4a010fda0155c5027", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e8aba858-7660-4274-8d90-8d2b0382f8f6 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 7950d6a7-3747-4454-bba2-9660e830647f is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -117,250 +85,250 @@ " \n", " 0\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 36.6\n", - " 18.4\n", - " 184.0\n", - " 3475.0\n", - " FEMALE\n", + " Biscoe\n", + " 40.1\n", + " 18.9\n", + " 188.0\n", + " 4300.0\n", + " MALE\n", " \n", " \n", " 1\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 39.8\n", - " 19.1\n", - " 184.0\n", - " 4650.0\n", + " Torgersen\n", + " 39.1\n", + " 18.7\n", + " 181.0\n", + " 3750.0\n", " MALE\n", " \n", " \n", " 2\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.9\n", - " 18.9\n", - " 184.0\n", - " 3900.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 47.4\n", + " 14.6\n", + " 212.0\n", + " 4725.0\n", + " FEMALE\n", " \n", " \n", " 3\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 46.5\n", - " 17.9\n", - " 192.0\n", - " 3500.0\n", + " 42.5\n", + " 16.7\n", + " 187.0\n", + " 3350.0\n", " FEMALE\n", " \n", " \n", " 4\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.3\n", - " 16.8\n", - " 192.0\n", - " 3000.0\n", - " FEMALE\n", + " Biscoe\n", + " 43.2\n", + " 19.0\n", + " 197.0\n", + " 4775.0\n", + " MALE\n", " \n", " \n", " 5\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 43.2\n", - " 18.5\n", - " 192.0\n", - " 4100.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.7\n", + " 15.3\n", + " 219.0\n", + " 5200.0\n", " MALE\n", " \n", " \n", " 6\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 46.9\n", - " 16.6\n", - " 192.0\n", - " 2700.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 41.3\n", + " 21.1\n", + " 195.0\n", + " 4400.0\n", + " MALE\n", " \n", " \n", " 7\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 50.5\n", - " 18.4\n", - " 200.0\n", - " 3400.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 45.2\n", + " 13.8\n", + " 215.0\n", + " 4750.0\n", " FEMALE\n", " \n", " \n", " 8\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " 
Dream\n", - " 49.5\n", - " 19.0\n", - " 200.0\n", - " 3800.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.5\n", + " 13.5\n", + " 210.0\n", + " 4550.0\n", + " FEMALE\n", " \n", " \n", " 9\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.2\n", - " 20.1\n", - " 200.0\n", - " 3975.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 50.5\n", + " 15.2\n", + " 216.0\n", + " 5000.0\n", + " FEMALE\n", " \n", " \n", " 10\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.8\n", - " 18.9\n", - " 208.0\n", - " 4300.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 48.2\n", + " 15.6\n", + " 221.0\n", + " 5100.0\n", " MALE\n", " \n", " \n", " 11\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 39.0\n", - " 18.7\n", - " 185.0\n", - " 3650.0\n", - " MALE\n", + " 38.1\n", + " 18.6\n", + " 190.0\n", + " 3700.0\n", + " FEMALE\n", " \n", " \n", " 12\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.0\n", - " 16.9\n", - " 185.0\n", - " 3000.0\n", - " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 50.7\n", + " 15.0\n", + " 223.0\n", + " 5550.0\n", + " MALE\n", " \n", " \n", " 13\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 47.0\n", - " 17.3\n", - " 185.0\n", - " 3700.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 37.8\n", + " 20.0\n", + " 190.0\n", + " 4250.0\n", + " MALE\n", " \n", " \n", " 14\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 34.0\n", - " 17.1\n", - " 185.0\n", - " 3400.0\n", + " Biscoe\n", + " 35.0\n", + " 17.9\n", + " 190.0\n", + " 3450.0\n", " FEMALE\n", " \n", " \n", " 15\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.0\n", - " 16.5\n", - " 185.0\n", - " 3400.0\n", - " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 48.7\n", + " 15.7\n", + " 208.0\n", + " 5350.0\n", + " MALE\n", " \n", " \n", " 16\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 45.7\n", - " 17.3\n", - " 193.0\n", - " 3600.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Torgersen\n", + " 34.6\n", + " 21.1\n", + " 198.0\n", + " 4400.0\n", + " MALE\n", " \n", " \n", " 17\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 50.6\n", - " 19.4\n", - " 193.0\n", - " 3800.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.8\n", + " 15.4\n", + " 215.0\n", + " 5150.0\n", " MALE\n", " \n", " \n", " 18\n", - " Adelie Penguin (Pygoscelis adeliae)\n", + " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 39.7\n", - " 17.9\n", - " 193.0\n", - " 4250.0\n", + " 50.3\n", + " 20.0\n", + " 197.0\n", + " 3300.0\n", " MALE\n", " \n", " \n", " 19\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 37.8\n", + " 37.2\n", " 18.1\n", - " 193.0\n", - " 3750.0\n", + " 178.0\n", + " 3900.0\n", " MALE\n", " \n", " \n", " 20\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 46.6\n", - " 17.8\n", - " 193.0\n", - " 3800.0\n", - " FEMALE\n", + " 51.0\n", + " 18.8\n", + " 203.0\n", + " 4100.0\n", + " MALE\n", " \n", " \n", " 21\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 51.3\n", - " 19.2\n", - " 193.0\n", - " 3650.0\n", - " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 40.5\n", + " 17.9\n", + " 187.0\n", + " 3200.0\n", + " FEMALE\n", " \n", " \n", " 22\n", - " Adelie Penguin (Pygoscelis 
adeliae)\n", - " Dream\n", - " 40.2\n", - " 17.1\n", - " 193.0\n", - " 3400.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 45.5\n", + " 13.9\n", + " 210.0\n", + " 4200.0\n", " FEMALE\n", " \n", " \n", " 23\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 36.8\n", + " 42.2\n", " 18.5\n", - " 193.0\n", - " 3500.0\n", + " 180.0\n", + " 3550.0\n", " FEMALE\n", " \n", " \n", " 24\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 49.6\n", - " 18.2\n", - " 193.0\n", + " 51.7\n", + " 20.3\n", + " 194.0\n", " 3775.0\n", " MALE\n", " \n", @@ -370,86 +338,86 @@ "[334 rows x 7 columns in total]" ], "text/plain": [ - " species island \\\n", - "penguin_id \n", - "0 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "1 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "2 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "3 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "4 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "5 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "6 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "7 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "8 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "9 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "10 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "11 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "12 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "13 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "14 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "15 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "16 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "17 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "18 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "19 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "20 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "21 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "22 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "23 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "24 Chinstrap penguin (Pygoscelis antarctica) Dream \n", + " species island \\\n", + "penguin_id \n", + "0 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "1 Adelie Penguin (Pygoscelis adeliae) Torgersen \n", + "2 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "3 Chinstrap penguin (Pygoscelis antarctica) Dream \n", + "4 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "5 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "6 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "7 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "8 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "9 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "10 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "11 Adelie Penguin (Pygoscelis adeliae) Dream \n", + "12 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "13 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "14 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "15 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "16 Adelie Penguin (Pygoscelis adeliae) Torgersen \n", + "17 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "18 Chinstrap penguin (Pygoscelis antarctica) Dream \n", + "19 Adelie Penguin (Pygoscelis adeliae) Dream \n", + "20 Chinstrap penguin (Pygoscelis antarctica) Dream \n", + "21 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "22 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "23 Adelie Penguin (Pygoscelis adeliae) Dream \n", + "24 Chinstrap penguin (Pygoscelis antarctica) Dream \n", "\n", " culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g \\\n", 
"penguin_id \n", - "0 36.6 18.4 184.0 3475.0 \n", - "1 39.8 19.1 184.0 4650.0 \n", - "2 40.9 18.9 184.0 3900.0 \n", - "3 46.5 17.9 192.0 3500.0 \n", - "4 37.3 16.8 192.0 3000.0 \n", - "5 43.2 18.5 192.0 4100.0 \n", - "6 46.9 16.6 192.0 2700.0 \n", - "7 50.5 18.4 200.0 3400.0 \n", - "8 49.5 19.0 200.0 3800.0 \n", - "9 40.2 20.1 200.0 3975.0 \n", - "10 40.8 18.9 208.0 4300.0 \n", - "11 39.0 18.7 185.0 3650.0 \n", - "12 37.0 16.9 185.0 3000.0 \n", - "13 47.0 17.3 185.0 3700.0 \n", - "14 34.0 17.1 185.0 3400.0 \n", - "15 37.0 16.5 185.0 3400.0 \n", - "16 45.7 17.3 193.0 3600.0 \n", - "17 50.6 19.4 193.0 3800.0 \n", - "18 39.7 17.9 193.0 4250.0 \n", - "19 37.8 18.1 193.0 3750.0 \n", - "20 46.6 17.8 193.0 3800.0 \n", - "21 51.3 19.2 193.0 3650.0 \n", - "22 40.2 17.1 193.0 3400.0 \n", - "23 36.8 18.5 193.0 3500.0 \n", - "24 49.6 18.2 193.0 3775.0 \n", + "0 40.1 18.9 188.0 4300.0 \n", + "1 39.1 18.7 181.0 3750.0 \n", + "2 47.4 14.6 212.0 4725.0 \n", + "3 42.5 16.7 187.0 3350.0 \n", + "4 43.2 19.0 197.0 4775.0 \n", + "5 46.7 15.3 219.0 5200.0 \n", + "6 41.3 21.1 195.0 4400.0 \n", + "7 45.2 13.8 215.0 4750.0 \n", + "8 46.5 13.5 210.0 4550.0 \n", + "9 50.5 15.2 216.0 5000.0 \n", + "10 48.2 15.6 221.0 5100.0 \n", + "11 38.1 18.6 190.0 3700.0 \n", + "12 50.7 15.0 223.0 5550.0 \n", + "13 37.8 20.0 190.0 4250.0 \n", + "14 35.0 17.9 190.0 3450.0 \n", + "15 48.7 15.7 208.0 5350.0 \n", + "16 34.6 21.1 198.0 4400.0 \n", + "17 46.8 15.4 215.0 5150.0 \n", + "18 50.3 20.0 197.0 3300.0 \n", + "19 37.2 18.1 178.0 3900.0 \n", + "20 51.0 18.8 203.0 4100.0 \n", + "21 40.5 17.9 187.0 3200.0 \n", + "22 45.5 13.9 210.0 4200.0 \n", + "23 42.2 18.5 180.0 3550.0 \n", + "24 51.7 20.3 194.0 3775.0 \n", "\n", " sex \n", "penguin_id \n", - "0 FEMALE \n", + "0 MALE \n", "1 MALE \n", - "2 MALE \n", + "2 FEMALE \n", "3 FEMALE \n", - "4 FEMALE \n", + "4 MALE \n", "5 MALE \n", - "6 FEMALE \n", + "6 MALE \n", "7 FEMALE \n", - "8 MALE \n", - "9 MALE \n", + "8 FEMALE \n", + "9 FEMALE \n", "10 MALE \n", - "11 MALE \n", - "12 FEMALE \n", - "13 FEMALE \n", + "11 FEMALE \n", + "12 MALE \n", + "13 MALE \n", "14 FEMALE \n", - "15 FEMALE \n", - "16 FEMALE \n", + "15 MALE \n", + "16 MALE \n", "17 MALE \n", "18 MALE \n", "19 MALE \n", - "20 FEMALE \n", - "21 MALE \n", + "20 MALE \n", + "21 FEMALE \n", "22 FEMALE \n", "23 FEMALE \n", "24 MALE \n", @@ -458,7 +426,7 @@ "[334 rows x 7 columns]" ] }, - "execution_count": 18, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -490,18 +458,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "171160f246eb43d1832aeefb055c0851", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job deda90a8-6ec7-419c-8067-e85777bd916f is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 1408053d-cb80-4870-af28-e94b90a20a6d is DONE. 28.9 kB processed. " ] }, "metadata": {}, @@ -509,13 +475,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "eaffac40f94745728e6bd618bebd2c53", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job efe8fa0a-d450-475a-99d5-36beeb985247 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 262885fe-973c-4338-a853-227f9db4835a is DONE. 31.7 kB processed. 
" ] }, "metadata": {}, @@ -523,13 +487,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "68e7ecdc639f4d3ab482830bf6a9da04", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 5022c56d-e605-4cab-be1b-1ecf189588a1 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job fb1dc831-7f6f-42ce-96da-1292d73919b4 is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -537,13 +499,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ebfe197fd88348129ebe2f7d288bf4b9", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 175bd293-d448-4510-b926-1d8cfb4eb5e7 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job e79add79-f1e4-4cf0-bb97-04d153222f19 is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -551,13 +511,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2ae69ea7da5247e8a1f7cd0e049629cb", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job a3a2e68c-f5f3-4237-99ad-44974f29d090 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job cb5ee343-f86e-4795-b0ce-d58854e72e5c is RUNNING. " ] }, "metadata": {}, @@ -596,18 +554,16 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5ed4206cd3ad4cd485315605bf033df2", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job db3365fb-67ca-44cc-a117-88a80dc63cca is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job e65af31c-feda-468d-89c9-dec033574640 is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -615,13 +571,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ac72db21945542558fdd62093d9dc0c3", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job ab78f7ab-a115-448b-92d0-19c091a831ca is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 0455f252-2b94-457e-bad5-672b91d9b51f is RUNNING. 
" ] }, "metadata": {}, @@ -667,47 +621,47 @@ " \n", " \n", " \n", - " 156\n", - " Biscoe\n", - " 46.2\n", - " 14.5\n", - " 209.0\n", - " FEMALE\n", - " Gentoo penguin (Pygoscelis papua)\n", + " 249\n", + " Torgersen\n", + " 41.1\n", + " 18.6\n", + " 189.0\n", + " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 189\n", + " 36\n", " Biscoe\n", - " 35.3\n", - " 18.9\n", - " 187.0\n", + " 43.4\n", + " 14.4\n", + " 218.0\n", " FEMALE\n", - " Adelie Penguin (Pygoscelis adeliae)\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 279\n", + " 74\n", " Biscoe\n", - " 45.1\n", - " 14.5\n", - " 215.0\n", + " 42.8\n", + " 14.2\n", + " 209.0\n", " FEMALE\n", " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 245\n", - " Biscoe\n", - " 49.5\n", - " 16.2\n", - " 229.0\n", - " MALE\n", - " Gentoo penguin (Pygoscelis papua)\n", + " 235\n", + " Dream\n", + " 34.0\n", + " 17.1\n", + " 185.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 343\n", - " Torgersen\n", - " 37.3\n", - " 20.5\n", - " 199.0\n", + " 117\n", + " Dream\n", + " 37.8\n", + " 18.1\n", + " 193.0\n", " MALE\n", " Adelie Penguin (Pygoscelis adeliae)\n", " \n", @@ -719,24 +673,24 @@ "text/plain": [ " island culmen_length_mm culmen_depth_mm flipper_length_mm \\\n", "penguin_id \n", - "156 Biscoe 46.2 14.5 209.0 \n", - "189 Biscoe 35.3 18.9 187.0 \n", - "279 Biscoe 45.1 14.5 215.0 \n", - "245 Biscoe 49.5 16.2 229.0 \n", - "343 Torgersen 37.3 20.5 199.0 \n", + "249 Torgersen 41.1 18.6 189.0 \n", + "36 Biscoe 43.4 14.4 218.0 \n", + "74 Biscoe 42.8 14.2 209.0 \n", + "235 Dream 34.0 17.1 185.0 \n", + "117 Dream 37.8 18.1 193.0 \n", "\n", " sex species \n", "penguin_id \n", - "156 FEMALE Gentoo penguin (Pygoscelis papua) \n", - "189 FEMALE Adelie Penguin (Pygoscelis adeliae) \n", - "279 FEMALE Gentoo penguin (Pygoscelis papua) \n", - "245 MALE Gentoo penguin (Pygoscelis papua) \n", - "343 MALE Adelie Penguin (Pygoscelis adeliae) \n", + "249 MALE Adelie Penguin (Pygoscelis adeliae) \n", + "36 FEMALE Gentoo penguin (Pygoscelis papua) \n", + "74 FEMALE Gentoo penguin (Pygoscelis papua) \n", + "235 FEMALE Adelie Penguin (Pygoscelis adeliae) \n", + "117 MALE Adelie Penguin (Pygoscelis adeliae) \n", "\n", "[5 rows x 6 columns]" ] }, - "execution_count": 20, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -749,18 +703,16 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d6dd794f89724099950dcc927d63d0f5", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 22a72cad-11a6-4f8e-b16d-f92853b8112e is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job d5a173bd-a7dc-42fa-8468-b088d47ccfe0 is RUNNING. " ] }, "metadata": {}, @@ -768,13 +720,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a8ab7ca12e0d43a6803483480e837c6e", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job bc952727-8806-4fe2-abf2-c3a8a2bd9b6d is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c6b6518b-2689-4dc1-a5b0-2a9ab75301eb is RUNNING. 
" ] }, "metadata": {}, @@ -810,24 +760,24 @@ " \n", " \n", " \n", - " 156\n", - " 4800.0\n", + " 249\n", + " 3325.0\n", " \n", " \n", - " 189\n", - " 3800.0\n", + " 36\n", + " 4600.0\n", " \n", " \n", - " 279\n", - " 5000.0\n", + " 74\n", + " 4700.0\n", " \n", " \n", - " 245\n", - " 5800.0\n", + " 235\n", + " 3400.0\n", " \n", " \n", - " 343\n", - " 3775.0\n", + " 117\n", + " 3750.0\n", " \n", " \n", "\n", @@ -837,16 +787,16 @@ "text/plain": [ " body_mass_g\n", "penguin_id \n", - "156 4800.0\n", - "189 3800.0\n", - "279 5000.0\n", - "245 5800.0\n", - "343 3775.0\n", + "249 3325.0\n", + "36 4600.0\n", + "74 4700.0\n", + "235 3400.0\n", + "117 3750.0\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 21, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -880,18 +830,16 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "380c57dc3fe54fbd8ad2fb23f1e66e37", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job f239341e-785f-43e1-bfe0-683132d6f15f is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 03a0eb1c-747e-4c2a-b7b5-d3e4e5a78134 is RUNNING. " ] }, "metadata": {}, @@ -899,13 +847,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3db47aadba854beca71960d846838dc4", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 2d5bbbb9-efc4-4f4e-a8dc-2c7b66b0e5e0 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 70608c84-dac8-4e77-8a9e-00d823b24f37 is RUNNING. " ] }, "metadata": {}, @@ -913,13 +859,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1de81f2944a44cbda3f16fa8a1fae813", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 66120e1c-2471-4a0c-8b82-aeb189c8866a is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job d18fdc32-2152-45d3-8c62-bf9b1556ec47 is RUNNING. " ] }, "metadata": {}, @@ -927,13 +871,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b06cae61a4534388a4e9ed26ce442cc2", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 62825fc4-5b77-43e5-a3e4-525ebfd1285b is DONE. 2.1 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 2a022682-535f-4dc0-80ba-1640306ad9ef is RUNNING. " ] }, "metadata": {}, @@ -941,13 +883,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "977c8eae2c9848e98c5478c41af82633", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 656d1d69-b4ff-4db6-9f2d-28dcf91e2fd7 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c145b39d-7d02-4394-80f0-fc605b2ba256 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -955,13 +895,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "aefc3085fee04c438d0327d400b4b72a", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 466507c8-1474-4725-93e5-baf8ee292e39 is DONE. 8.5 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job fc156a2b-db95-44a3-9ad1-d95b9d290080 is RUNNING. 
" ] }, "metadata": {}, @@ -1002,153 +940,153 @@ " \n", " \n", " 0\n", - " -1.344188\n", - " 0.642519\n", - " -1.193942\n", - " \n", - " \n", - " 1\n", - " -0.750047\n", - " 1.005876\n", - " -1.193942\n", + " -0.750505\n", + " 0.84903\n", + " -0.937262\n", " \n", " \n", " 2\n", - " -0.545811\n", - " 0.90206\n", - " -1.193942\n", + " 0.622496\n", + " -1.322402\n", + " 0.804051\n", " \n", " \n", - " 4\n", - " -1.214219\n", - " -0.188011\n", - " -0.619171\n", + " 3\n", + " -0.299107\n", + " -0.261935\n", + " -1.009817\n", " \n", " \n", " 5\n", - " -0.118772\n", - " 0.694427\n", - " -0.619171\n", + " 0.490839\n", + " -0.968913\n", + " 1.311935\n", " \n", " \n", " 6\n", - " 0.568203\n", - " -0.291828\n", - " -0.619171\n", + " -0.524806\n", + " 1.959995\n", + " -0.429379\n", " \n", " \n", " 7\n", - " 1.236611\n", - " 0.642519\n", - " -0.044401\n", + " 0.208715\n", + " -1.726389\n", + " 1.021716\n", " \n", " \n", " 9\n", - " -0.675779\n", - " 1.524957\n", - " -0.044401\n", + " 1.205551\n", + " -1.019412\n", + " 1.09427\n", " \n", " \n", " 10\n", - " -0.564378\n", - " 0.90206\n", - " 0.530369\n", - " \n", - " \n", - " 11\n", - " -0.898582\n", - " 0.798243\n", - " -1.122096\n", + " 0.772962\n", + " -0.817418\n", + " 1.457044\n", " \n", " \n", " 12\n", - " -1.26992\n", - " -0.136103\n", - " -1.122096\n", - " \n", - " \n", - " 13\n", - " 0.58677\n", - " 0.071529\n", - " -1.122096\n", + " 1.243168\n", + " -1.120408\n", + " 1.602153\n", " \n", " \n", " 14\n", - " -1.826927\n", - " -0.032287\n", - " -1.122096\n", + " -1.709725\n", + " 0.344046\n", + " -0.792152\n", " \n", " \n", - " 15\n", - " -1.26992\n", - " -0.343736\n", - " -1.122096\n", - " \n", - " \n", - " 16\n", - " 0.3454\n", - " 0.071529\n", - " -0.547325\n", + " 17\n", + " 0.509647\n", + " -0.918415\n", + " 1.021716\n", " \n", " \n", " 18\n", - " -0.768614\n", - " 0.382978\n", - " -0.547325\n", + " 1.167935\n", + " 1.404513\n", + " -0.284269\n", " \n", " \n", " 19\n", - " -1.121385\n", - " 0.486795\n", - " -0.547325\n", + " -1.295944\n", + " 0.445043\n", + " -1.662809\n", " \n", " \n", " 20\n", - " 0.512502\n", - " 0.33107\n", - " -0.547325\n", + " 1.299593\n", + " 0.798532\n", + " 0.151059\n", " \n", " \n", " 21\n", - " 1.385146\n", - " 1.057784\n", - " -0.547325\n", + " -0.675272\n", + " 0.344046\n", + " -1.009817\n", " \n", " \n", " 22\n", - " -0.675779\n", - " -0.032287\n", - " -0.547325\n", + " 0.26514\n", + " -1.675891\n", + " 0.658942\n", " \n", " \n", " 24\n", - " 1.069509\n", - " 0.538703\n", - " -0.547325\n", + " 1.43125\n", + " 1.556008\n", + " -0.501934\n", + " \n", + " \n", + " 25\n", + " 0.302756\n", + " 0.041055\n", + " -0.574488\n", " \n", " \n", " 26\n", - " -0.43441\n", - " 0.694427\n", - " 0.027445\n", + " 0.302756\n", + " -1.675891\n", + " 0.949161\n", + " \n", + " \n", + " 27\n", + " 0.227523\n", + " -1.776888\n", + " 0.658942\n", " \n", " \n", " 28\n", - " 1.923586\n", - " 1.888314\n", - " 0.027445\n", + " 1.318401\n", + " -0.362932\n", + " 1.747263\n", + " \n", + " \n", + " 29\n", + " 2.202388\n", + " 1.303516\n", + " 0.441278\n", " \n", " \n", " 30\n", - " 1.292312\n", - " 0.694427\n", - " 0.027445\n", + " -0.919779\n", + " 1.959995\n", + " -0.356824\n", " \n", " \n", " 31\n", - " -1.994029\n", - " -0.551368\n", - " -1.62502\n", + " 1.036277\n", + " -0.615424\n", + " 1.747263\n", + " \n", + " \n", + " 32\n", + " -0.223874\n", + " 0.19255\n", + " -0.356824\n", " \n", " \n", "\n", @@ -1158,65 +1096,65 @@ "text/plain": [ " standard_scaled_culmen_length_mm standard_scaled_culmen_depth_mm \\\n", "penguin_id \n", - "0 
-1.344188 0.642519 \n", - "1 -0.750047 1.005876 \n", - "2 -0.545811 0.90206 \n", - "4 -1.214219 -0.188011 \n", - "5 -0.118772 0.694427 \n", - "6 0.568203 -0.291828 \n", - "7 1.236611 0.642519 \n", - "9 -0.675779 1.524957 \n", - "10 -0.564378 0.90206 \n", - "11 -0.898582 0.798243 \n", - "12 -1.26992 -0.136103 \n", - "13 0.58677 0.071529 \n", - "14 -1.826927 -0.032287 \n", - "15 -1.26992 -0.343736 \n", - "16 0.3454 0.071529 \n", - "18 -0.768614 0.382978 \n", - "19 -1.121385 0.486795 \n", - "20 0.512502 0.33107 \n", - "21 1.385146 1.057784 \n", - "22 -0.675779 -0.032287 \n", - "24 1.069509 0.538703 \n", - "26 -0.43441 0.694427 \n", - "28 1.923586 1.888314 \n", - "30 1.292312 0.694427 \n", - "31 -1.994029 -0.551368 \n", + "0 -0.750505 0.84903 \n", + "2 0.622496 -1.322402 \n", + "3 -0.299107 -0.261935 \n", + "5 0.490839 -0.968913 \n", + "6 -0.524806 1.959995 \n", + "7 0.208715 -1.726389 \n", + "9 1.205551 -1.019412 \n", + "10 0.772962 -0.817418 \n", + "12 1.243168 -1.120408 \n", + "14 -1.709725 0.344046 \n", + "17 0.509647 -0.918415 \n", + "18 1.167935 1.404513 \n", + "19 -1.295944 0.445043 \n", + "20 1.299593 0.798532 \n", + "21 -0.675272 0.344046 \n", + "22 0.26514 -1.675891 \n", + "24 1.43125 1.556008 \n", + "25 0.302756 0.041055 \n", + "26 0.302756 -1.675891 \n", + "27 0.227523 -1.776888 \n", + "28 1.318401 -0.362932 \n", + "29 2.202388 1.303516 \n", + "30 -0.919779 1.959995 \n", + "31 1.036277 -0.615424 \n", + "32 -0.223874 0.19255 \n", "\n", " standard_scaled_flipper_length_mm \n", "penguin_id \n", - "0 -1.193942 \n", - "1 -1.193942 \n", - "2 -1.193942 \n", - "4 -0.619171 \n", - "5 -0.619171 \n", - "6 -0.619171 \n", - "7 -0.044401 \n", - "9 -0.044401 \n", - "10 0.530369 \n", - "11 -1.122096 \n", - "12 -1.122096 \n", - "13 -1.122096 \n", - "14 -1.122096 \n", - "15 -1.122096 \n", - "16 -0.547325 \n", - "18 -0.547325 \n", - "19 -0.547325 \n", - "20 -0.547325 \n", - "21 -0.547325 \n", - "22 -0.547325 \n", - "24 -0.547325 \n", - "26 0.027445 \n", - "28 0.027445 \n", - "30 0.027445 \n", - "31 -1.62502 \n", + "0 -0.937262 \n", + "2 0.804051 \n", + "3 -1.009817 \n", + "5 1.311935 \n", + "6 -0.429379 \n", + "7 1.021716 \n", + "9 1.09427 \n", + "10 1.457044 \n", + "12 1.602153 \n", + "14 -0.792152 \n", + "17 1.021716 \n", + "18 -0.284269 \n", + "19 -1.662809 \n", + "20 0.151059 \n", + "21 -1.009817 \n", + "22 0.658942 \n", + "24 -0.501934 \n", + "25 -0.574488 \n", + "26 0.949161 \n", + "27 0.658942 \n", + "28 1.747263 \n", + "29 0.441278 \n", + "30 -0.356824 \n", + "31 1.747263 \n", + "32 -0.356824 \n", "...\n", "\n", "[267 rows x 3 columns]" ] }, - "execution_count": 22, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -1237,32 +1175,16 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "74f3c24c0a434e12bf6a56dc4809b501", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job c6268b07-0d3d-4fe0-971d-cc99fd98cd7e is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job 31550d88-fc7b-4fcb-9975-9ed24bf2e009 is RUNNING. " ] }, "metadata": {}, @@ -1270,13 +1192,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5a04e46a7d0248b1ae523f2ca6903ee8", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 1e17f5f7-2956-4bdd-baa9-c07591481341 is DONE. 536 Bytes processed. 
Open Job" + ], "text/plain": [ - "HTML(value='Query job 5ec7c8b1-037c-466c-a51e-963f8274e76b is RUNNING. " ] }, "metadata": {}, @@ -1284,13 +1204,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "62563820bfb245be85bbc1bf3dfb993c", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e2fde7a6-67b4-45a4-91d4-1cb9eff66ae5 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 4e860716-bc41-4ef6-83ff-310d085ed7cc is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -1298,13 +1216,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "98aff3bfded44868bf120451c89df9f5", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e0683619-23c5-44fd-8930-9d3c9d02729a is DONE. 2.1 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 6b96a757-42fe-4b65-92fd-a3ae339fe769 is RUNNING. " ] }, "metadata": {}, @@ -1344,154 +1260,154 @@ " \n", " \n", " \n", - " 3\n", - " 0.493935\n", - " 0.382978\n", - " -0.619171\n", - " \n", - " \n", - " 8\n", - " 1.050942\n", - " 0.953968\n", - " -0.044401\n", + " 1\n", + " -0.938587\n", + " 0.748033\n", + " -1.445145\n", " \n", " \n", - " 17\n", - " 1.255178\n", - " 1.1616\n", - " -0.547325\n", + " 4\n", + " -0.16745\n", + " 0.899528\n", + " -0.284269\n", " \n", " \n", - " 23\n", - " -1.307054\n", - " 0.694427\n", - " -0.547325\n", + " 8\n", + " 0.453222\n", + " -1.877885\n", + " 0.658942\n", " \n", " \n", - " 25\n", - " 1.515114\n", - " 0.486795\n", - " 0.027445\n", + " 11\n", + " -1.12667\n", + " 0.697535\n", + " -0.792152\n", " \n", " \n", - " 27\n", - " 1.236611\n", - " 1.265417\n", - " 0.027445\n", + " 13\n", + " -1.183094\n", + " 1.404513\n", + " -0.792152\n", " \n", " \n", - " 29\n", - " 1.403713\n", - " 0.953968\n", - " 0.027445\n", + " 15\n", + " 0.867003\n", + " -0.766919\n", + " 0.513833\n", " \n", " \n", - " 34\n", - " 0.419668\n", - " 0.538703\n", - " -1.62502\n", + " 16\n", + " -1.784958\n", + " 1.959995\n", + " -0.211715\n", " \n", " \n", - " 35\n", - " -1.455589\n", - " 0.694427\n", - " -1.050249\n", + " 23\n", + " -0.355532\n", + " 0.647036\n", + " -1.5177\n", " \n", " \n", - " 39\n", - " 0.326833\n", - " 1.1616\n", - " -0.475479\n", + " 34\n", + " -0.600039\n", + " -1.776888\n", + " 0.949161\n", " \n", " \n", - " 51\n", - " -1.065684\n", - " 0.227254\n", - " -0.978403\n", + " 36\n", + " -0.129833\n", + " -1.423399\n", + " 1.23938\n", " \n", " \n", - " 52\n", - " -0.248741\n", - " 0.071529\n", - " -0.978403\n", + " 42\n", + " -1.615684\n", + " -0.514427\n", + " -0.429379\n", " \n", " \n", - " 60\n", - " 0.531069\n", - " 0.382978\n", - " -0.403633\n", + " 48\n", + " 0.415606\n", + " -0.716421\n", + " 1.021716\n", " \n", " \n", " 61\n", - " 0.401101\n", - " 0.90206\n", - " -0.403633\n", + " 0.396797\n", + " -1.170907\n", + " 1.457044\n", " \n", " \n", " 64\n", - " -1.455589\n", - " 0.33107\n", - " -0.403633\n", + " 0.434414\n", + " -1.120408\n", + " 1.09427\n", " \n", " \n", " 65\n", - " -0.564378\n", - " 0.642519\n", - " -0.403633\n", + " -1.220711\n", + " 1.051024\n", + " -1.445145\n", " \n", " \n", - " 67\n", - " 1.273745\n", - " 1.317325\n", - " 0.171138\n", + " 68\n", + " -1.484026\n", + " -0.009443\n", + " -1.009817\n", " \n", " \n", - " 83\n", - " 2.629128\n", - " 0.33107\n", - " -1.409481\n", + " 70\n", + " 1.638141\n", + " 1.404513\n", + " 0.296168\n", " \n", " \n", - " 85\n", - " -1.288487\n", - " 0.746335\n", - " -0.83471\n", + " 72\n", + " 0.829387\n", + " 0.142052\n", + " -0.719598\n", " 
\n", " \n", - " 93\n", - " -0.508677\n", - " 0.486795\n", - " 0.314831\n", + " 74\n", + " -0.242683\n", + " -1.524396\n", + " 0.586387\n", " \n", " \n", - " 104\n", - " 0.382534\n", - " -0.032287\n", - " -0.762864\n", + " 77\n", + " -1.277136\n", + " -0.211437\n", + " -0.647043\n", " \n", " \n", - " 105\n", - " -1.065684\n", - " 0.746335\n", - " -0.762864\n", + " 81\n", + " 0.208715\n", + " -1.221405\n", + " 0.804051\n", + " \n", + " \n", + " 91\n", + " 1.261976\n", + " 0.647036\n", + " 0.005949\n", " \n", " \n", - " 108\n", - " 1.162343\n", - " 0.382978\n", - " -0.762864\n", + " 96\n", + " 0.246331\n", + " -1.322402\n", + " 0.731497\n", " \n", " \n", - " 113\n", - " 1.496547\n", - " 1.213509\n", - " 0.386677\n", + " 105\n", + " -1.803766\n", + " 0.445043\n", + " -1.009817\n", " \n", " \n", - " 130\n", - " -0.341575\n", - " 1.213509\n", - " -0.044401\n", + " 111\n", + " -1.164286\n", + " 0.697535\n", + " -2.098138\n", " \n", " \n", "\n", @@ -1501,65 +1417,65 @@ "text/plain": [ " standard_scaled_culmen_length_mm standard_scaled_culmen_depth_mm \\\n", "penguin_id \n", - "3 0.493935 0.382978 \n", - "8 1.050942 0.953968 \n", - "17 1.255178 1.1616 \n", - "23 -1.307054 0.694427 \n", - "25 1.515114 0.486795 \n", - "27 1.236611 1.265417 \n", - "29 1.403713 0.953968 \n", - "34 0.419668 0.538703 \n", - "35 -1.455589 0.694427 \n", - "39 0.326833 1.1616 \n", - "51 -1.065684 0.227254 \n", - "52 -0.248741 0.071529 \n", - "60 0.531069 0.382978 \n", - "61 0.401101 0.90206 \n", - "64 -1.455589 0.33107 \n", - "65 -0.564378 0.642519 \n", - "67 1.273745 1.317325 \n", - "83 2.629128 0.33107 \n", - "85 -1.288487 0.746335 \n", - "93 -0.508677 0.486795 \n", - "104 0.382534 -0.032287 \n", - "105 -1.065684 0.746335 \n", - "108 1.162343 0.382978 \n", - "113 1.496547 1.213509 \n", - "130 -0.341575 1.213509 \n", + "1 -0.938587 0.748033 \n", + "4 -0.16745 0.899528 \n", + "8 0.453222 -1.877885 \n", + "11 -1.12667 0.697535 \n", + "13 -1.183094 1.404513 \n", + "15 0.867003 -0.766919 \n", + "16 -1.784958 1.959995 \n", + "23 -0.355532 0.647036 \n", + "34 -0.600039 -1.776888 \n", + "36 -0.129833 -1.423399 \n", + "42 -1.615684 -0.514427 \n", + "48 0.415606 -0.716421 \n", + "61 0.396797 -1.170907 \n", + "64 0.434414 -1.120408 \n", + "65 -1.220711 1.051024 \n", + "68 -1.484026 -0.009443 \n", + "70 1.638141 1.404513 \n", + "72 0.829387 0.142052 \n", + "74 -0.242683 -1.524396 \n", + "77 -1.277136 -0.211437 \n", + "81 0.208715 -1.221405 \n", + "91 1.261976 0.647036 \n", + "96 0.246331 -1.322402 \n", + "105 -1.803766 0.445043 \n", + "111 -1.164286 0.697535 \n", "\n", " standard_scaled_flipper_length_mm \n", "penguin_id \n", - "3 -0.619171 \n", - "8 -0.044401 \n", - "17 -0.547325 \n", - "23 -0.547325 \n", - "25 0.027445 \n", - "27 0.027445 \n", - "29 0.027445 \n", - "34 -1.62502 \n", - "35 -1.050249 \n", - "39 -0.475479 \n", - "51 -0.978403 \n", - "52 -0.978403 \n", - "60 -0.403633 \n", - "61 -0.403633 \n", - "64 -0.403633 \n", - "65 -0.403633 \n", - "67 0.171138 \n", - "83 -1.409481 \n", - "85 -0.83471 \n", - "93 0.314831 \n", - "104 -0.762864 \n", - "105 -0.762864 \n", - "108 -0.762864 \n", - "113 0.386677 \n", - "130 -0.044401 \n", + "1 -1.445145 \n", + "4 -0.284269 \n", + "8 0.658942 \n", + "11 -0.792152 \n", + "13 -0.792152 \n", + "15 0.513833 \n", + "16 -0.211715 \n", + "23 -1.5177 \n", + "34 0.949161 \n", + "36 1.23938 \n", + "42 -0.429379 \n", + "48 1.021716 \n", + "61 1.457044 \n", + "64 1.09427 \n", + "65 -1.445145 \n", + "68 -1.009817 \n", + "70 0.296168 \n", + "72 -0.719598 \n", + "74 0.586387 \n", + "77 -0.647043 \n", + 
"81 0.804051 \n", + "91 0.005949 \n", + "96 0.731497 \n", + "105 -1.009817 \n", + "111 -2.098138 \n", "...\n", "\n", "[67 rows x 3 columns]" ] }, - "execution_count": 23, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -1581,32 +1497,16 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d642a617d27f4e2493c80dbdd1686193", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job a8d8afa4-d91e-487e-8709-8727a73ab453 is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job b9afd624-4345-4160-8809-05786563ce35 is RUNNING. " ] }, "metadata": {}, @@ -1614,13 +1514,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "09217776c2294e8b929a56e7a73fbfa8", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 41962e2e-4d14-4053-9297-3ce61699551a is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c918fc7c-a956-4259-b5c5-09c2eac615cd is RUNNING. " ] }, "metadata": {}, @@ -1628,13 +1526,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9c1581fc9fcb49739d1d81b73506b894", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 5d3c22c9-c972-4213-8557-726c9e0aca37 is DONE. 22.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 1d855341-282f-4d10-9ba9-3ce6683b729a is RUNNING. " ] }, "metadata": {}, @@ -1642,13 +1538,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d7749eb7cf554697a60c90f3718ad582", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 9cb7b33f-ea05-4cf4-9f92-bb3aa4ea8d10 is DONE. 2.1 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c257ff78-3e15-4296-82f5-ba6c2eb6a6ff is RUNNING. " ] }, "metadata": {}, @@ -1656,13 +1550,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e900465918224249bccc781d992aadbb", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job fe1f35d6-d82c-4aab-a284-637b72554f5b is DONE. 29.2 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job a17eec0c-10d0-4943-95be-60fced57d5cb is RUNNING. " ] }, "metadata": {}, @@ -1670,13 +1562,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b0272ee35c5745a491b7c5883b3fbb1b", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 37bc90ff-59cb-4b0c-8f9d-73bcda43524a is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 1db53c8a-cf45-4c69-a443-6b7a49fc3a07 is DONE. 536 Bytes processed. " ] }, "metadata": {}, @@ -1684,13 +1574,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "00f9d4b55bb94997aaebdae298cefab3", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e23f4724-fdd8-45a9-8c87-defd8d471035 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job ae870ee3-e633-4556-94e6-6669fa0bfde2 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -1698,13 +1586,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9cd8e791be5844669cba10dc53f862ae", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 257378db-0569-42d7-965a-7757154c710b is DONE. 21.4 kB processed. 
Open Job" + ], "text/plain": [ - "HTML(value='Query job baa7c70c-eabc-49e1-bae9-fdd4891cdb6e is RUNNING. " ] }, "metadata": {}, @@ -1751,226 +1637,226 @@ " \n", " \n", " 0\n", - " [{'index': 2, 'value': 1.0}]\n", - " -1.344188\n", - " 0.642519\n", - " -1.193942\n", - " [{'index': 2, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", - " \n", - " \n", - " 1\n", + " -0.750505\n", + " 0.84903\n", + " -0.937262\n", " [{'index': 2, 'value': 1.0}]\n", - " -0.750047\n", - " 1.005876\n", - " -1.193942\n", - " [{'index': 3, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", " 2\n", - " [{'index': 2, 'value': 1.0}]\n", - " -0.545811\n", - " 0.90206\n", - " -1.193942\n", - " [{'index': 3, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", + " 0.622496\n", + " -1.322402\n", + " 0.804051\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 4\n", - " [{'index': 2, 'value': 1.0}]\n", - " -1.214219\n", - " -0.188011\n", - " -0.619171\n", + " 3\n", " [{'index': 2, 'value': 1.0}]\n", + " -0.299107\n", + " -0.261935\n", + " -1.009817\n", " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", " 5\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.490839\n", + " -0.968913\n", + " 1.311935\n", " [{'index': 2, 'value': 1.0}]\n", - " -0.118772\n", - " 0.694427\n", - " -0.619171\n", " [{'index': 3, 'value': 1.0}]\n", - " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", " 6\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.524806\n", + " 1.959995\n", + " -0.429379\n", " [{'index': 2, 'value': 1.0}]\n", - " 0.568203\n", - " -0.291828\n", - " -0.619171\n", - " [{'index': 2, 'value': 1.0}]\n", - " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", " 7\n", - " [{'index': 2, 'value': 1.0}]\n", - " 1.236611\n", - " 0.642519\n", - " -0.044401\n", - " [{'index': 2, 'value': 1.0}]\n", - " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.208715\n", + " -1.726389\n", + " 1.021716\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 9\n", - " [{'index': 2, 'value': 1.0}]\n", - " -0.675779\n", - " 1.524957\n", - " -0.044401\n", - " [{'index': 3, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", + " 1.205551\n", + " -1.019412\n", + " 1.09427\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 10\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.772962\n", + " -0.817418\n", + " 1.457044\n", " [{'index': 2, 'value': 1.0}]\n", - " -0.564378\n", - " 0.90206\n", - " 0.530369\n", " [{'index': 3, 'value': 1.0}]\n", - " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 11\n", + " 12\n", + " [{'index': 1, 'value': 1.0}]\n", + " 1.243168\n", + " -1.120408\n", + " 1.602153\n", " [{'index': 2, 'value': 1.0}]\n", - " -0.898582\n", - " 0.798243\n", - " -1.122096\n", " [{'index': 3, 'value': 1.0}]\n", - " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 12\n", - " [{'index': 2, 'value': 1.0}]\n", - " -1.26992\n", - " -0.136103\n", - " -1.122096\n", - " [{'index': 2, 'value': 1.0}]\n", + " 14\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.709725\n", + " 0.344046\n", + " -0.792152\n", + " [{'index': 1, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 13\n", - " [{'index': 2, 'value': 1.0}]\n", - " 0.58677\n", - " 0.071529\n", - " -1.122096\n", - " [{'index': 2, 'value': 1.0}]\n", + " 17\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.509647\n", + " -0.918415\n", + " 1.021716\n", " [{'index': 
2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 14\n", + " 18\n", + " [{'index': 2, 'value': 1.0}]\n", + " 1.167935\n", + " 1.404513\n", + " -0.284269\n", " [{'index': 2, 'value': 1.0}]\n", - " -1.826927\n", - " -0.032287\n", - " -1.122096\n", " [{'index': 2, 'value': 1.0}]\n", - " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 15\n", + " 19\n", " [{'index': 2, 'value': 1.0}]\n", - " -1.26992\n", - " -0.343736\n", - " -1.122096\n", + " -1.295944\n", + " 0.445043\n", + " -1.662809\n", " [{'index': 2, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 16\n", + " 20\n", " [{'index': 2, 'value': 1.0}]\n", - " 0.3454\n", - " 0.071529\n", - " -0.547325\n", + " 1.299593\n", + " 0.798532\n", + " 0.151059\n", " [{'index': 2, 'value': 1.0}]\n", " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 18\n", - " [{'index': 2, 'value': 1.0}]\n", - " -0.768614\n", - " 0.382978\n", - " -0.547325\n", - " [{'index': 3, 'value': 1.0}]\n", + " 21\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.675272\n", + " 0.344046\n", + " -1.009817\n", + " [{'index': 1, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 19\n", - " [{'index': 2, 'value': 1.0}]\n", - " -1.121385\n", - " 0.486795\n", - " -0.547325\n", - " [{'index': 3, 'value': 1.0}]\n", + " 22\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.26514\n", + " -1.675891\n", + " 0.658942\n", " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 20\n", + " 24\n", " [{'index': 2, 'value': 1.0}]\n", - " 0.512502\n", - " 0.33107\n", - " -0.547325\n", + " 1.43125\n", + " 1.556008\n", + " -0.501934\n", " [{'index': 2, 'value': 1.0}]\n", " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 21\n", + " 25\n", " [{'index': 2, 'value': 1.0}]\n", - " 1.385146\n", - " 1.057784\n", - " -0.547325\n", - " [{'index': 3, 'value': 1.0}]\n", + " 0.302756\n", + " 0.041055\n", + " -0.574488\n", + " [{'index': 1, 'value': 1.0}]\n", " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 22\n", - " [{'index': 2, 'value': 1.0}]\n", - " -0.675779\n", - " -0.032287\n", - " -0.547325\n", - " [{'index': 2, 'value': 1.0}]\n", + " 26\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.302756\n", + " -1.675891\n", + " 0.949161\n", " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 24\n", - " [{'index': 2, 'value': 1.0}]\n", - " 1.069509\n", - " 0.538703\n", - " -0.547325\n", + " 27\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.227523\n", + " -1.776888\n", + " 0.658942\n", + " [{'index': 1, 'value': 1.0}]\n", " [{'index': 3, 'value': 1.0}]\n", - " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 26\n", + " 28\n", + " [{'index': 1, 'value': 1.0}]\n", + " 1.318401\n", + " -0.362932\n", + " 1.747263\n", " [{'index': 2, 'value': 1.0}]\n", - " -0.43441\n", - " 0.694427\n", - " 0.027445\n", " [{'index': 3, 'value': 1.0}]\n", - " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 28\n", + " 29\n", + " [{'index': 2, 'value': 1.0}]\n", + " 2.202388\n", + " 1.303516\n", + " 0.441278\n", " [{'index': 2, 'value': 1.0}]\n", - " 1.923586\n", - " 1.888314\n", - " 0.027445\n", - " [{'index': 3, 'value': 1.0}]\n", " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", " 30\n", " [{'index': 2, 'value': 1.0}]\n", - " 1.292312\n", - " 0.694427\n", - " 0.027445\n", - " [{'index': 3, 'value': 1.0}]\n", + " -0.919779\n", + " 1.959995\n", + " -0.356824\n", " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", " 31\n", + " [{'index': 1, 'value': 1.0}]\n", + 
" 1.036277\n", + " -0.615424\n", + " 1.747263\n", " [{'index': 2, 'value': 1.0}]\n", - " -1.994029\n", - " -0.551368\n", - " -1.62502\n", + " [{'index': 3, 'value': 1.0}]\n", + " \n", + " \n", + " 32\n", + " [{'index': 3, 'value': 1.0}]\n", + " -0.223874\n", + " 0.19255\n", + " -0.356824\n", " [{'index': 2, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", " \n", @@ -1982,121 +1868,121 @@ "text/plain": [ " onehotencoded_island standard_scaled_culmen_length_mm \\\n", "penguin_id \n", - "0 [{'index': 2, 'value': 1.0}] -1.344188 \n", - "1 [{'index': 2, 'value': 1.0}] -0.750047 \n", - "2 [{'index': 2, 'value': 1.0}] -0.545811 \n", - "4 [{'index': 2, 'value': 1.0}] -1.214219 \n", - "5 [{'index': 2, 'value': 1.0}] -0.118772 \n", - "6 [{'index': 2, 'value': 1.0}] 0.568203 \n", - "7 [{'index': 2, 'value': 1.0}] 1.236611 \n", - "9 [{'index': 2, 'value': 1.0}] -0.675779 \n", - "10 [{'index': 2, 'value': 1.0}] -0.564378 \n", - "11 [{'index': 2, 'value': 1.0}] -0.898582 \n", - "12 [{'index': 2, 'value': 1.0}] -1.26992 \n", - "13 [{'index': 2, 'value': 1.0}] 0.58677 \n", - "14 [{'index': 2, 'value': 1.0}] -1.826927 \n", - "15 [{'index': 2, 'value': 1.0}] -1.26992 \n", - "16 [{'index': 2, 'value': 1.0}] 0.3454 \n", - "18 [{'index': 2, 'value': 1.0}] -0.768614 \n", - "19 [{'index': 2, 'value': 1.0}] -1.121385 \n", - "20 [{'index': 2, 'value': 1.0}] 0.512502 \n", - "21 [{'index': 2, 'value': 1.0}] 1.385146 \n", - "22 [{'index': 2, 'value': 1.0}] -0.675779 \n", - "24 [{'index': 2, 'value': 1.0}] 1.069509 \n", - "26 [{'index': 2, 'value': 1.0}] -0.43441 \n", - "28 [{'index': 2, 'value': 1.0}] 1.923586 \n", - "30 [{'index': 2, 'value': 1.0}] 1.292312 \n", - "31 [{'index': 2, 'value': 1.0}] -1.994029 \n", + "0 [{'index': 1, 'value': 1.0}] -0.750505 \n", + "2 [{'index': 1, 'value': 1.0}] 0.622496 \n", + "3 [{'index': 2, 'value': 1.0}] -0.299107 \n", + "5 [{'index': 1, 'value': 1.0}] 0.490839 \n", + "6 [{'index': 1, 'value': 1.0}] -0.524806 \n", + "7 [{'index': 1, 'value': 1.0}] 0.208715 \n", + "9 [{'index': 1, 'value': 1.0}] 1.205551 \n", + "10 [{'index': 1, 'value': 1.0}] 0.772962 \n", + "12 [{'index': 1, 'value': 1.0}] 1.243168 \n", + "14 [{'index': 1, 'value': 1.0}] -1.709725 \n", + "17 [{'index': 1, 'value': 1.0}] 0.509647 \n", + "18 [{'index': 2, 'value': 1.0}] 1.167935 \n", + "19 [{'index': 2, 'value': 1.0}] -1.295944 \n", + "20 [{'index': 2, 'value': 1.0}] 1.299593 \n", + "21 [{'index': 1, 'value': 1.0}] -0.675272 \n", + "22 [{'index': 1, 'value': 1.0}] 0.26514 \n", + "24 [{'index': 2, 'value': 1.0}] 1.43125 \n", + "25 [{'index': 2, 'value': 1.0}] 0.302756 \n", + "26 [{'index': 1, 'value': 1.0}] 0.302756 \n", + "27 [{'index': 1, 'value': 1.0}] 0.227523 \n", + "28 [{'index': 1, 'value': 1.0}] 1.318401 \n", + "29 [{'index': 2, 'value': 1.0}] 2.202388 \n", + "30 [{'index': 2, 'value': 1.0}] -0.919779 \n", + "31 [{'index': 1, 'value': 1.0}] 1.036277 \n", + "32 [{'index': 3, 'value': 1.0}] -0.223874 \n", "\n", " standard_scaled_culmen_depth_mm \\\n", "penguin_id \n", - "0 0.642519 \n", - "1 1.005876 \n", - "2 0.90206 \n", - "4 -0.188011 \n", - "5 0.694427 \n", - "6 -0.291828 \n", - "7 0.642519 \n", - "9 1.524957 \n", - "10 0.90206 \n", - "11 0.798243 \n", - "12 -0.136103 \n", - "13 0.071529 \n", - "14 -0.032287 \n", - "15 -0.343736 \n", - "16 0.071529 \n", - "18 0.382978 \n", - "19 0.486795 \n", - "20 0.33107 \n", - "21 1.057784 \n", - "22 -0.032287 \n", - "24 0.538703 \n", - "26 0.694427 \n", - "28 1.888314 \n", - "30 0.694427 \n", - "31 -0.551368 \n", + "0 0.84903 \n", + "2 -1.322402 \n", + "3 
-0.261935 \n", + "5 -0.968913 \n", + "6 1.959995 \n", + "7 -1.726389 \n", + "9 -1.019412 \n", + "10 -0.817418 \n", + "12 -1.120408 \n", + "14 0.344046 \n", + "17 -0.918415 \n", + "18 1.404513 \n", + "19 0.445043 \n", + "20 0.798532 \n", + "21 0.344046 \n", + "22 -1.675891 \n", + "24 1.556008 \n", + "25 0.041055 \n", + "26 -1.675891 \n", + "27 -1.776888 \n", + "28 -0.362932 \n", + "29 1.303516 \n", + "30 1.959995 \n", + "31 -0.615424 \n", + "32 0.19255 \n", "\n", " standard_scaled_flipper_length_mm onehotencoded_sex \\\n", "penguin_id \n", - "0 -1.193942 [{'index': 2, 'value': 1.0}] \n", - "1 -1.193942 [{'index': 3, 'value': 1.0}] \n", - "2 -1.193942 [{'index': 3, 'value': 1.0}] \n", - "4 -0.619171 [{'index': 2, 'value': 1.0}] \n", - "5 -0.619171 [{'index': 3, 'value': 1.0}] \n", - "6 -0.619171 [{'index': 2, 'value': 1.0}] \n", - "7 -0.044401 [{'index': 2, 'value': 1.0}] \n", - "9 -0.044401 [{'index': 3, 'value': 1.0}] \n", - "10 0.530369 [{'index': 3, 'value': 1.0}] \n", - "11 -1.122096 [{'index': 3, 'value': 1.0}] \n", - "12 -1.122096 [{'index': 2, 'value': 1.0}] \n", - "13 -1.122096 [{'index': 2, 'value': 1.0}] \n", - "14 -1.122096 [{'index': 2, 'value': 1.0}] \n", - "15 -1.122096 [{'index': 2, 'value': 1.0}] \n", - "16 -0.547325 [{'index': 2, 'value': 1.0}] \n", - "18 -0.547325 [{'index': 3, 'value': 1.0}] \n", - "19 -0.547325 [{'index': 3, 'value': 1.0}] \n", - "20 -0.547325 [{'index': 2, 'value': 1.0}] \n", - "21 -0.547325 [{'index': 3, 'value': 1.0}] \n", - "22 -0.547325 [{'index': 2, 'value': 1.0}] \n", - "24 -0.547325 [{'index': 3, 'value': 1.0}] \n", - "26 0.027445 [{'index': 3, 'value': 1.0}] \n", - "28 0.027445 [{'index': 3, 'value': 1.0}] \n", - "30 0.027445 [{'index': 3, 'value': 1.0}] \n", - "31 -1.62502 [{'index': 2, 'value': 1.0}] \n", + "0 -0.937262 [{'index': 2, 'value': 1.0}] \n", + "2 0.804051 [{'index': 1, 'value': 1.0}] \n", + "3 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "5 1.311935 [{'index': 2, 'value': 1.0}] \n", + "6 -0.429379 [{'index': 2, 'value': 1.0}] \n", + "7 1.021716 [{'index': 1, 'value': 1.0}] \n", + "9 1.09427 [{'index': 1, 'value': 1.0}] \n", + "10 1.457044 [{'index': 2, 'value': 1.0}] \n", + "12 1.602153 [{'index': 2, 'value': 1.0}] \n", + "14 -0.792152 [{'index': 1, 'value': 1.0}] \n", + "17 1.021716 [{'index': 2, 'value': 1.0}] \n", + "18 -0.284269 [{'index': 2, 'value': 1.0}] \n", + "19 -1.662809 [{'index': 2, 'value': 1.0}] \n", + "20 0.151059 [{'index': 2, 'value': 1.0}] \n", + "21 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "22 0.658942 [{'index': 1, 'value': 1.0}] \n", + "24 -0.501934 [{'index': 2, 'value': 1.0}] \n", + "25 -0.574488 [{'index': 1, 'value': 1.0}] \n", + "26 0.949161 [{'index': 1, 'value': 1.0}] \n", + "27 0.658942 [{'index': 1, 'value': 1.0}] \n", + "28 1.747263 [{'index': 2, 'value': 1.0}] \n", + "29 0.441278 [{'index': 2, 'value': 1.0}] \n", + "30 -0.356824 [{'index': 2, 'value': 1.0}] \n", + "31 1.747263 [{'index': 2, 'value': 1.0}] \n", + "32 -0.356824 [{'index': 2, 'value': 1.0}] \n", "\n", " onehotencoded_species \n", "penguin_id \n", "0 [{'index': 1, 'value': 1.0}] \n", - "1 [{'index': 1, 'value': 1.0}] \n", - "2 [{'index': 1, 'value': 1.0}] \n", - "4 [{'index': 1, 'value': 1.0}] \n", - "5 [{'index': 1, 'value': 1.0}] \n", - "6 [{'index': 2, 'value': 1.0}] \n", - "7 [{'index': 2, 'value': 1.0}] \n", - "9 [{'index': 1, 'value': 1.0}] \n", - "10 [{'index': 1, 'value': 1.0}] \n", - "11 [{'index': 1, 'value': 1.0}] \n", - "12 [{'index': 1, 'value': 1.0}] \n", - "13 [{'index': 2, 'value': 1.0}] \n", + "2 [{'index': 3, 
'value': 1.0}] \n", + "3 [{'index': 2, 'value': 1.0}] \n", + "5 [{'index': 3, 'value': 1.0}] \n", + "6 [{'index': 1, 'value': 1.0}] \n", + "7 [{'index': 3, 'value': 1.0}] \n", + "9 [{'index': 3, 'value': 1.0}] \n", + "10 [{'index': 3, 'value': 1.0}] \n", + "12 [{'index': 3, 'value': 1.0}] \n", "14 [{'index': 1, 'value': 1.0}] \n", - "15 [{'index': 1, 'value': 1.0}] \n", - "16 [{'index': 2, 'value': 1.0}] \n", - "18 [{'index': 1, 'value': 1.0}] \n", + "17 [{'index': 3, 'value': 1.0}] \n", + "18 [{'index': 2, 'value': 1.0}] \n", "19 [{'index': 1, 'value': 1.0}] \n", "20 [{'index': 2, 'value': 1.0}] \n", - "21 [{'index': 2, 'value': 1.0}] \n", - "22 [{'index': 1, 'value': 1.0}] \n", + "21 [{'index': 1, 'value': 1.0}] \n", + "22 [{'index': 3, 'value': 1.0}] \n", "24 [{'index': 2, 'value': 1.0}] \n", - "26 [{'index': 1, 'value': 1.0}] \n", - "28 [{'index': 2, 'value': 1.0}] \n", - "30 [{'index': 2, 'value': 1.0}] \n", - "31 [{'index': 1, 'value': 1.0}] \n", + "25 [{'index': 2, 'value': 1.0}] \n", + "26 [{'index': 3, 'value': 1.0}] \n", + "27 [{'index': 3, 'value': 1.0}] \n", + "28 [{'index': 3, 'value': 1.0}] \n", + "29 [{'index': 2, 'value': 1.0}] \n", + "30 [{'index': 1, 'value': 1.0}] \n", + "31 [{'index': 3, 'value': 1.0}] \n", + "32 [{'index': 1, 'value': 1.0}] \n", "...\n", "\n", "[267 rows x 6 columns]" ] }, - "execution_count": 24, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -2138,18 +2024,28 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5db4c5c80ba4417db151aa561dab5ee7", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 7d9c9f8b-6b4c-451f-ae3d-06fb7090d148 is DONE. 21.4 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job be87ccfa-72ab-4858-9d4a-b2f5f8b2a5e6 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job ceced0cc-13a7-4b14-b42c-4d5f69e7e49a is RUNNING. " ] }, "metadata": {}, @@ -2157,13 +2053,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e6b05d83de0e496d9e47392762046fc5", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 2d651fac-11bf-42da-8c18-bd33207379ca is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job a708b8df-6040-49b1-a6da-d2c0d162f247 is RUNNING. " ] }, "metadata": {}, @@ -2171,13 +2065,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "24d616c24a844abfbfd77ebd9f28486a", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 58836ccc-242b-4574-bc48-4c269e74dbf1 is DONE. 5.7 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job e9b9cbb5-f6a4-4d85-ba78-1edae77dce94 is RUNNING. " ] }, "metadata": {}, @@ -2185,13 +2077,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ce49b66c6fa0460aa3ee28746765b6ac", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 1bf531f0-0fde-489b-ab36-6040a2a12377 is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 6c0a41a7-a732-413a-b074-ba82f175eab8 is RUNNING. 
" ] }, "metadata": {}, @@ -2199,13 +2089,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4a6010d73ca04ea9a133de99aa90da3c", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 4245f4e6-4d5b-404f-81d7-50f0553e2456 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 2d08b79d-9c36-4db7-824a-332fdd02e9fc is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -2213,13 +2101,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ce9cfdca964a4062a52ebaae9d13ae59", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job ed951699-c005-450e-a8b6-0916ec234e7f is DONE. 5.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 7fa0bf53-1022-45ee-b3ac-78fa5c155585 is RUNNING. " ] }, "metadata": {}, @@ -2247,152 +2133,397 @@ " \n", " \n", " predicted_body_mass_g\n", + " onehotencoded_island\n", + " standard_scaled_culmen_length_mm\n", + " standard_scaled_culmen_depth_mm\n", + " standard_scaled_flipper_length_mm\n", + " onehotencoded_sex\n", + " onehotencoded_species\n", " \n", " \n", " penguin_id\n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " 3\n", - " 3394.118128\n", + " 1\n", + " 3781.402407\n", + " [{'index': 3, 'value': 1.0}]\n", + " -0.938587\n", + " 0.748033\n", + " -1.445145\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 8\n", - " 4048.685642\n", + " 4\n", + " 4124.107944\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.16745\n", + " 0.899528\n", + " -0.284269\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 17\n", - " 3976.454093\n", + " 8\n", + " 4670.344196\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.453222\n", + " -1.877885\n", + " 0.658942\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 23\n", - " 3541.582194\n", + " 11\n", + " 3529.417214\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.12667\n", + " 0.697535\n", + " -0.792152\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 25\n", - " 4032.844186\n", + " 13\n", + " 4014.101714\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.183094\n", + " 1.404513\n", + " -0.792152\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 27\n", - " 4118.351772\n", + " 15\n", + " 5212.41288\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.867003\n", + " -0.766919\n", + " 0.513833\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 29\n", - " 4087.767826\n", + " 16\n", + " 4163.595615\n", + " [{'index': 3, 'value': 1.0}]\n", + " -1.784958\n", + " 1.959995\n", + " -0.211715\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 34\n", - " 3183.755249\n", + " 23\n", + " 3392.453069\n", + " [{'index': 2, 'value': 1.0}]\n", + " -0.355532\n", + " 0.647036\n", + " -1.5177\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 35\n", - " 3418.802274\n", - " \n", - " \n", - " 39\n", - " 3519.186468\n", + " 34\n", + " 4698.305397\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.600039\n", + " -1.776888\n", + " 0.949161\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 51\n", - " 3398.135365\n", + " 36\n", + " 4828.226949\n", + " [{'index': 1, 'value': 1.0}]\n", + " 
-0.129833\n", + " -1.423399\n", + " 1.23938\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 52\n", - " 3223.615957\n", + " 42\n", + " 3430.58866\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.615684\n", + " -0.514427\n", + " -0.429379\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 60\n", - " 3445.014718\n", + " 48\n", + " 5314.260221\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.415606\n", + " -0.716421\n", + " 1.021716\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 61\n", - " 3505.638864\n", + " 5363.205372\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.396797\n", + " -1.170907\n", + " 1.457044\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 64\n", - " 3515.905786\n", + " 4855.908314\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.434414\n", + " -1.120408\n", + " 1.09427\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 65\n", - " 4028.363185\n", + " 3413.100524\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.220711\n", + " 1.051024\n", + " -1.445145\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 67\n", - " 4159.993943\n", + " 68\n", + " 3340.219002\n", + " [{'index': 3, 'value': 1.0}]\n", + " -1.484026\n", + " -0.009443\n", + " -1.009817\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 83\n", - " 3348.16883\n", + " 70\n", + " 4228.73157\n", + " [{'index': 2, 'value': 1.0}]\n", + " 1.638141\n", + " 1.404513\n", + " 0.296168\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 85\n", - " 3485.050273\n", + " 72\n", + " 3811.538478\n", + " [{'index': 2, 'value': 1.0}]\n", + " 0.829387\n", + " 0.142052\n", + " -0.719598\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 93\n", - " 4172.874548\n", + " 74\n", + " 4659.770763\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.242683\n", + " -1.524396\n", + " 0.586387\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 104\n", - " 3299.302424\n", + " 77\n", + " 3453.388804\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.277136\n", + " -0.211437\n", + " -0.647043\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 105\n", - " 3515.687917\n", + " 81\n", + " 4766.245033\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.208715\n", + " -1.221405\n", + " 0.804051\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", + " \n", + " \n", + " 91\n", + " 4057.807281\n", + " [{'index': 2, 'value': 1.0}]\n", + " 1.261976\n", + " 0.647036\n", + " 0.005949\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 108\n", - " 3405.224618\n", + " 96\n", + " 4739.827445\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.246331\n", + " -1.322402\n", + " 0.731497\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 113\n", - " 4209.140425\n", + " 105\n", + " 3394.891976\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.803766\n", + " 0.445043\n", + " -1.009817\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 130\n", - " 4197.905737\n", + " 111\n", + " 3201.493683\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.164286\n", + " 
0.697535\n", + " -2.098138\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", "\n", - "
<p>25 rows × 1 columns</p>
\n", - "[67 rows x 1 columns in total]" - ], - "text/plain": [ - " predicted_body_mass_g\n", - "penguin_id \n", - "3 3394.118128\n", - "8 4048.685642\n", - "17 3976.454093\n", - "23 3541.582194\n", - "25 4032.844186\n", - "27 4118.351772\n", - "29 4087.767826\n", - "34 3183.755249\n", - "35 3418.802274\n", - "39 3519.186468\n", - "51 3398.135365\n", - "52 3223.615957\n", - "60 3445.014718\n", - "61 3505.638864\n", - "64 3515.905786\n", - "65 4028.363185\n", - "67 4159.993943\n", - "83 3348.16883\n", - "85 3485.050273\n", - "93 4172.874548\n", - "104 3299.302424\n", - "105 3515.687917\n", - "108 3405.224618\n", - "113 4209.140425\n", - "130 4197.905737\n", - "...\n", + "
<p>25 rows × 7 columns</p>
\n", + "[67 rows x 7 columns in total]" + ], + "text/plain": [ + " predicted_body_mass_g onehotencoded_island \\\n", + "penguin_id \n", + "1 3781.402407 [{'index': 3, 'value': 1.0}] \n", + "4 4124.107944 [{'index': 1, 'value': 1.0}] \n", + "8 4670.344196 [{'index': 1, 'value': 1.0}] \n", + "11 3529.417214 [{'index': 2, 'value': 1.0}] \n", + "13 4014.101714 [{'index': 1, 'value': 1.0}] \n", + "15 5212.41288 [{'index': 1, 'value': 1.0}] \n", + "16 4163.595615 [{'index': 3, 'value': 1.0}] \n", + "23 3392.453069 [{'index': 2, 'value': 1.0}] \n", + "34 4698.305397 [{'index': 1, 'value': 1.0}] \n", + "36 4828.226949 [{'index': 1, 'value': 1.0}] \n", + "42 3430.58866 [{'index': 1, 'value': 1.0}] \n", + "48 5314.260221 [{'index': 1, 'value': 1.0}] \n", + "61 5363.205372 [{'index': 1, 'value': 1.0}] \n", + "64 4855.908314 [{'index': 1, 'value': 1.0}] \n", + "65 3413.100524 [{'index': 2, 'value': 1.0}] \n", + "68 3340.219002 [{'index': 3, 'value': 1.0}] \n", + "70 4228.73157 [{'index': 2, 'value': 1.0}] \n", + "72 3811.538478 [{'index': 2, 'value': 1.0}] \n", + "74 4659.770763 [{'index': 1, 'value': 1.0}] \n", + "77 3453.388804 [{'index': 2, 'value': 1.0}] \n", + "81 4766.245033 [{'index': 1, 'value': 1.0}] \n", + "91 4057.807281 [{'index': 2, 'value': 1.0}] \n", + "96 4739.827445 [{'index': 1, 'value': 1.0}] \n", + "105 3394.891976 [{'index': 1, 'value': 1.0}] \n", + "111 3201.493683 [{'index': 1, 'value': 1.0}] \n", "\n", - "[67 rows x 1 columns]" + " standard_scaled_culmen_length_mm standard_scaled_culmen_depth_mm \\\n", + "penguin_id \n", + "1 -0.938587 0.748033 \n", + "4 -0.16745 0.899528 \n", + "8 0.453222 -1.877885 \n", + "11 -1.12667 0.697535 \n", + "13 -1.183094 1.404513 \n", + "15 0.867003 -0.766919 \n", + "16 -1.784958 1.959995 \n", + "23 -0.355532 0.647036 \n", + "34 -0.600039 -1.776888 \n", + "36 -0.129833 -1.423399 \n", + "42 -1.615684 -0.514427 \n", + "48 0.415606 -0.716421 \n", + "61 0.396797 -1.170907 \n", + "64 0.434414 -1.120408 \n", + "65 -1.220711 1.051024 \n", + "68 -1.484026 -0.009443 \n", + "70 1.638141 1.404513 \n", + "72 0.829387 0.142052 \n", + "74 -0.242683 -1.524396 \n", + "77 -1.277136 -0.211437 \n", + "81 0.208715 -1.221405 \n", + "91 1.261976 0.647036 \n", + "96 0.246331 -1.322402 \n", + "105 -1.803766 0.445043 \n", + "111 -1.164286 0.697535 \n", + "\n", + " standard_scaled_flipper_length_mm onehotencoded_sex \\\n", + "penguin_id \n", + "1 -1.445145 [{'index': 2, 'value': 1.0}] \n", + "4 -0.284269 [{'index': 2, 'value': 1.0}] \n", + "8 0.658942 [{'index': 1, 'value': 1.0}] \n", + "11 -0.792152 [{'index': 1, 'value': 1.0}] \n", + "13 -0.792152 [{'index': 2, 'value': 1.0}] \n", + "15 0.513833 [{'index': 2, 'value': 1.0}] \n", + "16 -0.211715 [{'index': 2, 'value': 1.0}] \n", + "23 -1.5177 [{'index': 1, 'value': 1.0}] \n", + "34 0.949161 [{'index': 1, 'value': 1.0}] \n", + "36 1.23938 [{'index': 1, 'value': 1.0}] \n", + "42 -0.429379 [{'index': 1, 'value': 1.0}] \n", + "48 1.021716 [{'index': 2, 'value': 1.0}] \n", + "61 1.457044 [{'index': 2, 'value': 1.0}] \n", + "64 1.09427 [{'index': 1, 'value': 1.0}] \n", + "65 -1.445145 [{'index': 1, 'value': 1.0}] \n", + "68 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "70 0.296168 [{'index': 2, 'value': 1.0}] \n", + "72 -0.719598 [{'index': 2, 'value': 1.0}] \n", + "74 0.586387 [{'index': 1, 'value': 1.0}] \n", + "77 -0.647043 [{'index': 1, 'value': 1.0}] \n", + "81 0.804051 [{'index': 1, 'value': 1.0}] \n", + "91 0.005949 [{'index': 2, 'value': 1.0}] \n", + "96 0.731497 [{'index': 1, 'value': 1.0}] \n", + "105 -1.009817 
[{'index': 1, 'value': 1.0}] \n", + "111 -2.098138 [{'index': 1, 'value': 1.0}] \n", + "\n", + " onehotencoded_species \n", + "penguin_id \n", + "1 [{'index': 1, 'value': 1.0}] \n", + "4 [{'index': 1, 'value': 1.0}] \n", + "8 [{'index': 3, 'value': 1.0}] \n", + "11 [{'index': 1, 'value': 1.0}] \n", + "13 [{'index': 1, 'value': 1.0}] \n", + "15 [{'index': 3, 'value': 1.0}] \n", + "16 [{'index': 1, 'value': 1.0}] \n", + "23 [{'index': 1, 'value': 1.0}] \n", + "34 [{'index': 3, 'value': 1.0}] \n", + "36 [{'index': 3, 'value': 1.0}] \n", + "42 [{'index': 1, 'value': 1.0}] \n", + "48 [{'index': 3, 'value': 1.0}] \n", + "61 [{'index': 3, 'value': 1.0}] \n", + "64 [{'index': 3, 'value': 1.0}] \n", + "65 [{'index': 1, 'value': 1.0}] \n", + "68 [{'index': 1, 'value': 1.0}] \n", + "70 [{'index': 2, 'value': 1.0}] \n", + "72 [{'index': 2, 'value': 1.0}] \n", + "74 [{'index': 3, 'value': 1.0}] \n", + "77 [{'index': 1, 'value': 1.0}] \n", + "81 [{'index': 3, 'value': 1.0}] \n", + "91 [{'index': 2, 'value': 1.0}] \n", + "96 [{'index': 3, 'value': 1.0}] \n", + "105 [{'index': 1, 'value': 1.0}] \n", + "111 [{'index': 1, 'value': 1.0}] \n", + "\n", + "[67 rows x 7 columns]" ] }, - "execution_count": 25, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -2423,18 +2554,16 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d7a16e04253a42b7a5ce247d8f63b656", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 027042f1-9a18-43d8-a378-ab9410e395b1 is DONE. 23.5 kB processed.
Open Job" + ], "text/plain": [ - "HTML(value='Query job 6f19614c-82c0-4f8b-b74b-9d91a894efdd is RUNNING. " ] }, "metadata": {}, @@ -2442,13 +2571,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4a99ac15431e433595de1040872a4558", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 6c8484a0-a504-4e50-93d6-3d247c9ff558 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 51899e2d-f6ef-4e62-98b6-c11550f74f4b is RUNNING. " ] }, "metadata": {}, @@ -2456,13 +2583,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "90909b620e084f59b0f9da266257593f", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e81ca2de-df2e-41ec-af86-14f8dcec1b44 is DONE. 6.2 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 44d3fddc-74bc-4de0-a458-2c73b38f74fb is RUNNING. " ] }, "metadata": {}, @@ -2470,13 +2595,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2a9c2c05041a4fb691809bab5310bb05", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 3e6d413c-f8c4-4390-95eb-3a1f5bc59aed is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 33584475-f02b-4c98-9a51-e29996f4f950 is RUNNING. " ] }, "metadata": {}, @@ -2484,13 +2607,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6b0677c228d54b409c66e5dfa98d7e00", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e448220d-0c50-45b7-bcbe-d1159b3d18ce is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job df25ba49-280e-424d-a357-dde71a9b35dd is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -2498,13 +2619,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "379ae6497fb34f969d21b2cd664e8bfa", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e167a234-828d-4f05-8654-63cf97e50ba3 is DONE. 10.2 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 6f92a04e-af7e-41d6-9303-6366c1751294 is RUNNING. 
" ] }, "metadata": {}, @@ -2532,152 +2651,452 @@ " \n", " \n", " CENTROID_ID\n", + " NEAREST_CENTROIDS_DISTANCE\n", + " onehotencoded_island\n", + " standard_scaled_culmen_length_mm\n", + " standard_scaled_culmen_depth_mm\n", + " standard_scaled_flipper_length_mm\n", + " onehotencoded_sex\n", + " onehotencoded_species\n", " \n", " \n", " penguin_id\n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " 3\n", - " 3\n", - " \n", - " \n", - " 8\n", + " 1\n", " 3\n", + " [{'CENTROID_ID': 3, 'DISTANCE': 1.236380597035...\n", + " [{'index': 3, 'value': 1.0}]\n", + " -0.938587\n", + " 0.748033\n", + " -1.445145\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 17\n", + " 4\n", " 3\n", + " [{'CENTROID_ID': 3, 'DISTANCE': 1.039497631856...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.16745\n", + " 0.899528\n", + " -0.284269\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 23\n", + " 8\n", " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 1.171040485975...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.453222\n", + " -1.877885\n", + " 0.658942\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 25\n", - " 3\n", + " 11\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 0.969102754012...\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.12667\n", + " 0.697535\n", + " -0.792152\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 27\n", + " 13\n", " 3\n", + " [{'CENTROID_ID': 3, 'DISTANCE': 1.113138945949...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.183094\n", + " 1.404513\n", + " -0.792152\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 29\n", - " 3\n", + " 15\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 1.070996026772...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.867003\n", + " -0.766919\n", + " 0.513833\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 34\n", + " 16\n", " 3\n", + " [{'CENTROID_ID': 3, 'DISTANCE': 1.780136190720...\n", + " [{'index': 3, 'value': 1.0}]\n", + " -1.784958\n", + " 1.959995\n", + " -0.211715\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 35\n", - " 1\n", + " 23\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 1.382540667483...\n", + " [{'index': 2, 'value': 1.0}]\n", + " -0.355532\n", + " 0.647036\n", + " -1.5177\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 39\n", - " 3\n", + " 34\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 1.598627908302...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.600039\n", + " -1.776888\n", + " 0.949161\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 51\n", + " 36\n", " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 1.095162305190...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.129833\n", + " -1.423399\n", + " 1.23938\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 52\n", - " 3\n", + " 42\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 1.275841743930...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.615684\n", + " -0.514427\n", + " -0.429379\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 60\n", - " 3\n", + " 48\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 
0.882209023196...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.415606\n", + " -0.716421\n", + " 1.021716\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 61\n", - " 3\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.816202832282...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.396797\n", + " -1.170907\n", + " 1.457044\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 64\n", " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.735435721625...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.434414\n", + " -1.120408\n", + " 1.09427\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 65\n", - " 1\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 1.292559869148...\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.220711\n", + " 1.051024\n", + " -1.445145\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 67\n", - " 3\n", + " 68\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 0.876430138449...\n", + " [{'index': 3, 'value': 1.0}]\n", + " -1.484026\n", + " -0.009443\n", + " -1.009817\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 83\n", - " 3\n", + " 70\n", + " 4\n", + " [{'CENTROID_ID': 4, 'DISTANCE': 1.314229913955...\n", + " [{'index': 2, 'value': 1.0}]\n", + " 1.638141\n", + " 1.404513\n", + " 0.296168\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 85\n", - " 1\n", + " 72\n", + " 4\n", + " [{'CENTROID_ID': 4, 'DISTANCE': 0.938569518009...\n", + " [{'index': 2, 'value': 1.0}]\n", + " 0.829387\n", + " 0.142052\n", + " -0.719598\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 93\n", + " 74\n", " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 1.350320088546...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.242683\n", + " -1.524396\n", + " 0.586387\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 104\n", - " 3\n", + " 77\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 0.904806634663...\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.277136\n", + " -0.211437\n", + " -0.647043\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 105\n", + " 81\n", " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.919082578073...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.208715\n", + " -1.221405\n", + " 0.804051\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 108\n", - " 3\n", + " 91\n", + " 4\n", + " [{'CENTROID_ID': 4, 'DISTANCE': 0.760360038086...\n", + " [{'index': 2, 'value': 1.0}]\n", + " 1.261976\n", + " 0.647036\n", + " 0.005949\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 113\n", - " 3\n", + " 96\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.950188657227...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.246331\n", + " -1.322402\n", + " 0.731497\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 130\n", - " 1\n", + " 105\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 1.101316467029...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.803766\n", + " 0.445043\n", + " -1.009817\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", + " \n", + " \n", + " 111\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 1.549061068385...\n", + " 
[{'index': 1, 'value': 1.0}]\n", + " -1.164286\n", + " 0.697535\n", + " -2.098138\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", "\n", - "
<p>25 rows × 1 columns</p>
\n", - "[67 rows x 1 columns in total]" + "
<p>25 rows × 8 columns</p>
\n", + "[67 rows x 8 columns in total]" ], "text/plain": [ - " CENTROID_ID\n", - "penguin_id \n", - "3 3\n", - "8 3\n", - "17 3\n", - "23 1\n", - "25 3\n", - "27 3\n", - "29 3\n", - "34 3\n", - "35 1\n", - "39 3\n", - "51 1\n", - "52 3\n", - "60 3\n", - "61 3\n", - "64 1\n", - "65 1\n", - "67 3\n", - "83 3\n", - "85 1\n", - "93 1\n", - "104 3\n", - "105 1\n", - "108 3\n", - "113 3\n", - "130 1\n", - "...\n", + " CENTROID_ID NEAREST_CENTROIDS_DISTANCE \\\n", + "penguin_id \n", + "1 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.236380597035... \n", + "4 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.039497631856... \n", + "8 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.171040485975... \n", + "11 2 [{'CENTROID_ID': 2, 'DISTANCE': 0.969102754012... \n", + "13 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.113138945949... \n", + "15 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.070996026772... \n", + "16 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.780136190720... \n", + "23 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.382540667483... \n", + "34 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.598627908302... \n", + "36 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.095162305190... \n", + "42 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.275841743930... \n", + "48 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.882209023196... \n", + "61 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.816202832282... \n", + "64 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.735435721625... \n", + "65 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.292559869148... \n", + "68 2 [{'CENTROID_ID': 2, 'DISTANCE': 0.876430138449... \n", + "70 4 [{'CENTROID_ID': 4, 'DISTANCE': 1.314229913955... \n", + "72 4 [{'CENTROID_ID': 4, 'DISTANCE': 0.938569518009... \n", + "74 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.350320088546... \n", + "77 2 [{'CENTROID_ID': 2, 'DISTANCE': 0.904806634663... \n", + "81 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.919082578073... \n", + "91 4 [{'CENTROID_ID': 4, 'DISTANCE': 0.760360038086... \n", + "96 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.950188657227... \n", + "105 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.101316467029... \n", + "111 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.549061068385... 
\n", "\n", - "[67 rows x 1 columns]" + " onehotencoded_island standard_scaled_culmen_length_mm \\\n", + "penguin_id \n", + "1 [{'index': 3, 'value': 1.0}] -0.938587 \n", + "4 [{'index': 1, 'value': 1.0}] -0.16745 \n", + "8 [{'index': 1, 'value': 1.0}] 0.453222 \n", + "11 [{'index': 2, 'value': 1.0}] -1.12667 \n", + "13 [{'index': 1, 'value': 1.0}] -1.183094 \n", + "15 [{'index': 1, 'value': 1.0}] 0.867003 \n", + "16 [{'index': 3, 'value': 1.0}] -1.784958 \n", + "23 [{'index': 2, 'value': 1.0}] -0.355532 \n", + "34 [{'index': 1, 'value': 1.0}] -0.600039 \n", + "36 [{'index': 1, 'value': 1.0}] -0.129833 \n", + "42 [{'index': 1, 'value': 1.0}] -1.615684 \n", + "48 [{'index': 1, 'value': 1.0}] 0.415606 \n", + "61 [{'index': 1, 'value': 1.0}] 0.396797 \n", + "64 [{'index': 1, 'value': 1.0}] 0.434414 \n", + "65 [{'index': 2, 'value': 1.0}] -1.220711 \n", + "68 [{'index': 3, 'value': 1.0}] -1.484026 \n", + "70 [{'index': 2, 'value': 1.0}] 1.638141 \n", + "72 [{'index': 2, 'value': 1.0}] 0.829387 \n", + "74 [{'index': 1, 'value': 1.0}] -0.242683 \n", + "77 [{'index': 2, 'value': 1.0}] -1.277136 \n", + "81 [{'index': 1, 'value': 1.0}] 0.208715 \n", + "91 [{'index': 2, 'value': 1.0}] 1.261976 \n", + "96 [{'index': 1, 'value': 1.0}] 0.246331 \n", + "105 [{'index': 1, 'value': 1.0}] -1.803766 \n", + "111 [{'index': 1, 'value': 1.0}] -1.164286 \n", + "\n", + " standard_scaled_culmen_depth_mm \\\n", + "penguin_id \n", + "1 0.748033 \n", + "4 0.899528 \n", + "8 -1.877885 \n", + "11 0.697535 \n", + "13 1.404513 \n", + "15 -0.766919 \n", + "16 1.959995 \n", + "23 0.647036 \n", + "34 -1.776888 \n", + "36 -1.423399 \n", + "42 -0.514427 \n", + "48 -0.716421 \n", + "61 -1.170907 \n", + "64 -1.120408 \n", + "65 1.051024 \n", + "68 -0.009443 \n", + "70 1.404513 \n", + "72 0.142052 \n", + "74 -1.524396 \n", + "77 -0.211437 \n", + "81 -1.221405 \n", + "91 0.647036 \n", + "96 -1.322402 \n", + "105 0.445043 \n", + "111 0.697535 \n", + "\n", + " standard_scaled_flipper_length_mm onehotencoded_sex \\\n", + "penguin_id \n", + "1 -1.445145 [{'index': 2, 'value': 1.0}] \n", + "4 -0.284269 [{'index': 2, 'value': 1.0}] \n", + "8 0.658942 [{'index': 1, 'value': 1.0}] \n", + "11 -0.792152 [{'index': 1, 'value': 1.0}] \n", + "13 -0.792152 [{'index': 2, 'value': 1.0}] \n", + "15 0.513833 [{'index': 2, 'value': 1.0}] \n", + "16 -0.211715 [{'index': 2, 'value': 1.0}] \n", + "23 -1.5177 [{'index': 1, 'value': 1.0}] \n", + "34 0.949161 [{'index': 1, 'value': 1.0}] \n", + "36 1.23938 [{'index': 1, 'value': 1.0}] \n", + "42 -0.429379 [{'index': 1, 'value': 1.0}] \n", + "48 1.021716 [{'index': 2, 'value': 1.0}] \n", + "61 1.457044 [{'index': 2, 'value': 1.0}] \n", + "64 1.09427 [{'index': 1, 'value': 1.0}] \n", + "65 -1.445145 [{'index': 1, 'value': 1.0}] \n", + "68 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "70 0.296168 [{'index': 2, 'value': 1.0}] \n", + "72 -0.719598 [{'index': 2, 'value': 1.0}] \n", + "74 0.586387 [{'index': 1, 'value': 1.0}] \n", + "77 -0.647043 [{'index': 1, 'value': 1.0}] \n", + "81 0.804051 [{'index': 1, 'value': 1.0}] \n", + "91 0.005949 [{'index': 2, 'value': 1.0}] \n", + "96 0.731497 [{'index': 1, 'value': 1.0}] \n", + "105 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "111 -2.098138 [{'index': 1, 'value': 1.0}] \n", + "\n", + " onehotencoded_species \n", + "penguin_id \n", + "1 [{'index': 1, 'value': 1.0}] \n", + "4 [{'index': 1, 'value': 1.0}] \n", + "8 [{'index': 3, 'value': 1.0}] \n", + "11 [{'index': 1, 'value': 1.0}] \n", + "13 [{'index': 1, 'value': 1.0}] \n", + "15 [{'index': 3, 'value': 1.0}] 
\n", + "16 [{'index': 1, 'value': 1.0}] \n", + "23 [{'index': 1, 'value': 1.0}] \n", + "34 [{'index': 3, 'value': 1.0}] \n", + "36 [{'index': 3, 'value': 1.0}] \n", + "42 [{'index': 1, 'value': 1.0}] \n", + "48 [{'index': 3, 'value': 1.0}] \n", + "61 [{'index': 3, 'value': 1.0}] \n", + "64 [{'index': 3, 'value': 1.0}] \n", + "65 [{'index': 1, 'value': 1.0}] \n", + "68 [{'index': 1, 'value': 1.0}] \n", + "70 [{'index': 2, 'value': 1.0}] \n", + "72 [{'index': 2, 'value': 1.0}] \n", + "74 [{'index': 3, 'value': 1.0}] \n", + "77 [{'index': 1, 'value': 1.0}] \n", + "81 [{'index': 3, 'value': 1.0}] \n", + "91 [{'index': 2, 'value': 1.0}] \n", + "96 [{'index': 3, 'value': 1.0}] \n", + "105 [{'index': 1, 'value': 1.0}] \n", + "111 [{'index': 1, 'value': 1.0}] \n", + "\n", + "[67 rows x 8 columns]" ] }, - "execution_count": 26, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -2704,7 +3123,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -2721,7 +3140,7 @@ " ('linreg', LinearRegression())])" ] }, - "execution_count": 27, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -2748,18 +3167,16 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "887bf58cebf14bdba95db828390fd33d", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job b11be0d8-e6f1-41cb-8cb2-25a38e7ef311 is DONE. 24.7 kB processed.
Open Job" + ], "text/plain": [ - "HTML(value='Query job ed42cbb3-3d25-47ca-96c5-71a84e426a8c is RUNNING. " ] }, "metadata": {}, @@ -2767,13 +3184,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "24357055792a4eaaa60997fea0f76921", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job f32ea25c-be39-4726-a8f5-604ae83849a6 is DONE. 8.5 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 3fc74930-03b9-4a49-8ed3-c3edc4dd6e51 is RUNNING. " ] }, "metadata": {}, @@ -2781,13 +3196,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bba878d6d3e345f1a29aea50f7101e8f", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 86e29b78-76f5-4937-8bde-407b99af04a2 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 38a4ce3b-5c2a-4d44-b826-f24529d6500b is RUNNING. " ] }, "metadata": {}, @@ -2795,13 +3208,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4bc2c53aeb7d4a8280f9fbbe373f4b55", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job ca819734-0d41-4d9e-b743-09edae8c7fee is DONE. 29.6 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job ecad776d-77c8-4d94-8186-d5571b512b62 is RUNNING. " ] }, "metadata": {}, @@ -2809,13 +3220,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f4f695cb0a224102b6e26adeb1827981", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 49bb5bed-cc84-47e0-9a90-08ab01e00548 is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c9bfc58f-ce2c-47a9-bbc7-b10d9de9b5a6 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -2823,13 +3232,23 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cb1df595006d485288a1060299970e5e", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 1e40a085-2289-47dd-afd8-820413186b9f is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 8fd8036e-3753-433d-975b-c7b42406f648 is RUNNING. " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 60319296-a480-4f51-b7ad-190ac6de963a is DONE. 6.2 kB processed. 
Open Job" + ], + "text/plain": [ + "" ] }, "metadata": {}, @@ -2857,152 +3276,369 @@ " \n", " \n", " predicted_body_mass_g\n", + " island\n", + " culmen_length_mm\n", + " culmen_depth_mm\n", + " flipper_length_mm\n", + " sex\n", + " species\n", " \n", " \n", " penguin_id\n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " 3\n", - " 3394.116212\n", - " \n", - " \n", - " 8\n", - " 4048.683645\n", + " 1\n", + " 3781.396682\n", + " Torgersen\n", + " 39.1\n", + " 18.7\n", + " 181.0\n", + " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 17\n", - " 3976.452358\n", + " 4\n", + " 4124.102574\n", + " Biscoe\n", + " 43.2\n", + " 19.0\n", + " 197.0\n", + " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 23\n", - " 3541.580346\n", + " 8\n", + " 4670.338389\n", + " Biscoe\n", + " 46.5\n", + " 13.5\n", + " 210.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 25\n", - " 4032.842027\n", + " 11\n", + " 3529.411644\n", + " Dream\n", + " 38.1\n", + " 18.6\n", + " 190.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 27\n", - " 4118.34983\n", + " 13\n", + " 4014.09632\n", + " Biscoe\n", + " 37.8\n", + " 20.0\n", + " 190.0\n", + " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 29\n", - " 4087.765797\n", + " 15\n", + " 5212.407319\n", + " Biscoe\n", + " 48.7\n", + " 15.7\n", + " 208.0\n", + " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 34\n", - " 3183.75379\n", + " 16\n", + " 4163.590502\n", + " Torgersen\n", + " 34.6\n", + " 21.1\n", + " 198.0\n", + " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 35\n", - " 3418.800633\n", + " 23\n", + " 3392.44731\n", + " Dream\n", + " 42.2\n", + " 18.5\n", + " 180.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 39\n", - " 3519.18471\n", + " 34\n", + " 4698.299674\n", + " Biscoe\n", + " 40.9\n", + " 13.7\n", + " 214.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 51\n", - " 3398.133564\n", + " 36\n", + " 4828.221398\n", + " Biscoe\n", + " 43.4\n", + " 14.4\n", + " 218.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 52\n", - " 3223.614107\n", + " 42\n", + " 3430.582874\n", + " Biscoe\n", + " 35.5\n", + " 16.2\n", + " 195.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 60\n", - " 3445.012713\n", + " 48\n", + " 5314.254798\n", + " Biscoe\n", + " 46.3\n", + " 15.8\n", + " 215.0\n", + " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", " 61\n", - " 3505.637004\n", + " 5363.19995\n", + " Biscoe\n", + " 46.2\n", + " 14.9\n", + " 221.0\n", + " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", " 64\n", - " 3515.903779\n", + " 4855.90281\n", + " Biscoe\n", + " 46.4\n", + " 15.0\n", + " 216.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", " 65\n", - " 4028.361259\n", + " 3413.094869\n", + " Dream\n", + " 37.6\n", + " 19.3\n", + " 181.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 67\n", - " 4159.991956\n", + " 68\n", + " 3340.213193\n", + " Torgersen\n", + " 36.2\n", + " 17.2\n", + " 187.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 83\n", - " 3348.167212\n", + " 70\n", + " 4228.726508\n", + " Dream\n", + " 52.8\n", + " 20.0\n", + " 205.0\n", + " MALE\n", + " Chinstrap penguin (Pygoscelis 
antarctica)\n", " \n", " \n", - " 85\n", - " 3485.048557\n", + " 72\n", + " 3811.532821\n", + " Dream\n", + " 48.5\n", + " 17.5\n", + " 191.0\n", + " MALE\n", + " Chinstrap penguin (Pygoscelis antarctica)\n", " \n", " \n", - " 93\n", - " 4172.872284\n", + " 74\n", + " 4659.765013\n", + " Biscoe\n", + " 42.8\n", + " 14.2\n", + " 209.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 104\n", - " 3299.300454\n", + " 77\n", + " 3453.383042\n", + " Dream\n", + " 37.3\n", + " 16.8\n", + " 192.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 105\n", - " 3515.68617\n", + " 81\n", + " 4766.239424\n", + " Biscoe\n", + " 45.2\n", + " 14.8\n", + " 212.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 108\n", - " 3405.222757\n", + " 91\n", + " 4057.801947\n", + " Dream\n", + " 50.8\n", + " 18.5\n", + " 201.0\n", + " MALE\n", + " Chinstrap penguin (Pygoscelis antarctica)\n", + " \n", + " \n", + " 96\n", + " 4739.821792\n", + " Biscoe\n", + " 45.4\n", + " 14.6\n", + " 211.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 113\n", - " 4209.13832\n", + " 105\n", + " 3394.886275\n", + " Biscoe\n", + " 34.5\n", + " 18.1\n", + " 187.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 130\n", - " 4197.90382\n", + " 111\n", + " 3201.48777\n", + " Biscoe\n", + " 37.9\n", + " 18.6\n", + " 172.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", "\n", - "
<p>25 rows × 1 columns</p>
\n", - "[67 rows x 1 columns in total]" - ], - "text/plain": [ - " predicted_body_mass_g\n", - "penguin_id \n", - "3 3394.116212\n", - "8 4048.683645\n", - "17 3976.452358\n", - "23 3541.580346\n", - "25 4032.842027\n", - "27 4118.34983\n", - "29 4087.765797\n", - "34 3183.75379\n", - "35 3418.800633\n", - "39 3519.18471\n", - "51 3398.133564\n", - "52 3223.614107\n", - "60 3445.012713\n", - "61 3505.637004\n", - "64 3515.903779\n", - "65 4028.361259\n", - "67 4159.991956\n", - "83 3348.167212\n", - "85 3485.048557\n", - "93 4172.872284\n", - "104 3299.300454\n", - "105 3515.68617\n", - "108 3405.222757\n", - "113 4209.13832\n", - "130 4197.90382\n", - "...\n", + "
<p>25 rows × 7 columns</p>
\n", + "[67 rows x 7 columns in total]" + ], + "text/plain": [ + " predicted_body_mass_g island culmen_length_mm \\\n", + "penguin_id \n", + "1 3781.396682 Torgersen 39.1 \n", + "4 4124.102574 Biscoe 43.2 \n", + "8 4670.338389 Biscoe 46.5 \n", + "11 3529.411644 Dream 38.1 \n", + "13 4014.09632 Biscoe 37.8 \n", + "15 5212.407319 Biscoe 48.7 \n", + "16 4163.590502 Torgersen 34.6 \n", + "23 3392.44731 Dream 42.2 \n", + "34 4698.299674 Biscoe 40.9 \n", + "36 4828.221398 Biscoe 43.4 \n", + "42 3430.582874 Biscoe 35.5 \n", + "48 5314.254798 Biscoe 46.3 \n", + "61 5363.19995 Biscoe 46.2 \n", + "64 4855.90281 Biscoe 46.4 \n", + "65 3413.094869 Dream 37.6 \n", + "68 3340.213193 Torgersen 36.2 \n", + "70 4228.726508 Dream 52.8 \n", + "72 3811.532821 Dream 48.5 \n", + "74 4659.765013 Biscoe 42.8 \n", + "77 3453.383042 Dream 37.3 \n", + "81 4766.239424 Biscoe 45.2 \n", + "91 4057.801947 Dream 50.8 \n", + "96 4739.821792 Biscoe 45.4 \n", + "105 3394.886275 Biscoe 34.5 \n", + "111 3201.48777 Biscoe 37.9 \n", "\n", - "[67 rows x 1 columns]" + " culmen_depth_mm flipper_length_mm sex \\\n", + "penguin_id \n", + "1 18.7 181.0 MALE \n", + "4 19.0 197.0 MALE \n", + "8 13.5 210.0 FEMALE \n", + "11 18.6 190.0 FEMALE \n", + "13 20.0 190.0 MALE \n", + "15 15.7 208.0 MALE \n", + "16 21.1 198.0 MALE \n", + "23 18.5 180.0 FEMALE \n", + "34 13.7 214.0 FEMALE \n", + "36 14.4 218.0 FEMALE \n", + "42 16.2 195.0 FEMALE \n", + "48 15.8 215.0 MALE \n", + "61 14.9 221.0 MALE \n", + "64 15.0 216.0 FEMALE \n", + "65 19.3 181.0 FEMALE \n", + "68 17.2 187.0 FEMALE \n", + "70 20.0 205.0 MALE \n", + "72 17.5 191.0 MALE \n", + "74 14.2 209.0 FEMALE \n", + "77 16.8 192.0 FEMALE \n", + "81 14.8 212.0 FEMALE \n", + "91 18.5 201.0 MALE \n", + "96 14.6 211.0 FEMALE \n", + "105 18.1 187.0 FEMALE \n", + "111 18.6 172.0 FEMALE \n", + "\n", + " species \n", + "penguin_id \n", + "1 Adelie Penguin (Pygoscelis adeliae) \n", + "4 Adelie Penguin (Pygoscelis adeliae) \n", + "8 Gentoo penguin (Pygoscelis papua) \n", + "11 Adelie Penguin (Pygoscelis adeliae) \n", + "13 Adelie Penguin (Pygoscelis adeliae) \n", + "15 Gentoo penguin (Pygoscelis papua) \n", + "16 Adelie Penguin (Pygoscelis adeliae) \n", + "23 Adelie Penguin (Pygoscelis adeliae) \n", + "34 Gentoo penguin (Pygoscelis papua) \n", + "36 Gentoo penguin (Pygoscelis papua) \n", + "42 Adelie Penguin (Pygoscelis adeliae) \n", + "48 Gentoo penguin (Pygoscelis papua) \n", + "61 Gentoo penguin (Pygoscelis papua) \n", + "64 Gentoo penguin (Pygoscelis papua) \n", + "65 Adelie Penguin (Pygoscelis adeliae) \n", + "68 Adelie Penguin (Pygoscelis adeliae) \n", + "70 Chinstrap penguin (Pygoscelis antarctica) \n", + "72 Chinstrap penguin (Pygoscelis antarctica) \n", + "74 Gentoo penguin (Pygoscelis papua) \n", + "77 Adelie Penguin (Pygoscelis adeliae) \n", + "81 Gentoo penguin (Pygoscelis papua) \n", + "91 Chinstrap penguin (Pygoscelis antarctica) \n", + "96 Gentoo penguin (Pygoscelis papua) \n", + "105 Adelie Penguin (Pygoscelis adeliae) \n", + "111 Adelie Penguin (Pygoscelis adeliae) \n", + "\n", + "[67 rows x 7 columns]" ] }, - "execution_count": 28, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -3034,60 +3670,16 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2d32081be31f44abb8de67e2209d76cd", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job 2a043039-670f-4eb8-9cf0-765ee6ed7de6 is RUNNING. 
Open Job" + ], "text/plain": [ - "HTML(value='Query job bc8b2042-1e13-441c-9531-300ed5badb7a is RUNNING. " ] }, "metadata": {}, @@ -3095,13 +3687,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4588ae10de634460bf4026ddd9076351", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 7f1f565b-0f73-4a4e-b33f-8484fa260838 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 5e867182-dd7a-4aff-87a8-f7596e900fd5 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -3109,13 +3699,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8209cf8286a545ebb7b6ef9d002a43a1", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job d4b9d4a6-d75e-46e1-b092-ab58e8aef890 is DONE. 48 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job d4cdb016-8f1e-4960-8ed7-4524ccc5a8a8 is RUNNING. " ] }, "metadata": {}, @@ -3153,12 +3741,12 @@ " \n", " \n", " 0\n", - " 229.48269\n", - " 82962.794947\n", - " 0.004248\n", - " 206.728384\n", - " 0.88633\n", - " 0.892953\n", + " 216.444357\n", + " 72639.698707\n", + " 0.00463\n", + " 170.588356\n", + " 0.896396\n", + " 0.900547\n", " \n", " \n", "\n", @@ -3167,15 +3755,15 @@ ], "text/plain": [ " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - "0 229.48269 82962.794947 0.004248 \n", + "0 216.444357 72639.698707 0.00463 \n", "\n", " median_absolute_error r2_score explained_variance \n", - "0 206.728384 0.88633 0.892953 \n", + "0 170.588356 0.896396 0.900547 \n", "\n", "[1 rows x 6 columns]" ] }, - "execution_count": 29, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -3195,18 +3783,16 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f32692d89f00406499f4ea5aa55268fb", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 73448ee8-698b-435f-b11e-6fe2de3bcd8d is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job e57383ef-f043-458b-96c6-893e7c5b0c00 is RUNNING. " ] }, "metadata": {}, @@ -3214,13 +3800,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "72e5f23a99de4a818c8493b8b4f3854d", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e002f59d-a03c-4ec9-a85a-93adbfd7bd17 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 1a9db485-477b-43e2-94eb-dea7dc21d45d is RUNNING. " ] }, "metadata": {}, @@ -3228,13 +3812,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9d5333a91b504dd9be51c997715530ab", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 4ab1febc-fb55-473a-b295-69e4329cc5f0 is DONE. 30.0 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 4570a563-b8e0-4308-b8cb-c4731491d4f7 is RUNNING. 
" ] }, "metadata": {}, @@ -3243,10 +3825,10 @@ { "data": { "text/plain": [ - "0.8863300923278365" + "0.8963962044533755" ] }, - "execution_count": 30, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -3254,7 +3836,7 @@ "source": [ "from bigframes.ml.metrics import r2_score\n", "\n", - "r2_score(y_test, predicted_y_test)" + "r2_score(y_test, predicted_y_test[\"predicted_body_mass_g\"])" ] }, { @@ -3274,57 +3856,9 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 15, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "fbc4a70f31d4465b974a7f7c9cc97731", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Copy job c2413be4-6972-4e36-8234-5063628b6d71 is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job 31a5b656-000e-4238-9fd9-c6e644ca298f is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -67,13 +37,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0f25faa156584cc59dda9b0e60f72534", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 12e0f983-695e-4903-8ff1-2f353d7e8cba is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job d8eed0ca-7ce9-4ed8-a592-e16af9f9db8d is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -113,250 +81,250 @@ " \n", " 0\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 36.6\n", - " 18.4\n", - " 184.0\n", - " 3475.0\n", - " FEMALE\n", + " Biscoe\n", + " 40.1\n", + " 18.9\n", + " 188.0\n", + " 4300.0\n", + " MALE\n", " \n", " \n", " 1\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 39.8\n", - " 19.1\n", - " 184.0\n", - " 4650.0\n", + " Torgersen\n", + " 39.1\n", + " 18.7\n", + " 181.0\n", + " 3750.0\n", " MALE\n", " \n", " \n", " 2\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.9\n", - " 18.9\n", - " 184.0\n", - " 3900.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 47.4\n", + " 14.6\n", + " 212.0\n", + " 4725.0\n", + " FEMALE\n", " \n", " \n", " 3\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 46.5\n", - " 17.9\n", - " 192.0\n", - " 3500.0\n", + " 42.5\n", + " 16.7\n", + " 187.0\n", + " 3350.0\n", " FEMALE\n", " \n", " \n", " 4\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.3\n", - " 16.8\n", - " 192.0\n", - " 3000.0\n", - " FEMALE\n", + " Biscoe\n", + " 43.2\n", + " 19.0\n", + " 197.0\n", + " 4775.0\n", + " MALE\n", " \n", " \n", " 5\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 43.2\n", - " 18.5\n", - " 192.0\n", - " 4100.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.7\n", + " 15.3\n", + " 219.0\n", + " 5200.0\n", " MALE\n", " \n", " \n", " 6\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 46.9\n", - " 16.6\n", - " 192.0\n", - " 2700.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 41.3\n", + " 21.1\n", + " 195.0\n", + " 4400.0\n", + " MALE\n", " \n", " \n", " 7\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 50.5\n", - " 18.4\n", - " 200.0\n", - " 3400.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 45.2\n", + " 13.8\n", + " 215.0\n", + " 4750.0\n", " FEMALE\n", " \n", " \n", " 8\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 49.5\n", - " 19.0\n", - " 200.0\n", - " 3800.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " 
Biscoe\n", + " 46.5\n", + " 13.5\n", + " 210.0\n", + " 4550.0\n", + " FEMALE\n", " \n", " \n", " 9\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.2\n", - " 20.1\n", - " 200.0\n", - " 3975.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 50.5\n", + " 15.2\n", + " 216.0\n", + " 5000.0\n", + " FEMALE\n", " \n", " \n", " 10\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.8\n", - " 18.9\n", - " 208.0\n", - " 4300.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 48.2\n", + " 15.6\n", + " 221.0\n", + " 5100.0\n", " MALE\n", " \n", " \n", " 11\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 39.0\n", - " 18.7\n", - " 185.0\n", - " 3650.0\n", - " MALE\n", + " 38.1\n", + " 18.6\n", + " 190.0\n", + " 3700.0\n", + " FEMALE\n", " \n", " \n", " 12\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.0\n", - " 16.9\n", - " 185.0\n", - " 3000.0\n", - " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 50.7\n", + " 15.0\n", + " 223.0\n", + " 5550.0\n", + " MALE\n", " \n", " \n", " 13\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 47.0\n", - " 17.3\n", - " 185.0\n", - " 3700.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 37.8\n", + " 20.0\n", + " 190.0\n", + " 4250.0\n", + " MALE\n", " \n", " \n", " 14\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 34.0\n", - " 17.1\n", - " 185.0\n", - " 3400.0\n", + " Biscoe\n", + " 35.0\n", + " 17.9\n", + " 190.0\n", + " 3450.0\n", " FEMALE\n", " \n", " \n", " 15\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.0\n", - " 16.5\n", - " 185.0\n", - " 3400.0\n", - " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 48.7\n", + " 15.7\n", + " 208.0\n", + " 5350.0\n", + " MALE\n", " \n", " \n", " 16\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 45.7\n", - " 17.3\n", - " 193.0\n", - " 3600.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Torgersen\n", + " 34.6\n", + " 21.1\n", + " 198.0\n", + " 4400.0\n", + " MALE\n", " \n", " \n", " 17\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 50.6\n", - " 19.4\n", - " 193.0\n", - " 3800.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.8\n", + " 15.4\n", + " 215.0\n", + " 5150.0\n", " MALE\n", " \n", " \n", " 18\n", - " Adelie Penguin (Pygoscelis adeliae)\n", + " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 39.7\n", - " 17.9\n", - " 193.0\n", - " 4250.0\n", + " 50.3\n", + " 20.0\n", + " 197.0\n", + " 3300.0\n", " MALE\n", " \n", " \n", " 19\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 37.8\n", + " 37.2\n", " 18.1\n", - " 193.0\n", - " 3750.0\n", + " 178.0\n", + " 3900.0\n", " MALE\n", " \n", " \n", " 20\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 46.6\n", - " 17.8\n", - " 193.0\n", - " 3800.0\n", - " FEMALE\n", + " 51.0\n", + " 18.8\n", + " 203.0\n", + " 4100.0\n", + " MALE\n", " \n", " \n", " 21\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 51.3\n", - " 19.2\n", - " 193.0\n", - " 3650.0\n", - " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 40.5\n", + " 17.9\n", + " 187.0\n", + " 3200.0\n", + " FEMALE\n", " \n", " \n", " 22\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.2\n", - " 17.1\n", - " 193.0\n", - " 3400.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " 
Biscoe\n", + " 45.5\n", + " 13.9\n", + " 210.0\n", + " 4200.0\n", " FEMALE\n", " \n", " \n", " 23\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 36.8\n", + " 42.2\n", " 18.5\n", - " 193.0\n", - " 3500.0\n", + " 180.0\n", + " 3550.0\n", " FEMALE\n", " \n", " \n", " 24\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 49.6\n", - " 18.2\n", - " 193.0\n", + " 51.7\n", + " 20.3\n", + " 194.0\n", " 3775.0\n", " MALE\n", " \n", @@ -366,65 +334,65 @@ "[344 rows x 7 columns in total]" ], "text/plain": [ - " species island culmen_length_mm \\\n", - "0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 \n", - "1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 \n", - "2 Adelie Penguin (Pygoscelis adeliae) Dream 40.9 \n", - "3 Chinstrap penguin (Pygoscelis antarctica) Dream 46.5 \n", - "4 Adelie Penguin (Pygoscelis adeliae) Dream 37.3 \n", - "5 Adelie Penguin (Pygoscelis adeliae) Dream 43.2 \n", - "6 Chinstrap penguin (Pygoscelis antarctica) Dream 46.9 \n", - "7 Chinstrap penguin (Pygoscelis antarctica) Dream 50.5 \n", - "8 Chinstrap penguin (Pygoscelis antarctica) Dream 49.5 \n", - "9 Adelie Penguin (Pygoscelis adeliae) Dream 40.2 \n", - "10 Adelie Penguin (Pygoscelis adeliae) Dream 40.8 \n", - "11 Adelie Penguin (Pygoscelis adeliae) Dream 39.0 \n", - "12 Adelie Penguin (Pygoscelis adeliae) Dream 37.0 \n", - "13 Chinstrap penguin (Pygoscelis antarctica) Dream 47.0 \n", - "14 Adelie Penguin (Pygoscelis adeliae) Dream 34.0 \n", - "15 Adelie Penguin (Pygoscelis adeliae) Dream 37.0 \n", - "16 Chinstrap penguin (Pygoscelis antarctica) Dream 45.7 \n", - "17 Chinstrap penguin (Pygoscelis antarctica) Dream 50.6 \n", - "18 Adelie Penguin (Pygoscelis adeliae) Dream 39.7 \n", - "19 Adelie Penguin (Pygoscelis adeliae) Dream 37.8 \n", - "20 Chinstrap penguin (Pygoscelis antarctica) Dream 46.6 \n", - "21 Chinstrap penguin (Pygoscelis antarctica) Dream 51.3 \n", - "22 Adelie Penguin (Pygoscelis adeliae) Dream 40.2 \n", - "23 Adelie Penguin (Pygoscelis adeliae) Dream 36.8 \n", - "24 Chinstrap penguin (Pygoscelis antarctica) Dream 49.6 \n", + " species island culmen_length_mm \\\n", + "0 Adelie Penguin (Pygoscelis adeliae) Biscoe 40.1 \n", + "1 Adelie Penguin (Pygoscelis adeliae) Torgersen 39.1 \n", + "2 Gentoo penguin (Pygoscelis papua) Biscoe 47.4 \n", + "3 Chinstrap penguin (Pygoscelis antarctica) Dream 42.5 \n", + "4 Adelie Penguin (Pygoscelis adeliae) Biscoe 43.2 \n", + "5 Gentoo penguin (Pygoscelis papua) Biscoe 46.7 \n", + "6 Adelie Penguin (Pygoscelis adeliae) Biscoe 41.3 \n", + "7 Gentoo penguin (Pygoscelis papua) Biscoe 45.2 \n", + "8 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 \n", + "9 Gentoo penguin (Pygoscelis papua) Biscoe 50.5 \n", + "10 Gentoo penguin (Pygoscelis papua) Biscoe 48.2 \n", + "11 Adelie Penguin (Pygoscelis adeliae) Dream 38.1 \n", + "12 Gentoo penguin (Pygoscelis papua) Biscoe 50.7 \n", + "13 Adelie Penguin (Pygoscelis adeliae) Biscoe 37.8 \n", + "14 Adelie Penguin (Pygoscelis adeliae) Biscoe 35.0 \n", + "15 Gentoo penguin (Pygoscelis papua) Biscoe 48.7 \n", + "16 Adelie Penguin (Pygoscelis adeliae) Torgersen 34.6 \n", + "17 Gentoo penguin (Pygoscelis papua) Biscoe 46.8 \n", + "18 Chinstrap penguin (Pygoscelis antarctica) Dream 50.3 \n", + "19 Adelie Penguin (Pygoscelis adeliae) Dream 37.2 \n", + "20 Chinstrap penguin (Pygoscelis antarctica) Dream 51.0 \n", + "21 Adelie Penguin (Pygoscelis adeliae) Biscoe 40.5 \n", + "22 Gentoo penguin (Pygoscelis papua) Biscoe 45.5 \n", + "23 Adelie Penguin (Pygoscelis adeliae) Dream 42.2 \n", + "24 Chinstrap penguin 
(Pygoscelis antarctica) Dream 51.7 \n", "\n", " culmen_depth_mm flipper_length_mm body_mass_g sex \n", - "0 18.4 184.0 3475.0 FEMALE \n", - "1 19.1 184.0 4650.0 MALE \n", - "2 18.9 184.0 3900.0 MALE \n", - "3 17.9 192.0 3500.0 FEMALE \n", - "4 16.8 192.0 3000.0 FEMALE \n", - "5 18.5 192.0 4100.0 MALE \n", - "6 16.6 192.0 2700.0 FEMALE \n", - "7 18.4 200.0 3400.0 FEMALE \n", - "8 19.0 200.0 3800.0 MALE \n", - "9 20.1 200.0 3975.0 MALE \n", - "10 18.9 208.0 4300.0 MALE \n", - "11 18.7 185.0 3650.0 MALE \n", - "12 16.9 185.0 3000.0 FEMALE \n", - "13 17.3 185.0 3700.0 FEMALE \n", - "14 17.1 185.0 3400.0 FEMALE \n", - "15 16.5 185.0 3400.0 FEMALE \n", - "16 17.3 193.0 3600.0 FEMALE \n", - "17 19.4 193.0 3800.0 MALE \n", - "18 17.9 193.0 4250.0 MALE \n", - "19 18.1 193.0 3750.0 MALE \n", - "20 17.8 193.0 3800.0 FEMALE \n", - "21 19.2 193.0 3650.0 MALE \n", - "22 17.1 193.0 3400.0 FEMALE \n", - "23 18.5 193.0 3500.0 FEMALE \n", - "24 18.2 193.0 3775.0 MALE \n", + "0 18.9 188.0 4300.0 MALE \n", + "1 18.7 181.0 3750.0 MALE \n", + "2 14.6 212.0 4725.0 FEMALE \n", + "3 16.7 187.0 3350.0 FEMALE \n", + "4 19.0 197.0 4775.0 MALE \n", + "5 15.3 219.0 5200.0 MALE \n", + "6 21.1 195.0 4400.0 MALE \n", + "7 13.8 215.0 4750.0 FEMALE \n", + "8 13.5 210.0 4550.0 FEMALE \n", + "9 15.2 216.0 5000.0 FEMALE \n", + "10 15.6 221.0 5100.0 MALE \n", + "11 18.6 190.0 3700.0 FEMALE \n", + "12 15.0 223.0 5550.0 MALE \n", + "13 20.0 190.0 4250.0 MALE \n", + "14 17.9 190.0 3450.0 FEMALE \n", + "15 15.7 208.0 5350.0 MALE \n", + "16 21.1 198.0 4400.0 MALE \n", + "17 15.4 215.0 5150.0 MALE \n", + "18 20.0 197.0 3300.0 MALE \n", + "19 18.1 178.0 3900.0 MALE \n", + "20 18.8 203.0 4100.0 MALE \n", + "21 17.9 187.0 3200.0 FEMALE \n", + "22 13.9 210.0 4200.0 FEMALE \n", + "23 18.5 180.0 3550.0 FEMALE \n", + "24 20.3 194.0 3775.0 MALE \n", "...\n", "\n", "[344 rows x 7 columns]" ] }, - "execution_count": 12, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -450,32 +418,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a9ad907fa6e64a61a9dce420bc7d2beb", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job 3537a10a-641a-4d40-ae47-449c641b1bc5 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 34101409-7c65-4045-ad52-c6ba24dc9cbb is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -483,13 +435,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "514e68d5b0b4452a9ccdff947848541a", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 2af0b0d6-c11b-499e-8d25-a2c628b2853b is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 74190ac2-21a2-47b0-bc21-ef5373565f17 is DONE. 0 Bytes processed. 
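The 344-row table above is the full penguins dataset as loaded by the notebook. A hedged sketch of the load step (the public table id is an assumption; this part of the diff does not show the source cell):

```python
import bigframes.pandas as bpd

# Assumed source: the BigQuery public penguins table.
df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
df.head(25)
```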
" ] }, "metadata": {}, @@ -527,294 +477,294 @@ " \n", " \n", " 0\n", - " Dream\n", - " 36.6\n", - " 18.4\n", - " 184.0\n", - " 3475.0\n", - " FEMALE\n", - " \n", - " \n", - " 1\n", - " Dream\n", - " 39.8\n", - " 19.1\n", - " 184.0\n", - " 4650.0\n", + " Biscoe\n", + " 40.1\n", + " 18.9\n", + " 188.0\n", + " 4300.0\n", " MALE\n", " \n", " \n", - " 2\n", - " Dream\n", - " 40.9\n", - " 18.9\n", - " 184.0\n", - " 3900.0\n", + " 1\n", + " Torgersen\n", + " 39.1\n", + " 18.7\n", + " 181.0\n", + " 3750.0\n", " MALE\n", " \n", " \n", " 4\n", - " Dream\n", - " 37.3\n", - " 16.8\n", - " 192.0\n", - " 3000.0\n", - " FEMALE\n", - " \n", - " \n", - " 5\n", - " Dream\n", + " Biscoe\n", " 43.2\n", - " 18.5\n", - " 192.0\n", - " 4100.0\n", - " MALE\n", - " \n", - " \n", - " 9\n", - " Dream\n", - " 40.2\n", - " 20.1\n", - " 200.0\n", - " 3975.0\n", + " 19.0\n", + " 197.0\n", + " 4775.0\n", " MALE\n", " \n", " \n", - " 10\n", - " Dream\n", - " 40.8\n", - " 18.9\n", - " 208.0\n", - " 4300.0\n", + " 6\n", + " Biscoe\n", + " 41.3\n", + " 21.1\n", + " 195.0\n", + " 4400.0\n", " MALE\n", " \n", " \n", " 11\n", " Dream\n", - " 39.0\n", - " 18.7\n", - " 185.0\n", - " 3650.0\n", - " MALE\n", - " \n", - " \n", - " 12\n", - " Dream\n", - " 37.0\n", - " 16.9\n", - " 185.0\n", - " 3000.0\n", + " 38.1\n", + " 18.6\n", + " 190.0\n", + " 3700.0\n", " FEMALE\n", " \n", " \n", - " 14\n", - " Dream\n", - " 34.0\n", - " 17.1\n", - " 185.0\n", - " 3400.0\n", - " FEMALE\n", + " 13\n", + " Biscoe\n", + " 37.8\n", + " 20.0\n", + " 190.0\n", + " 4250.0\n", + " MALE\n", " \n", " \n", - " 15\n", - " Dream\n", - " 37.0\n", - " 16.5\n", - " 185.0\n", - " 3400.0\n", + " 14\n", + " Biscoe\n", + " 35.0\n", + " 17.9\n", + " 190.0\n", + " 3450.0\n", " FEMALE\n", " \n", " \n", - " 18\n", - " Dream\n", - " 39.7\n", - " 17.9\n", - " 193.0\n", - " 4250.0\n", + " 16\n", + " Torgersen\n", + " 34.6\n", + " 21.1\n", + " 198.0\n", + " 4400.0\n", " MALE\n", " \n", " \n", " 19\n", " Dream\n", - " 37.8\n", + " 37.2\n", " 18.1\n", - " 193.0\n", - " 3750.0\n", + " 178.0\n", + " 3900.0\n", " MALE\n", " \n", " \n", - " 22\n", - " Dream\n", - " 40.2\n", - " 17.1\n", - " 193.0\n", - " 3400.0\n", + " 21\n", + " Biscoe\n", + " 40.5\n", + " 17.9\n", + " 187.0\n", + " 3200.0\n", " FEMALE\n", " \n", " \n", " 23\n", " Dream\n", - " 36.8\n", + " 42.2\n", " 18.5\n", - " 193.0\n", - " 3500.0\n", + " 180.0\n", + " 3550.0\n", " FEMALE\n", " \n", " \n", - " 26\n", + " 30\n", " Dream\n", - " 41.5\n", - " 18.5\n", - " 201.0\n", - " 4000.0\n", + " 39.2\n", + " 21.1\n", + " 196.0\n", + " 4150.0\n", " MALE\n", " \n", " \n", - " 31\n", - " Dream\n", - " 33.1\n", - " 16.1\n", - " 178.0\n", - " 2900.0\n", - " FEMALE\n", + " 32\n", + " Torgersen\n", + " 42.9\n", + " 17.6\n", + " 196.0\n", + " 4700.0\n", + " MALE\n", " \n", " \n", - " 32\n", + " 38\n", " Dream\n", - " 37.2\n", - " 18.1\n", - " 178.0\n", + " 41.1\n", + " 17.5\n", + " 190.0\n", " 3900.0\n", " MALE\n", " \n", " \n", - " 33\n", - " Dream\n", - " 39.5\n", - " 16.7\n", - " 178.0\n", - " 3250.0\n", + " 40\n", + " Torgersen\n", + " 38.6\n", + " 21.2\n", + " 191.0\n", + " 3800.0\n", + " MALE\n", + " \n", + " \n", + " 42\n", + " Biscoe\n", + " 35.5\n", + " 16.2\n", + " 195.0\n", + " 3350.0\n", " FEMALE\n", " \n", " \n", - " 35\n", + " 44\n", " Dream\n", - " 36.0\n", - " 18.5\n", + " 39.2\n", + " 18.6\n", + " 190.0\n", + " 4250.0\n", + " MALE\n", + " \n", + " \n", + " 45\n", + " Torgersen\n", + " 35.2\n", + " 15.9\n", " 186.0\n", - " 3100.0\n", + " 3050.0\n", " FEMALE\n", " \n", " \n", - " 36\n", + " 46\n", " Dream\n", + " 
43.2\n", + " 18.5\n", + " 192.0\n", + " 4100.0\n", + " MALE\n", + " \n", + " \n", + " 49\n", + " Biscoe\n", " 39.6\n", - " 18.1\n", + " 17.7\n", " 186.0\n", - " 4450.0\n", - " MALE\n", + " 3500.0\n", + " FEMALE\n", " \n", " \n", - " 38\n", - " Dream\n", - " 41.3\n", + " 53\n", + " Biscoe\n", + " 45.6\n", " 20.3\n", - " 194.0\n", - " 3550.0\n", + " 191.0\n", + " 4600.0\n", " MALE\n", " \n", " \n", - " 41\n", - " Dream\n", - " 35.7\n", - " 18.0\n", - " 202.0\n", - " 3550.0\n", + " 58\n", + " Torgersen\n", + " 40.9\n", + " 16.8\n", + " 191.0\n", + " 3700.0\n", " FEMALE\n", " \n", " \n", - " 51\n", - " Dream\n", - " 38.1\n", - " 17.6\n", - " 187.0\n", - " 3425.0\n", + " 60\n", + " Torgersen\n", + " 40.3\n", + " 18.0\n", + " 195.0\n", + " 3250.0\n", " FEMALE\n", " \n", " \n", - " 53\n", + " 62\n", " Dream\n", " 36.0\n", - " 17.1\n", - " 187.0\n", - " 3700.0\n", + " 18.5\n", + " 186.0\n", + " 3100.0\n", " FEMALE\n", " \n", + " \n", + " 63\n", + " Torgersen\n", + " 39.3\n", + " 20.6\n", + " 190.0\n", + " 3650.0\n", + " MALE\n", + " \n", " \n", "\n", "

"<p>25 rows × 6 columns</p>\n",

\n", "[146 rows x 6 columns in total]" ], "text/plain": [ - " island culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g \\\n", - "0 Dream 36.6 18.4 184.0 3475.0 \n", - "1 Dream 39.8 19.1 184.0 4650.0 \n", - "2 Dream 40.9 18.9 184.0 3900.0 \n", - "4 Dream 37.3 16.8 192.0 3000.0 \n", - "5 Dream 43.2 18.5 192.0 4100.0 \n", - "9 Dream 40.2 20.1 200.0 3975.0 \n", - "10 Dream 40.8 18.9 208.0 4300.0 \n", - "11 Dream 39.0 18.7 185.0 3650.0 \n", - "12 Dream 37.0 16.9 185.0 3000.0 \n", - "14 Dream 34.0 17.1 185.0 3400.0 \n", - "15 Dream 37.0 16.5 185.0 3400.0 \n", - "18 Dream 39.7 17.9 193.0 4250.0 \n", - "19 Dream 37.8 18.1 193.0 3750.0 \n", - "22 Dream 40.2 17.1 193.0 3400.0 \n", - "23 Dream 36.8 18.5 193.0 3500.0 \n", - "26 Dream 41.5 18.5 201.0 4000.0 \n", - "31 Dream 33.1 16.1 178.0 2900.0 \n", - "32 Dream 37.2 18.1 178.0 3900.0 \n", - "33 Dream 39.5 16.7 178.0 3250.0 \n", - "35 Dream 36.0 18.5 186.0 3100.0 \n", - "36 Dream 39.6 18.1 186.0 4450.0 \n", - "38 Dream 41.3 20.3 194.0 3550.0 \n", - "41 Dream 35.7 18.0 202.0 3550.0 \n", - "51 Dream 38.1 17.6 187.0 3425.0 \n", - "53 Dream 36.0 17.1 187.0 3700.0 \n", + " island culmen_length_mm culmen_depth_mm flipper_length_mm \\\n", + "0 Biscoe 40.1 18.9 188.0 \n", + "1 Torgersen 39.1 18.7 181.0 \n", + "4 Biscoe 43.2 19.0 197.0 \n", + "6 Biscoe 41.3 21.1 195.0 \n", + "11 Dream 38.1 18.6 190.0 \n", + "13 Biscoe 37.8 20.0 190.0 \n", + "14 Biscoe 35.0 17.9 190.0 \n", + "16 Torgersen 34.6 21.1 198.0 \n", + "19 Dream 37.2 18.1 178.0 \n", + "21 Biscoe 40.5 17.9 187.0 \n", + "23 Dream 42.2 18.5 180.0 \n", + "30 Dream 39.2 21.1 196.0 \n", + "32 Torgersen 42.9 17.6 196.0 \n", + "38 Dream 41.1 17.5 190.0 \n", + "40 Torgersen 38.6 21.2 191.0 \n", + "42 Biscoe 35.5 16.2 195.0 \n", + "44 Dream 39.2 18.6 190.0 \n", + "45 Torgersen 35.2 15.9 186.0 \n", + "46 Dream 43.2 18.5 192.0 \n", + "49 Biscoe 39.6 17.7 186.0 \n", + "53 Biscoe 45.6 20.3 191.0 \n", + "58 Torgersen 40.9 16.8 191.0 \n", + "60 Torgersen 40.3 18.0 195.0 \n", + "62 Dream 36.0 18.5 186.0 \n", + "63 Torgersen 39.3 20.6 190.0 \n", "\n", - " sex \n", - "0 FEMALE \n", - "1 MALE \n", - "2 MALE \n", - "4 FEMALE \n", - "5 MALE \n", - "9 MALE \n", - "10 MALE \n", - "11 MALE \n", - "12 FEMALE \n", - "14 FEMALE \n", - "15 FEMALE \n", - "18 MALE \n", - "19 MALE \n", - "22 FEMALE \n", - "23 FEMALE \n", - "26 MALE \n", - "31 FEMALE \n", - "32 MALE \n", - "33 FEMALE \n", - "35 FEMALE \n", - "36 MALE \n", - "38 MALE \n", - "41 FEMALE \n", - "51 FEMALE \n", - "53 FEMALE \n", + " body_mass_g sex \n", + "0 4300.0 MALE \n", + "1 3750.0 MALE \n", + "4 4775.0 MALE \n", + "6 4400.0 MALE \n", + "11 3700.0 FEMALE \n", + "13 4250.0 MALE \n", + "14 3450.0 FEMALE \n", + "16 4400.0 MALE \n", + "19 3900.0 MALE \n", + "21 3200.0 FEMALE \n", + "23 3550.0 FEMALE \n", + "30 4150.0 MALE \n", + "32 4700.0 MALE \n", + "38 3900.0 MALE \n", + "40 3800.0 MALE \n", + "42 3350.0 FEMALE \n", + "44 4250.0 MALE \n", + "45 3050.0 FEMALE \n", + "46 4100.0 MALE \n", + "49 3500.0 FEMALE \n", + "53 4600.0 MALE \n", + "58 3700.0 FEMALE \n", + "60 3250.0 FEMALE \n", + "62 3100.0 FEMALE \n", + "63 3650.0 MALE \n", "...\n", "\n", "[146 rows x 6 columns]" ] }, - "execution_count": 13, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -843,18 +793,16 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "81f9aa34c7234bd88b6b7a4bc77d4b4e", - "version_major": 2, - "version_minor": 0 - }, + 
"text/html": [ + "Query job 0808457b-a0df-4a37-b7a5-8885f4a4588c is DONE. 28.9 kB processed.
Open Job" + ], "text/plain": [ - "HTML(value='Query job 288f0daa-a51e-45b4-86bf-d054467c4a99 is DONE. 28.9 kB processed. " ] }, "metadata": {}, @@ -881,7 +829,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -897,7 +845,7 @@ " ('linreg', LinearRegression(fit_intercept=False))])" ] }, - "execution_count": 15, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -936,9 +884,63 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job e9bfa6a5-a53f-4d8b-ae8c-cc8cd55d0947 is DONE. 28.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job d8d553cf-3d36-49aa-b18b-9a05576a1fb0 is DONE. 28.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 75ef0083-9a4f-4ffb-a6c6-d82974a1659f is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Pipeline(steps=[('preproc',\n", + " ColumnTransformer(transformers=[('onehot', OneHotEncoder(),\n", + " ['island', 'species', 'sex']),\n", + " ('scaler', StandardScaler(),\n", + " ['culmen_depth_mm',\n", + " 'culmen_length_mm',\n", + " 'flipper_length_mm'])])),\n", + " ('linreg', LinearRegression(fit_intercept=False))])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipeline.fit(X_train, y_train)" ] @@ -953,18 +955,16 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "fcf406d36c0d4915b318cd30c0f3df25", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 55c5a9ce-8159-4a1a-99a4-af3a906640ba is DONE. 29.3 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 81196f97-304b-4d77-bb0f-8fc8adb8fe75 is RUNNING. " ] }, "metadata": {}, @@ -972,13 +972,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "41399a6b1d4f45328bacc6c868cefdf6", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 3e41c470-de70-4f13-89d9-c5564d0b2836 is DONE. 232 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job b417f27a-387d-4eb2-8d6d-287327ef0471 is DONE. 232 Bytes processed. " ] }, "metadata": {}, @@ -986,13 +984,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e3c17676eab448c0942c0c32689ba4b5", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job ed2f9042-a737-4d13-bd21-8c3d29cd61a2 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job b7f89a61-d76a-47be-8b83-917d69f255a2 is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -1000,13 +996,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6c903861564b412aad9d9decad26560c", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 815d16b5-0a5d-42be-a766-1cff5b8f22f2 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 9619c393-90b3-4fea-a197-d09389e9486c is DONE. 31.7 kB processed. 
" ] }, "metadata": {}, @@ -1014,13 +1008,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2c2534cd90e64c81be45753b81b1be46", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 37a38dc6-5073-4544-a1e3-da145a843922 is DONE. 29.4 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job e5854451-ffb4-4a28-a25f-3bdd68e9edae is DONE. 32.2 kB processed. " ] }, "metadata": {}, @@ -1029,10 +1021,10 @@ { "data": { "text/plain": [ - "0.6757452736197735" + "0.2655729213572775" ] }, - "execution_count": 17, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -1040,9 +1032,9 @@ "source": [ "from bigframes.ml.metrics import r2_score\n", "\n", - "pred_y = pipeline.predict(X_test)\n", + "y_pred = pipeline.predict(X_test)[\"predicted_body_mass_g\"]\n", "\n", - "r2_score(y_test, pred_y)" + "r2_score(y_test, y_pred)" ] }, { @@ -1055,18 +1047,16 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9295d6a3ff834f7a91a43d3f4ef4a61c", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Load job 7b46750c-70b4-468d-87ba-9f84f579f2a6 is DONE. Open Job" + ], "text/plain": [ - "HTML(value='Load job d4c2f933-3514-4901-bcd7-888ee66eba82 is RUNNING. " ] }, "metadata": {}, @@ -1097,32 +1087,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b7eb82b3b5fc4a8e97468070a3e76300", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job e4ffd919-6f69-4382-a7e5-db37c7c1fefa is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job 6b3e3285-79e9-4137-bf3b-7b7185ef76a5 is DONE. 24 Bytes processed. " ] }, "metadata": {}, @@ -1130,13 +1104,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "edc7bc6434bd4be4926626a235aab65a", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 207cb787-cf8a-43ea-8e73-644d3f58b11a is DONE. 24 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 173c4194-e194-43d2-8359-7bec83d3c861 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -1144,13 +1116,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "55a8cbd9b1ab47eeab6e1c305847630f", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job c5dc5075-cac0-4947-9e9f-06aa9cc5bd2a is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 53ba2332-590c-488d-9505-23aebaaad9cb is DONE. 48 Bytes processed. " ] }, "metadata": {}, @@ -1158,13 +1128,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "463a5b072148474db629b9346fa3a6d1", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 2ca4a569-7186-48ed-b3e4-004dca704798 is DONE. 282 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 66e4a8e0-4cae-4e9d-86e0-17dc24f6cfbb is DONE. 0 Bytes processed. 
" ] }, "metadata": {}, @@ -1192,41 +1160,83 @@ " \n", " \n", " predicted_body_mass_g\n", + " species\n", + " island\n", + " culmen_length_mm\n", + " culmen_depth_mm\n", + " flipper_length_mm\n", + " sex\n", " \n", " \n", " tag_number\n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " 1633\n", - " 3965.994361\n", + " 4017.203152\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Torgersen\n", + " 39.5\n", + " 18.8\n", + " 196.0\n", + " MALE\n", " \n", " \n", " 1672\n", - " 3246.312058\n", + " 3127.601519\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Torgersen\n", + " 38.5\n", + " 17.2\n", + " 181.0\n", + " FEMALE\n", " \n", " \n", " 1690\n", - " 3456.404062\n", + " 3386.101231\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Dream\n", + " 37.9\n", + " 18.1\n", + " 188.0\n", + " FEMALE\n", " \n", " \n", "\n", - "

- "<p>3 rows × 1 columns</p>\n",

\n", - "[3 rows x 1 columns in total]" + "

+ "<p>3 rows × 7 columns</p>\n",

\n", + "[3 rows x 7 columns in total]" ], "text/plain": [ - " predicted_body_mass_g\n", - "tag_number \n", - "1633 3965.994361\n", - "1672 3246.312058\n", - "1690 3456.404062\n", + " predicted_body_mass_g species \\\n", + "tag_number \n", + "1633 4017.203152 Adelie Penguin (Pygoscelis adeliae) \n", + "1672 3127.601519 Adelie Penguin (Pygoscelis adeliae) \n", + "1690 3386.101231 Adelie Penguin (Pygoscelis adeliae) \n", + "\n", + " island culmen_length_mm culmen_depth_mm flipper_length_mm \\\n", + "tag_number \n", + "1633 Torgersen 39.5 18.8 196.0 \n", + "1672 Torgersen 38.5 17.2 181.0 \n", + "1690 Dream 37.9 18.1 188.0 \n", "\n", - "[3 rows x 1 columns]" + " sex \n", + "tag_number \n", + "1633 MALE \n", + "1672 FEMALE \n", + "1690 FEMALE \n", + "\n", + "[3 rows x 7 columns]" ] }, - "execution_count": 19, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1240,28 +1250,53 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 4. Save in BigQuery" + "## 6. Save in BigQuery" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "Copy job d1def4a4-1da1-43a9-8ae5-4459444d993d is DONE.
Open Job" + ], "text/plain": [ - "Pipeline(steps=[('preproc',\n", - " ColumnTransformer(transformers=[('onehot', OneHotEncoder(),\n", - " ['island', 'species', 'sex']),\n", - " ('scaler', StandardScaler(),\n", - " ['culmen_depth_mm',\n", - " 'culmen_length_mm',\n", - " 'flipper_length_mm'])])),\n", - " ('linreg', LinearRegression(fit_intercept=False))])" + "" ] }, - "execution_count": 20, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Pipeline(steps=[('transform',\n", + " ColumnTransformer(transformers=[('ont_hot_encoder',\n", + " OneHotEncoder(max_categories=1000001,\n", + " min_frequency=0),\n", + " 'island'),\n", + " ('standard_scaler',\n", + " StandardScaler(),\n", + " 'culmen_length_mm'),\n", + " ('standard_scaler',\n", + " StandardScaler(),\n", + " 'culmen_depth_mm'),\n", + " ('standard_scaler',\n", + " StandardScaler(),\n", + " 'flipper_length_mm'),\n", + " ('ont_hot_encoder',\n", + " OneHotEncoder(max_categories=1000001,\n", + " min_frequency=0),\n", + " 'sex')])),\n", + " ('estimator',\n", + " LinearRegression(fit_intercept=False,\n", + " optimize_strategy='NORMAL_EQUATION'))])" + ] + }, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -1269,6 +1304,13 @@ "source": [ "pipeline.to_gbq(\"bigframes-dev.bigframes_demo_us.penguin_model\", replace=True)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -1287,7 +1329,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.9" }, "orig_nbformat": 4, "vscode": { diff --git a/tests/system/large/ml/test_cluster.py b/tests/system/large/ml/test_cluster.py index f01116665f..9244c4b9f1 100644 --- a/tests/system/large/ml/test_cluster.py +++ b/tests/system/large/ml/test_cluster.py @@ -98,7 +98,9 @@ def test_cluster_configure_fit_score_predict( score_result, score_expected, check_exact=False, rtol=0.1 ) - result = model.predict(new_penguins).to_pandas() + predictions = model.predict(new_penguins).to_pandas() + assert predictions.shape == (4, 9) + result = predictions[["CENTROID_ID"]] expected = pd.DataFrame( {"CENTROID_ID": [2, 3, 1, 2]}, dtype="Int64", diff --git a/tests/system/large/ml/test_ensemble.py b/tests/system/large/ml/test_ensemble.py index a8613dfeb9..b98d7a757c 100644 --- a/tests/system/large/ml/test_ensemble.py +++ b/tests/system/large/ml/test_ensemble.py @@ -179,7 +179,7 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): ) -@pytest.mark.flaky(retries=2, delay=120) +# @pytest.mark.flaky(retries=2, delay=120) def test_xgbclassifier_dart_booster_multiple_params( penguins_df_default_index, dataset_id ): diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py index 3e56954058..2929baf3f7 100644 --- a/tests/system/large/ml/test_pipeline.py +++ b/tests/system/large/ml/test_pipeline.py @@ -545,7 +545,9 @@ def test_pipeline_standard_scaler_kmeans_fit_score_predict( score_result, score_expected, check_exact=False, rtol=0.1 ) - result = pl.predict(new_penguins).to_pandas().sort_index() + predictions = pl.predict(new_penguins).to_pandas().sort_index() + assert predictions.shape == (6, 9) + result = predictions[["CENTROID_ID"]] expected = pd.DataFrame( {"CENTROID_ID": [1, 2, 1, 2, 1, 2]}, dtype="Int64", diff --git a/tests/system/small/ml/test_cluster.py b/tests/system/small/ml/test_cluster.py index 266a38e3ee..a9fec0bbce 100644 --- 
a/tests/system/small/ml/test_cluster.py +++ b/tests/system/small/ml/test_cluster.py @@ -62,7 +62,9 @@ def test_kmeans_predict(session, penguins_kmeans_model: cluster.KMeans): new_penguins = session.read_pandas(_PD_NEW_PENGUINS) - result = penguins_kmeans_model.predict(new_penguins).to_pandas() + predictions = penguins_kmeans_model.predict(new_penguins).to_pandas() + assert predictions.shape == (4, 9) + result = predictions[["CENTROID_ID"]] expected = pd.DataFrame( {"CENTROID_ID": [2, 3, 1, 2]}, dtype="Int64", diff --git a/tests/system/small/ml/test_ensemble.py b/tests/system/small/ml/test_ensemble.py index bba083d98d..55d9fef661 100644 --- a/tests/system/small/ml/test_ensemble.py +++ b/tests/system/small/ml/test_ensemble.py @@ -98,7 +98,9 @@ def test_xgbregressor_model_score_series( def test_xgbregressor_model_predict( penguins_xgbregressor_model: bigframes.ml.ensemble.XGBRegressor, new_penguins_df ): - result = penguins_xgbregressor_model.predict(new_penguins_df).to_pandas() + predictions = penguins_xgbregressor_model.predict(new_penguins_df).to_pandas() + assert predictions.shape == (3, 8) + result = predictions[["predicted_body_mass_g"]] expected = pandas.DataFrame( {"predicted_body_mass_g": ["4293.1538089", "3410.0271", "3357.944"]}, dtype="Float64", @@ -220,7 +222,9 @@ def test_xgbclassifier_model_score_series( def test_xgbclassifier_model_predict( penguins_xgbclassifier_model: bigframes.ml.ensemble.XGBClassifier, new_penguins_df ): - result = penguins_xgbclassifier_model.predict(new_penguins_df).to_pandas() + predictions = penguins_xgbclassifier_model.predict(new_penguins_df).to_pandas() + assert predictions.shape == (3, 9) + result = predictions[["predicted_sex"]] expected = pandas.DataFrame( {"predicted_sex": ["MALE", "MALE", "FEMALE"]}, dtype="string[pyarrow]", @@ -363,7 +367,11 @@ def test_randomforestregressor_model_predict( penguins_randomforest_regressor_model: bigframes.ml.ensemble.RandomForestRegressor, new_penguins_df, ): - result = penguins_randomforest_regressor_model.predict(new_penguins_df).to_pandas() + predictions = penguins_randomforest_regressor_model.predict( + new_penguins_df + ).to_pandas() + assert predictions.shape == (3, 8) + result = predictions[["predicted_body_mass_g"]] expected = pandas.DataFrame( {"predicted_body_mass_g": ["3897.341797", "3458.385742", "3458.385742"]}, dtype="Float64", @@ -490,7 +498,11 @@ def test_randomforestclassifier_model_predict( penguins_randomforest_classifier_model: bigframes.ml.ensemble.RandomForestClassifier, new_penguins_df, ): - result = penguins_randomforest_classifier_model.predict(new_penguins_df).to_pandas() + predictions = penguins_randomforest_classifier_model.predict( + new_penguins_df + ).to_pandas() + assert predictions.shape == (3, 9) + result = predictions[["predicted_sex"]] expected = pandas.DataFrame( {"predicted_sex": ["MALE", "MALE", "FEMALE"]}, dtype="string[pyarrow]", diff --git a/tests/system/small/ml/test_forecasting.py b/tests/system/small/ml/test_forecasting.py index 55079c94cf..948db59650 100644 --- a/tests/system/small/ml/test_forecasting.py +++ b/tests/system/small/ml/test_forecasting.py @@ -22,6 +22,8 @@ def test_model_predict(time_series_arima_plus_model): utc = pytz.utc predictions = time_series_arima_plus_model.predict().to_pandas() + assert predictions.shape == (3, 8) + result = predictions[["forecast_timestamp", "forecast_value"]] expected = pd.DataFrame( { "forecast_timestamp": [ @@ -38,7 +40,7 @@ def test_model_predict(time_series_arima_plus_model): ) pd.testing.assert_frame_equal( - 
predictions, + result, expected, rtol=0.1, check_index_type=False, diff --git a/tests/system/small/ml/test_imported.py b/tests/system/small/ml/test_imported.py index d305567066..9008e85a0b 100644 --- a/tests/system/small/ml/test_imported.py +++ b/tests/system/small/ml/test_imported.py @@ -32,7 +32,9 @@ def test_tensorflow_create_model_default_session(imported_tensorflow_model_path) def test_tensorflow_model_predict(imported_tensorflow_model, llm_text_df): df = llm_text_df.rename(columns={"prompt": "input"}) - result = imported_tensorflow_model.predict(df).to_pandas() + predictions = imported_tensorflow_model.predict(df).to_pandas() + assert predictions.shape == (3, 2) + result = predictions[["dense_1"]] # The values are non-human-readable. As they are a dense layer of Neural Network. # And since it is pretrained and imported, the model is a opaque-box. # We may want to switch to better test model and cases. @@ -72,7 +74,9 @@ def test_onnx_create_model_default_session(imported_onnx_model_path): def test_onnx_model_predict(imported_onnx_model, onnx_iris_df): - result = imported_onnx_model.predict(onnx_iris_df).to_pandas() + predictions = imported_onnx_model.predict(onnx_iris_df).to_pandas() + assert predictions.shape == (3, 7) + result = predictions[["label", "probabilities"]] value1 = np.array([0.9999993443489075, 0.0, 0.0]) value2 = np.array([0.0, 0.0, 0.9999993443489075]) expected = pd.DataFrame( diff --git a/tests/system/small/ml/test_linear_model.py b/tests/system/small/ml/test_linear_model.py index 3a8232ed9e..218c1074ab 100644 --- a/tests/system/small/ml/test_linear_model.py +++ b/tests/system/small/ml/test_linear_model.py @@ -91,13 +91,15 @@ def test_linear_reg_model_score_series( def test_linear_reg_model_predict(penguins_linear_model, new_penguins_df): predictions = penguins_linear_model.predict(new_penguins_df).to_pandas() + assert predictions.shape == (3, 8) + result = predictions[["predicted_body_mass_g"]] expected = pandas.DataFrame( {"predicted_body_mass_g": [4030.1, 3280.8, 3177.9]}, dtype="Float64", index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), ) pandas.testing.assert_frame_equal( - predictions.sort_index(), + result.sort_index(), expected, check_exact=False, rtol=0.1, @@ -224,13 +226,15 @@ def test_logistic_model_score_series( def test_logsitic_model_predict(penguins_logistic_model, new_penguins_df): predictions = penguins_logistic_model.predict(new_penguins_df).to_pandas() + assert predictions.shape == (3, 9) + result = predictions[["predicted_sex"]] expected = pandas.DataFrame( {"predicted_sex": ["MALE", "MALE", "FEMALE"]}, dtype="string[pyarrow]", index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), ) pandas.testing.assert_frame_equal( - predictions.sort_index(), + result.sort_index(), expected, check_exact=False, rtol=0.1, diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py index 79d3c40317..306098548e 100644 --- a/tests/system/small/ml/test_llm.py +++ b/tests/system/small/ml/test_llm.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from unittest import TestCase - import numpy as np import pytest @@ -48,7 +46,7 @@ def test_create_text_generator_model_default_session(bq_connection, llm_text_pan llm_text_df = bpd.read_pandas(llm_text_pandas_df) df = model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -72,7 +70,7 @@ def test_create_text_generator_32k_model_default_session( llm_text_df = bpd.read_pandas(llm_text_pandas_df) df = model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -97,7 +95,7 @@ def test_create_text_generator_model_default_connection(llm_text_pandas_df): ) df = model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -109,7 +107,7 @@ def test_text_generator_predict_default_params_success( palm2_text_generator_model, llm_text_df ): df = palm2_text_generator_model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -120,7 +118,7 @@ def test_text_generator_predict_series_default_params_success( palm2_text_generator_model, llm_text_df ): df = palm2_text_generator_model.predict(llm_text_df["prompt"]).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -132,7 +130,7 @@ def test_text_generator_predict_arbitrary_col_label_success( ): llm_text_df = llm_text_df.rename(columns={"prompt": "arbitrary"}) df = palm2_text_generator_model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -145,7 +143,7 @@ def test_text_generator_predict_with_params_success( df = palm2_text_generator_model.predict( llm_text_df, temperature=0.5, max_output_tokens=100, top_k=20, top_p=0.5 ).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -196,7 +194,7 @@ def test_embedding_generator_predict_success( palm2_embedding_generator_model, llm_text_df ): df = palm2_embedding_generator_model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "text_embedding" in df.columns series = df["text_embedding"] value = series[0] @@ -209,7 +207,7 @@ def test_embedding_generator_multilingual_predict_success( palm2_embedding_generator_multilingual_model, llm_text_df ): df = palm2_embedding_generator_multilingual_model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "text_embedding" in df.columns series = df["text_embedding"] value = 
series[0] @@ -222,7 +220,7 @@ def test_embedding_generator_predict_series_success( palm2_embedding_generator_model, llm_text_df ): df = palm2_embedding_generator_model.predict(llm_text_df["prompt"]).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "text_embedding" in df.columns series = df["text_embedding"] value = series[0] diff --git a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py index 5369d3662d..be6c5e7c52 100644 --- a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py +++ b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py @@ -20,19 +20,7 @@ class _BaseKMeans(BaseEstimator, ABC): """Base class for KMeans and MiniBatchKMeans""" - def predict(self, X): - """Predict the closest cluster each sample in X belongs to. - - Args: - X (bigframes.dataframe.DataFrame or bigframes.series.Series): - Series or DataFrame of shape (n_samples, n_features). The data matrix for - which we want to get the predictions. - - Returns: - bigframes.dataframe.DataFrame: DataFrame of shape (n_samples,), containing the - class labels for each sample. - """ - raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + pass class KMeans(_BaseKMeans): @@ -73,7 +61,7 @@ def predict( DataFrame of shape (n_samples, n_features). New data to predict. Returns: - bigframes.dataframe.DataFrame: DataFrame of the cluster each sample belongs to. + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted labels. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_base.py b/third_party/bigframes_vendored/sklearn/linear_model/_base.py index 8dc3b6280a..ab946e5861 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_base.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_base.py @@ -16,7 +16,6 @@ # Original location: https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/linear_model/_base.py from abc import ABCMeta -from typing import List, Optional from bigframes import constants from third_party.bigframes_vendored.sklearn.base import ( @@ -35,7 +34,7 @@ def predict(self, X): Series or DataFrame of shape (n_samples, n_features). Samples. Returns: - bigframes.dataframe.DataFrame: DataFrame of shape (n_samples,). Returns predicted values. + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -50,8 +49,7 @@ def predict(self, X): which we want to get the predictions. Returns: - bigframes.dataframe.DataFrame: DataFrame of shape (n_samples,), containing - the class labels for each sample. + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/xgboost/sklearn.py b/third_party/bigframes_vendored/xgboost/sklearn.py index b7b43b85a3..dfd0ba7356 100644 --- a/third_party/bigframes_vendored/xgboost/sklearn.py +++ b/third_party/bigframes_vendored/xgboost/sklearn.py @@ -18,7 +18,7 @@ def predict(self, X): Series or DataFrame of shape (n_samples, n_features). Samples. Returns: - DataFrame of shape (n_samples,): Returns predicted values. 
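The vendored docstring edits above, together with the test changes earlier in this patch, codify one convention: `predict()` now returns a DataFrame of shape (n_samples, n_input_columns + n_prediction_columns) rather than the prediction column alone. Callers therefore select the column they need explicitly; a short sketch, assuming any fitted bigframes regressor `model` and an input frame `new_penguins_df`:

```python
predictions = model.predict(new_penguins_df).to_pandas()
# Input columns now travel alongside the predictions; keep only the new one.
result = predictions[["predicted_body_mass_g"]]
```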
+ bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From a7298317ea2604faa6ae31817f1f729d7e0b9818 Mon Sep 17 00:00:00 2001 From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com> Date: Thu, 16 Nov 2023 14:44:14 -0800 Subject: [PATCH 03/26] fix: invalid JSON type of the notebook (#215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- .../bq_dataframes_llm_kmeans.ipynb | 1064 +---------------- 1 file changed, 33 insertions(+), 1031 deletions(-) diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb index ae03813639..8d75950925 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb @@ -139,17 +139,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Updated property [core/project].\n" - ] - } - ], + "outputs": [], "source": [ "# set your project ID below\n", "PROJECT_ID = \"\" # @param {type:\"string\"}\n", @@ -170,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -264,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "id": "R7STCS8xB5d2" }, @@ -296,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "id": "zDSwoBo1CU3G" }, @@ -307,101 +299,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "id": "tYDoaKgJChiq" }, - "outputs": [ - { - "data": { - "text/html": [ - "Query job 9f096761-e3b5-4d58-a9f7-485ced67afca is DONE. 2.3 GB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job ee8fecb1-2e30-407d-9e2e-9e76061da9e7 is DONE. 2.3 GB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
consumer_complaint_narrative
0I signed a contract as a condition of employme...
1First, I want to disclose that XXXX and XXXX b...
2Frequent calls from Focused Receivables Manage...
3I recently contacted Enhanced Recovery Company...
4This began when I subscribed to XXXX XXXX inte...
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" - ], - "text/plain": [ - " consumer_complaint_narrative\n", - "0 I signed a contract as a condition of employme...\n", - "1 First, I want to disclose that XXXX and XXXX b...\n", - "2 Frequent calls from Focused Receivables Manage...\n", - "3 I recently contacted Enhanced Recovery Company...\n", - "4 This began when I subscribed to XXXX XXXX inte...\n", - "\n", - "[5 rows x 1 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "issues_df = input_df[[\"consumer_complaint_narrative\"]].dropna()\n", "issues_df.head(n=5) # View the first five complaints" @@ -417,7 +319,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "id": "OltYSUEcsSOW" }, @@ -439,24 +341,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "id": "li38q8FzDDMu" }, - "outputs": [ - { - "data": { - "text/html": [ - "Query job 52d2e961-7896-497c-8b03-ab7374737679 is DONE. 0 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from bigframes.ml.llm import PaLM2TextEmbeddingGenerator\n", "\n", @@ -465,125 +354,11 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": { "id": "cOuSOQ5FDewD" }, - "outputs": [ - { - "data": { - "text/html": [ - "Query job d093d51a-8eda-442f-80cd-568cb76e00b3 is DONE. 10.6 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 6419df65-3e96-41a7-a7b5-3d058e18763a is DONE. 80.0 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 917f09ea-c468-4363-a856-b1091e5f775f is DONE. 80.0 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 5c9679e7-192c-40b5-a14b-edc0fa113eaa is DONE. 61.5 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
text_embedding
422[-0.012013785541057587, 0.003669967409223318, ...
616[-0.014948881231248379, -0.04672442376613617, ...
833[-0.01951478235423565, -0.027120858430862427, ...
1370[-0.03140445053577423, -0.048797041177749634, ...
1430[-0.02244548313319683, -0.03336532413959503, 0...
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" - ], - "text/plain": [ - " text_embedding\n", - "422 [-0.012013785541057587, 0.003669967409223318, ...\n", - "616 [-0.014948881231248379, -0.04672442376613617, ...\n", - "833 [-0.01951478235423565, -0.027120858430862427, ...\n", - "1370 [-0.03140445053577423, -0.048797041177749634, ...\n", - "1430 [-0.02244548313319683, -0.03336532413959503, 0...\n", - "\n", - "[5 rows x 1 columns]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Will take ~3 minutes to compute the embeddings\n", "predicted_embeddings = model.predict(downsampled_issues_df)\n", @@ -593,263 +368,14 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "id": "4H_etYfsEOFP" }, - "outputs": [ - { - "data": { - "text/html": [ - "Query job ce9cb0f9-4b0d-40a1-81f3-d6e60dd6c684 is DONE. 160.0 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job aa692a30-5706-46ad-8029-faf2fac66234 is DONE. 72.2 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
consumer_complaint_narrativetext_embedding
2580664Hello, my name is XXXX XXXX, and I am writing ...[0.0003211698785889894, -0.01816680282354355, ...
1806973This is XXXX XXXX and I am submitting this com...[-0.009485247544944286, -0.025846892967820168,...
2055053XXXX XXXX XXXX, XXXX. ( address : XXXX XXXX XX...[-0.010950954630970955, -0.0249345600605011, 0...
2515231When I reinvestigated my credit report, I real...[-0.009660656563937664, -0.05793113633990288, ...
2633049Checking my credit report XX/XX/2018 with all ...[-0.0022159104701131582, -0.03330004960298538,...
3117273I contacted TransUnion and spoke a credit rep ...[-0.015955328941345215, -0.006488671060651541,...
698814XXXX XXXX XXXX. makes daily calls to me cell c...[0.005397460889071226, -0.01276913657784462, 0...
267826Can we please reopen Case : XXXX? \n", - "\n", - "Wells Farg...[0.004065403249114752, -0.0005381882656365633,...
54019My rights under 15 USC 1681 have been violated...[0.013823015615344048, -0.02010691538453102, 0...
141050To whom it may concern : My personal informati...[0.008104532025754452, -0.01856449618935585, 0...
2962076I have had a CashApp account since last year, ...[-0.0003019514260813594, -0.03750108182430267,...
2481105that some of the information was erroneous. Th...[-0.014868081547319889, -0.0443895161151886, -...
431562I have disputed the referenced accounts to the...[-0.0020524838473647833, -0.04830990731716156,...
1953029On, XX/XX/22, I attempted to complete a transa...[-0.01599179394543171, -0.0074900356121361256,...
2395979Subject : XXXX XXXX XXXX compensation, refund,...[-0.0035950862802565098, -0.014652969315648079...
455524I paid off my mortgage on XX/XX/2019. The comp...[-0.01100730150938034, -0.03495829552412033, 0...
2155924This kind of account is placed as a charged of...[-0.028635455295443535, -0.028604287654161453,...
1069497This is one of many issues I have had with Wel...[0.008871790021657944, -0.028502725064754486, ...
3181689I have disputed this account with MONTEREY FIN...[-0.004721717908978462, -0.03673810139298439, ...
274268Lender is not updating my loan status in the V...[-0.009221495129168034, -0.0289347805082798, 0...
1671305XXXX is a peer to peer lending conmpany that u...[-0.02911308966577053, -0.01850792020559311, -...
886026( DISPUTE CODE - XXXX ) My personal informatio...[-0.007220877334475517, -0.016615957021713257,...
1044431I filed a complaint against PNC this year and ...[0.002848619595170021, -0.035117778927087784, ...
1938481I applied for a modification and was approved....[-0.03114932030439377, -0.0421406552195549, 0....
1987834Ive been Disputting my XXXX XXXX I opened this...[-0.009406660683453083, -0.020967338234186172,...
\n", - "

25 rows × 2 columns

\n", - "
[10000 rows x 2 columns in total]" - ], - "text/plain": [ - " consumer_complaint_narrative \\\n", - "2580664 Hello, my name is XXXX XXXX, and I am writing ... \n", - "1806973 This is XXXX XXXX and I am submitting this com... \n", - "2055053 XXXX XXXX XXXX, XXXX. ( address : XXXX XXXX XX... \n", - "2515231 When I reinvestigated my credit report, I real... \n", - "2633049 Checking my credit report XX/XX/2018 with all ... \n", - "3117273 I contacted TransUnion and spoke a credit rep ... \n", - "698814 XXXX XXXX XXXX. makes daily calls to me cell c... \n", - "267826 Can we please reopen Case : XXXX? \n", - "\n", - "Wells Farg... \n", - "54019 My rights under 15 USC 1681 have been violated... \n", - "141050 To whom it may concern : My personal informati... \n", - "2962076 I have had a CashApp account since last year, ... \n", - "2481105 that some of the information was erroneous. Th... \n", - "431562 I have disputed the referenced accounts to the... \n", - "1953029 On, XX/XX/22, I attempted to complete a transa... \n", - "2395979 Subject : XXXX XXXX XXXX compensation, refund,... \n", - "455524 I paid off my mortgage on XX/XX/2019. The comp... \n", - "2155924 This kind of account is placed as a charged of... \n", - "1069497 This is one of many issues I have had with Wel... \n", - "3181689 I have disputed this account with MONTEREY FIN... \n", - "274268 Lender is not updating my loan status in the V... \n", - "1671305 XXXX is a peer to peer lending conmpany that u... \n", - "886026 ( DISPUTE CODE - XXXX ) My personal informatio... \n", - "1044431 I filed a complaint against PNC this year and ... \n", - "1938481 I applied for a modification and was approved.... \n", - "1987834 Ive been Disputting my XXXX XXXX I opened this... \n", - "\n", - " text_embedding \n", - "2580664 [0.0003211698785889894, -0.01816680282354355, ... \n", - "1806973 [-0.009485247544944286, -0.025846892967820168,... \n", - "2055053 [-0.010950954630970955, -0.0249345600605011, 0... \n", - "2515231 [-0.009660656563937664, -0.05793113633990288, ... \n", - "2633049 [-0.0022159104701131582, -0.03330004960298538,... \n", - "3117273 [-0.015955328941345215, -0.006488671060651541,... \n", - "698814 [0.005397460889071226, -0.01276913657784462, 0... \n", - "267826 [0.004065403249114752, -0.0005381882656365633,... \n", - "54019 [0.013823015615344048, -0.02010691538453102, 0... \n", - "141050 [0.008104532025754452, -0.01856449618935585, 0... \n", - "2962076 [-0.0003019514260813594, -0.03750108182430267,... \n", - "2481105 [-0.014868081547319889, -0.0443895161151886, -... \n", - "431562 [-0.0020524838473647833, -0.04830990731716156,... \n", - "1953029 [-0.01599179394543171, -0.0074900356121361256,... \n", - "2395979 [-0.0035950862802565098, -0.014652969315648079... \n", - "455524 [-0.01100730150938034, -0.03495829552412033, 0... \n", - "2155924 [-0.028635455295443535, -0.028604287654161453,... \n", - "1069497 [0.008871790021657944, -0.028502725064754486, ... \n", - "3181689 [-0.004721717908978462, -0.03673810139298439, ... \n", - "274268 [-0.009221495129168034, -0.0289347805082798, 0... \n", - "1671305 [-0.02911308966577053, -0.01850792020559311, -... \n", - "886026 [-0.007220877334475517, -0.016615957021713257,... \n", - "1044431 [0.002848619595170021, -0.035117778927087784, ... \n", - "1938481 [-0.03114932030439377, -0.0421406552195549, 0.... \n", - "1987834 [-0.009406660683453083, -0.020967338234186172,... 
\n", - "...\n", - "\n", - "[10000 rows x 2 columns]" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Join the complaints with their embeddings in the same DataFrame\n", - "combined_df = downsampled_issues_df.join(predicted_embeddings, how=\"left\")\n", - "combined_df" + "combined_df = downsampled_issues_df.join(predicted_embeddings)" ] }, { @@ -872,7 +398,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": { "id": "AhNTnEC5FRz2" }, @@ -893,152 +419,14 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": { "id": "6poSxh-fGJF7" }, - "outputs": [ - { - "data": { - "text/html": [ - "Query job 65eb317d-59f1-4d10-acd1-4b7f3778114c is DONE. 61.7 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 156e445e-cc01-4b30-84cc-ac1c98a69b81 is DONE. 0 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 5befc212-f4a3-4e33-b1b2-01e809acdcbd is DONE. 61.9 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job bd271178-8b8d-45dc-ac57-7f0194d0daac is DONE. 80.0 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job bbfb9cca-622d-4bf5-9fc0-6d9a85287d41 is DONE. 80.0 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job a5f30b32-9fb0-42b4-b426-d8484f008bdb is DONE. 160.0 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CENTROID_ID
4222
6163
8335
13707
14303
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" - ], - "text/plain": [ - " CENTROID_ID\n", - "422 2\n", - "616 3\n", - "833 5\n", - "1370 7\n", - "1430 3\n", - "\n", - "[5 rows x 1 columns]" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Use KMeans clustering to calculate our groups. Will take ~3 minutes.\n", - "cluster_model.fit(combined_df[\"text_embedding\"])\n", + "cluster_model.fit(combined_df[[\"text_embedding\"]])\n", "clustered_result = cluster_model.predict(combined_df[[\"text_embedding\"]])\n", "# Notice the CENTROID_ID column, which is the ID number of the group that\n", "# each complaint belongs to.\n", @@ -1047,123 +435,13 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Query job 7a41196e-ea67-44ac-95a7-7dce620d6d21 is DONE. 320.0 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 8008b482-1a0d-461f-a215-4676d9d918dc is DONE. 72.4 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
consumer_complaint_narrativetext_embeddingCENTROID_ID
2580664Hello, my name is XXXX XXXX, and I am writing ...[0.0003211698785889894, -0.01816680282354355, ...2
1806973This is XXXX XXXX and I am submitting this com...[-0.009485247544944286, -0.025846892967820168,...5
2055053XXXX XXXX XXXX, XXXX. ( address : XXXX XXXX XX...[-0.010950954630970955, -0.0249345600605011, 0...3
2515231When I reinvestigated my credit report, I real...[-0.009660656563937664, -0.05793113633990288, ...5
2633049Checking my credit report XX/XX/2018 with all ...[-0.0022159104701131582, -0.03330004960298538,...3
\n", - "

5 rows × 3 columns

\n", - "
[5 rows x 3 columns in total]" - ], - "text/plain": [ - " consumer_complaint_narrative \\\n", - "2580664 Hello, my name is XXXX XXXX, and I am writing ... \n", - "1806973 This is XXXX XXXX and I am submitting this com... \n", - "2055053 XXXX XXXX XXXX, XXXX. ( address : XXXX XXXX XX... \n", - "2515231 When I reinvestigated my credit report, I real... \n", - "2633049 Checking my credit report XX/XX/2018 with all ... \n", - "\n", - " text_embedding CENTROID_ID \n", - "2580664 [0.0003211698785889894, -0.01816680282354355, ... 2 \n", - "1806973 [-0.009485247544944286, -0.025846892967820168,... 5 \n", - "2055053 [-0.010950954630970955, -0.0249345600605011, 0... 3 \n", - "2515231 [-0.009660656563937664, -0.05793113633990288, ... 5 \n", - "2633049 [-0.0022159104701131582, -0.03330004960298538,... 3 \n", - "\n", - "[5 rows x 3 columns]" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Join the group number to the complaints and their text embeddings\n", "combined_clustered_result = combined_df.join(clustered_result)\n", - "\n", - "combined_clustered_result.head(n=5)" + "combined_clustered_result.head(n=5) " ] }, { @@ -1194,36 +472,11 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": { "id": "2E7wXM_jGqo6" }, - "outputs": [ - { - "data": { - "text/html": [ - "Query job 50c7c0dd-94a2-494e-a37f-6a838a518f6c is DONE. 11.0 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job d96c847f-c292-4804-bd05-fd643c41c7a5 is DONE. 11.0 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Using bigframes, with syntax identical to pandas,\n", "# filter out the first and second groups\n", @@ -1240,100 +493,11 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": { "id": "ZNDiueI9IP5e" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "comment list 1:\n", - "1. XXXX is a peer to peer lending conmpany that uses borrowers crypto to collateralize loans from investors ( like myself ). I've been investing with them for almost XXXX years and currently have {$240000.00} tied up in lending products with XXXX. \n", - "As of XXXX days ago we received an email saying all business operations have been ceased and no withdrawals or deposits will be allowed. They said they'll update customers within 10 days, but no one can reach anyone at the company to find out any more details as they are not answering calls nor returning emails. It also appears the company has scrubbed its XXXX page and the XXXX pages of top executives. \n", - "\n", - "All collateral and client 's investment funds are supposedly held at or processed through XXXX XXXX XXXX ( registered SEC company ). XXXX XXXX keeps telling us to contact XXXX and won't give us any information, so we have no way to find out what's happening with our funds/collateral or if everything is gone. We have a XXXX channel up where people are gathering evidence, documentation, etc. This is probably the best place to start to get a broad view of what's happening. Details below. 
\n", - "\n", - "XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX CONST LLC ( Business ID : XXXX ) FoXXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX 'Cease of Operations ' email received by all investors XXXX XX/XX/2022 at XXXX : \" Dear XXXX Users, Given the collapses of several cryptocurrencies so far this year and the rapidly deteriorating market conditions that have been prompting heavy withdrawals across all XXXX lending and XXXX exchange platforms recently, we are sad to inform you that we are unable to continue to operate our business as usual. As such, we are limiting our business activities, including pausing user withdrawals as allowed under our Terms of XXXX. \n", - "No deposit or investment request will be processed at this time. \n", - "\n", - "Our team is working diligently towards our objective of maximizing value for all of our Users, and our top priority continues to be to protect your interests. As we explore all options available to us, we will provide updates to you as we go. \n", - "\n", - "We hope to communicate with you within the next XXXX business days on the next steps to address the situation. We appreciate your patience in this trying time. \n", - "\n", - "Sincerely yoursXXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX\n", - "2. Submitted XX/XX/XXXX\n", - "Typed XX/XX/XXXX:\n", - "\n", - "XX/XX/XXXX\n", - "XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX, XXXX XXXX\n", - "PH:. XXXX\n", - "PH: XXXX\n", - "EM:\n", - "XXXX\n", - "XXXX\n", - "XXXX XXXX \n", - "XXXX XXXX\n", - "Date of Birth XX/XX/XXXX\n", - "SS#: XXXX\n", - "TO:\n", - "* Consumer Financial Protection Brueau\n", - "* Department of Veteran Affairs, Office of the Inspector General\n", - "My name is XXXX XXXX XXXX, I've received more than one email from Discover Card in my XXXX XXXX, past emails from Discover Card were unautherized deletions.\n", - "From: Discover Card XXXX\n", - "To: You XXXX\n", - "Date: XX/XX/XXXX, XXXX XXXX XXXX From: Discover Card XXXX>\n", - "To Recipient \n", - "Date Mon, XX/XX/XXXX XXXX XXXX\n", - "I dont and havent ever had a Discover Checking, Savings, Business Accounts nor Loans of any kind through any Bank called Discover. The 1st time I was contacted by Discover Card I resided alone from XX/XX/XXXX to XX/XX/XXXXat XXXX XXXX XXXX at XXXX XXXX XXXX XXXX XXXX in XXXX, XXXX years prior to me moving here to XXXX, XXXX in XX/XX/XXXX. When \n", - "\n", - "\n", - "Discover Card had 1st contacted me in XXXX, XXXX it was associated with my XXXX XXXX XXXX website related online Merchants Account. Not once have I ever applied for or had any Website Merchant Accounts here in XXXX; I only applied for online online Merchant Accounts associated with my XXXX related Accounts I purchased while residing in XXXX, XXXX. Some of my website related information was stolen both in XXXX, XXXX and here in XXXX along with my other property that hasn't been returned to me. 
I don't and haven't ever had any XXXX XXXX related Agreements,Contracts or Credit Cards offered to Veterans associated with ones businesses. Nor have I ever applied for or had a Business License or Business Permit in any City or State inspite of my diverse interest. Not once have I ever allowed another be it an Paralegal, Payee, Attorney, Employers, Landlords, Veteran Organizations including Vocational Rehabilitation Programs, XXXX( XXXX XXXX XXXX, XXXX XXXX, Entertainment Companies, Banks, Celebrity Personal Assistant Agencies or Celebs, Shelters, Charities, HUD, Housing Arthority, Department of Veteran Affairs, Military, Law Enforcement or anyone else nor their employess to sign any business related Agreements or Contracts on my behalf; not even my family members or friends. \n", - "None of my XXXX XXXX attempts were associated with my Employers, Department of Veteran Affairs,Vocational Rehabilitation Programs Military, Landlords, HUD( Housing Authority),Friends, Family nor did I ever sign related Agreements or Contracts with them. Not once had I ever provided anyone the passwords to be able to sign into my accounts rather were aware of my accounts or not. Yes, my desktop computer that was stolen along with my other property XX/XX/XXXX was registered with my Online Merchant Account. I had paid for my Merchant related Accounts through my same XXXX XXXX XXXX Account I purchased both of my XXXX XXXX XXXX related accounts through. That was 1st once during the Summer of XX/XX/XXXX and 2nd my related website months later, while I resided in XXXX XXXX and I worked for XXXX. I never offered nor did I ever sign any business Contracts or Agreements with XXXX nor my Landlord or their staff associted with any of my online websites or Merchant Accounts. My XXXX XXXX XXXX Compensation was deposited into both of my XXXX XXXX XXXX Accounts at that time. My account was changed during the Summer of XX/XX/XXXXbecause of theft of my Bank Card. None of my Checking,Savings, past Credit Cards or Business related were shared accounts in which others were allowed to \n", - "use to make purchases. I had written checks from my XXXX XXXX XXXX account to pay for my XXXX XXXX XXXX XXXX on the XXXX XXXX here in XXXX in XX/XX/XXXX before it's name changed to XXXX XXXX. Prior to me using my same account open a Checking account in person at XXXX XXXX before it's name was changed to XXXX XXXX. Where my XXXX XXXX XXXX XXXX has been deposited since that time. I had used my XXXX XXXX Checking to pay for my XXXX XXXX XXXX XXXX both before theft of my property XX/XX/XXXX and that was also prior to the theft of my property from my XXXX XXXX XXXX XXXX in XX/XX/XXXX.\n", - "I've stated this many times:\n", - "I paid for my 1st XXXX XXXX XXXX Membership while employed at XXXX using my XXXX XXXX XXXX account XXXX my XXXX XXXX XXXX XXXX was also deposited. That was changed to XXXX because I didn't receive my 1st XXXX XXXX XXXX Card the bank sent to XXXX XXXX residence on XXXX XXXX in XX/XX/XXXX while I was there. In which both my XXXX salary and XXXX XXXX XXXX XXXX were deposited into my account, no money from XXXX XXXX nor anyone else that was at that residence was given to nor were any of my children there. Nor did XXXX or any other person at that residence ever give me my missing Bank Card not even after I moved out and stayed a month at XXXX XXXX XXXX using my replacement card to pay for my Hotel room. 
Which is the same account I used to pay for XXXX XXXX Membership, XXXX XXXX XXXX, XXXX XXXX Membership fees, and various online Merchant Account activation related fees.\n", - "* XXXX XXXX XXXX.\n", - "XXXX XXXX XXXX XXXX. Membership\n", - "\n", - "# XXXX\n", - "* XXXX XXXX Membership\n", - "# XXXX\n", - "* Total Merchant Services XXXX and XXXX.\n", - "* XXXX XXXX XXXX XXXX XXXX\n", - "* XXXX XXXX changed my $XXXX a month fees to my XXXX XXXX XXXX account #XXXX.\n", - "XX/XX/XXXX - XX/XX/XXXX XXXX XXXX, XXXX.\n", - "\n", - "Rep: XXXX XXXX XXXX, Fl \n", - "XXXX\n", - "XXXX Website \n", - "XXXX\n", - "Software and website owner, I performed Internet advertising and marketing, to promote this software and website. I worked and XXXX from my home XXXX XXXX XXXX XXXX XXXX , XXXX. I purchased XXXX XXXX XXXX-Software Electronic Book CD and was given a website to promote the software on the internet. The XXXX was given a copy of my website owner certificate document submitted to me when I purchased the software marketing program as well copies of my other school transcripts in addition to XXXX XXXX XXXX for example. XXXX, represented the first initials of my children's names. I wasn't ever paid and I'm still owed the money. Nor did my marketing program have anything to do with any schools, college nor university programs nor did I ever offer or sign any agreement to include it such. Nor did my XXXX XXXX XXXX have anything to do with any other employers, Department of Family and Children, Military, Veteran Organizations or Food Stamp programs, Section 8 nor Indianapolis Housing Authority for example; only me.\n", - "Thank you,\n", - "XXXX XXXX\n", - "3. ACCORDING TO 15 U.S. CODE 6803-DISCLOSURE OF INSTITUTION PRIVACY POLICY, AND ACCORDING TO U.S. CODE 6802- OBLIGATIONS WITH RESPECT TO DISCLOSURES OF PERSONAL INFORMATION. ( b ) OPT OUT ( 1 ) IN GENERAL A FINANCIAL INSTITUTION MAY NOT DISCLOSE NONPUBLIC PERSONAL INFORMATION TO A NONAFFILIATED THIRD PARTY ( TRANSUNION, XXXX, AND XXXX. ) UNLESS- ( A ) SUCH FINANCIAL INSTITUTION CLEARLY AND CONSPICUOUSLY DISCLOSES TO THE CONSUMER, IN WRITING OR IN ELECTRONIC FORM OR OTHER FORM PERMITTED BY THE REGULATIONS PRESCRIBED UNDER SECTION 6804 OF THIS TITLE. ALSO ACCORDING TO THE \" XXXX ACT '', FINANCIAL INSTITUTIONS MUST TELL THEIR CUSTOMERS ABOUT THEIR INFORMATION-SHARING PRACTICES AND EXPLAIN TO CUSTOMERS THEIR RIGHT TO \" OPT OUT '' IF THEY DON'T WANT THEIR INFORMATION SHARED WITH CERTAIN THIRD PARTIES. UNDER THE FDCPA, A COLLECTOR MUST PROVIDE YOU WITH INFORMATION ABOUT THE DEBT IN ITS INITIAL COMMUNICATION OR WITHIN FIVE DAYS AFTER THE INITIAL COMMUNICATION. ALSO, THE FDCPA STATES, \" YOU CAN NOT ATTEMPT TO COLLECT AN DEBT WHILE A PERSON ( THE CONSUMER ) SUPRESS VALIDATION. TRANSUNION, XXXX, XXXX, AND THE ACCOUNTS LISTED BELOW HAVE CLEARLY VIOLATED MY RIGHTS : XXXX ACCOUNT # XXXX, XXXX XXXX XXXX ACCOUNT # XXXXXXXX XXXX XXXX XXXX XXXX ACCOUNT # XXXXXXXX XXXX XXXX XXXX ACCOUNT # XXXX, XXXX XXXX XXXX XXXX ACCOUNT # XXXX, AND XXXX ACCOUNT # XXXX. FAILURE TO RESPOND SATISFACTORILY WITH DELETIONS OF ALL THE ABOVE ACCOUNTS WILL RESULT IN LEGAL ACTIONS BEING TAKEN AGAINST, TRANSUNION, XXXX, XXXX, WHICH I'LL BE SEEKING A {$1000.00} PER VIOLATION FOR DEFAMATION OF CHARACTER ( PER SE ) NEGLIGENT ENABLEMENT OF IDENTITY FRAUD. 15 USC 1681 VIOLATIONS FOR WILLFUL NONCOMPLIANCE-616 CIVIL LIABILITY FOR WILLFUL NONCOPLIANCE. THIS IS THE THIRD TIME I'VE SUBMITTED A COMPLAINT, AND THE REPONSE I GET IS \" YOU CAN NOT LOCATE MY CREDIT REPORT! 
'' THIS IS CLEARLY NEGLIGENCE.\n", - "4. I do not know how this works, but I need it done or somehow corrected. My name is XXXX XXXX, XXXX XXXX XXXX XXXX TN XXXXMy SS XXXX DOB XXXX. I had some issues with my income being affected by the COVID-19PANDEMICSHUTDOWN. I was under the 1 CARESAct, Pub. L. 116-136, section 4021, codified at FCRAsection 623 ( a ) ( 1 ) ( F ) ( i ) ( I ), 15 U.S.C.1681s- 2 ( a ) ( 1 ) ( F ) ( i ) ( I ). I am requesting some accommodations so I care to protect the integrity of my credit file. US DEPT OF ED / XXXX # XXXX, # XXXX accounts are reporting on XXXX, XXXX The was 30,60, 90 DAYS LATEsince requested assistance due to the pandemic. I found a few accounts that I have never done any business with these companies and the accounts do not belong on my report : XXXX XXXX # XXXX, XXXX XXXX XXXX XXXX # XXXX. \n", - "\n", - "I have some issues with the misspelling of my name, my correct spelling is XXXX XXXX. Please remove any other variation of my name they are not correct. The following addresses do not belong to me please delete them : XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXXSC, XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX\n", - "5. I want to know if this is even legal?! How can they disclose information without knowing its a correct email?!\n", - "\n", - "comment list 2:\n", - "1. Hello, my name is XXXX XXXX, and I am writing to delete the following information in my file. The items I need deleted are listed in the report. I am a victim of identity theft and did not make the charge. I ask that the items be deleted to correct my credit report. I reported the theft of my identity to the Federal Trade Commission and I also have enclosed copies of the Federal Trade Commissions Identity Theft Affidavit. Please delete the items as soon as possible. The accounts are being reported currently open and the accounts need to be closed. \n", - "XXXX account number XXXX opened on XX/XX/2022 for the amount {$530.00} XXXX XXXX XXXX account number XXXX opened on XX/XX/2022 for the amount of {$140.00} The accounts are being reported currently open and need to be closed immediately. \n", - "Based on, 15 U.S. Code 1681c2 a consumer reporting agency shall block the reporting of any information in the file of a consumer that the consumer identifies as information that resulted from an alleged identity theft, not later than 4 business days after the date of receipt. This account should not be furnished on my consumer report. As a consumer I am demanding the deletion of the accounts listed IMMEDIATELY.\n", - "2. To whom it may concern : My personal information was breach in the internet as result accounts had been open in my name, I was advise to fill out an Id theft report to help me deal with this situation, I have listed each one of the accounts that do not belong to me. This is my second request to remove unverified items in my report, but XXXX keep rposting these account with out providing any type of original document as the FCRA provide, you need to provide me with original documents or remove these account immediately.\n", - "3. Ive been Disputting my XXXX XXXX I opened this account and someone got my information and used my card, I contacted XXXX over and over, they removed the negative reporting from my XXXX report but still reporting it negative on my XXXX and Expean this is very unfair to me because Im a victim of identity theft\n", - "4. 
Today, XX/XX/2021, I received three items in the mail, one envelope containing an unsolicited debit card from Navy Federal credit Union and the other two, with a letter each describing The Important Rights on two accounts should these accounts become delinquent under New York law. \n", - "\n", - "First of all, I never applied for these accounts with Navy Federal, not have I authorized anyone to do so on my behalf. I immediately contacted Navy Federal via phone and was told I was most likely a victim of identity theft and that I should monitor my credit and use a credit monitoring service. I was also asked for my email and mailing information in order to receive a letter from them regarding this issue. \n", - "\n", - "My main concern is having someone using my identity to illegally open bank accounts and commit fraud, destroying my credit and finances in the process. This bank is in another state from where I reside. I have not lived in Virginia nor do I intend to do so in the foreseeable future.\n", - "5. My personal information ( including my SSN, Drivers License Info, Addresses, and more ) was stolen from a hacking, and Equifax did n't tell the public about the hack until more than a month after the hacking. During this time, three Equifax executives were caught inside trading. It really shows how Equifax cares about other people!\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# Build plain-text prompts to send to PaLM 2. Use only 5 complaints from each group.\n", "prompt1 = 'comment list 1:\\n'\n", @@ -1352,100 +516,11 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": { "id": "BfHGJLirzSvH" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Please highlight the most obvious difference betweenthe two lists of comments:\n", - "comment list 1:\n", - "1. XXXX is a peer to peer lending conmpany that uses borrowers crypto to collateralize loans from investors ( like myself ). I've been investing with them for almost XXXX years and currently have {$240000.00} tied up in lending products with XXXX. \n", - "As of XXXX days ago we received an email saying all business operations have been ceased and no withdrawals or deposits will be allowed. They said they'll update customers within 10 days, but no one can reach anyone at the company to find out any more details as they are not answering calls nor returning emails. It also appears the company has scrubbed its XXXX page and the XXXX pages of top executives. \n", - "\n", - "All collateral and client 's investment funds are supposedly held at or processed through XXXX XXXX XXXX ( registered SEC company ). XXXX XXXX keeps telling us to contact XXXX and won't give us any information, so we have no way to find out what's happening with our funds/collateral or if everything is gone. We have a XXXX channel up where people are gathering evidence, documentation, etc. This is probably the best place to start to get a broad view of what's happening. Details below. 
\n", - "\n", - "XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX CONST LLC ( Business ID : XXXX ) FoXXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX 'Cease of Operations ' email received by all investors XXXX XX/XX/2022 at XXXX : \" Dear XXXX Users, Given the collapses of several cryptocurrencies so far this year and the rapidly deteriorating market conditions that have been prompting heavy withdrawals across all XXXX lending and XXXX exchange platforms recently, we are sad to inform you that we are unable to continue to operate our business as usual. As such, we are limiting our business activities, including pausing user withdrawals as allowed under our Terms of XXXX. \n", - "No deposit or investment request will be processed at this time. \n", - "\n", - "Our team is working diligently towards our objective of maximizing value for all of our Users, and our top priority continues to be to protect your interests. As we explore all options available to us, we will provide updates to you as we go. \n", - "\n", - "We hope to communicate with you within the next XXXX business days on the next steps to address the situation. We appreciate your patience in this trying time. \n", - "\n", - "Sincerely yoursXXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX\n", - "2. Submitted XX/XX/XXXX\n", - "Typed XX/XX/XXXX:\n", - "\n", - "XX/XX/XXXX\n", - "XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX, XXXX XXXX\n", - "PH:. XXXX\n", - "PH: XXXX\n", - "EM:\n", - "XXXX\n", - "XXXX\n", - "XXXX XXXX \n", - "XXXX XXXX\n", - "Date of Birth XX/XX/XXXX\n", - "SS#: XXXX\n", - "TO:\n", - "* Consumer Financial Protection Brueau\n", - "* Department of Veteran Affairs, Office of the Inspector General\n", - "My name is XXXX XXXX XXXX, I've received more than one email from Discover Card in my XXXX XXXX, past emails from Discover Card were unautherized deletions.\n", - "From: Discover Card XXXX\n", - "To: You XXXX\n", - "Date: XX/XX/XXXX, XXXX XXXX XXXX From: Discover Card XXXX>\n", - "To Recipient \n", - "Date Mon, XX/XX/XXXX XXXX XXXX\n", - "I dont and havent ever had a Discover Checking, Savings, Business Accounts nor Loans of any kind through any Bank called Discover. The 1st time I was contacted by Discover Card I resided alone from XX/XX/XXXX to XX/XX/XXXXat XXXX XXXX XXXX at XXXX XXXX XXXX XXXX XXXX in XXXX, XXXX years prior to me moving here to XXXX, XXXX in XX/XX/XXXX. When \n", - "\n", - "\n", - "Discover Card had 1st contacted me in XXXX, XXXX it was associated with my XXXX XXXX XXXX website related online Merchants Account. Not once have I ever applied for or had any Website Merchant Accounts here in XXXX; I only applied for online online Merchant Accounts associated with my XXXX related Accounts I purchased while residing in XXXX, XXXX. Some of my website related information was stolen both in XXXX, XXXX and here in XXXX along with my other property that hasn't been returned to me. 
I don't and haven't ever had any XXXX XXXX related Agreements,Contracts or Credit Cards offered to Veterans associated with ones businesses. Nor have I ever applied for or had a Business License or Business Permit in any City or State inspite of my diverse interest. Not once have I ever allowed another be it an Paralegal, Payee, Attorney, Employers, Landlords, Veteran Organizations including Vocational Rehabilitation Programs, XXXX( XXXX XXXX XXXX, XXXX XXXX, Entertainment Companies, Banks, Celebrity Personal Assistant Agencies or Celebs, Shelters, Charities, HUD, Housing Arthority, Department of Veteran Affairs, Military, Law Enforcement or anyone else nor their employess to sign any business related Agreements or Contracts on my behalf; not even my family members or friends. \n", - "None of my XXXX XXXX attempts were associated with my Employers, Department of Veteran Affairs,Vocational Rehabilitation Programs Military, Landlords, HUD( Housing Authority),Friends, Family nor did I ever sign related Agreements or Contracts with them. Not once had I ever provided anyone the passwords to be able to sign into my accounts rather were aware of my accounts or not. Yes, my desktop computer that was stolen along with my other property XX/XX/XXXX was registered with my Online Merchant Account. I had paid for my Merchant related Accounts through my same XXXX XXXX XXXX Account I purchased both of my XXXX XXXX XXXX related accounts through. That was 1st once during the Summer of XX/XX/XXXX and 2nd my related website months later, while I resided in XXXX XXXX and I worked for XXXX. I never offered nor did I ever sign any business Contracts or Agreements with XXXX nor my Landlord or their staff associted with any of my online websites or Merchant Accounts. My XXXX XXXX XXXX Compensation was deposited into both of my XXXX XXXX XXXX Accounts at that time. My account was changed during the Summer of XX/XX/XXXXbecause of theft of my Bank Card. None of my Checking,Savings, past Credit Cards or Business related were shared accounts in which others were allowed to \n", - "use to make purchases. I had written checks from my XXXX XXXX XXXX account to pay for my XXXX XXXX XXXX XXXX on the XXXX XXXX here in XXXX in XX/XX/XXXX before it's name changed to XXXX XXXX. Prior to me using my same account open a Checking account in person at XXXX XXXX before it's name was changed to XXXX XXXX. Where my XXXX XXXX XXXX XXXX has been deposited since that time. I had used my XXXX XXXX Checking to pay for my XXXX XXXX XXXX XXXX both before theft of my property XX/XX/XXXX and that was also prior to the theft of my property from my XXXX XXXX XXXX XXXX in XX/XX/XXXX.\n", - "I've stated this many times:\n", - "I paid for my 1st XXXX XXXX XXXX Membership while employed at XXXX using my XXXX XXXX XXXX account XXXX my XXXX XXXX XXXX XXXX was also deposited. That was changed to XXXX because I didn't receive my 1st XXXX XXXX XXXX Card the bank sent to XXXX XXXX residence on XXXX XXXX in XX/XX/XXXX while I was there. In which both my XXXX salary and XXXX XXXX XXXX XXXX were deposited into my account, no money from XXXX XXXX nor anyone else that was at that residence was given to nor were any of my children there. Nor did XXXX or any other person at that residence ever give me my missing Bank Card not even after I moved out and stayed a month at XXXX XXXX XXXX using my replacement card to pay for my Hotel room. 
Which is the same account I used to pay for XXXX XXXX Membership, XXXX XXXX XXXX, XXXX XXXX Membership fees, and various online Merchant Account activation related fees.\n", - "* XXXX XXXX XXXX.\n", - "XXXX XXXX XXXX XXXX. Membership\n", - "\n", - "# XXXX\n", - "* XXXX XXXX Membership\n", - "# XXXX\n", - "* Total Merchant Services XXXX and XXXX.\n", - "* XXXX XXXX XXXX XXXX XXXX\n", - "* XXXX XXXX changed my $XXXX a month fees to my XXXX XXXX XXXX account #XXXX.\n", - "XX/XX/XXXX - XX/XX/XXXX XXXX XXXX, XXXX.\n", - "\n", - "Rep: XXXX XXXX XXXX, Fl \n", - "XXXX\n", - "XXXX Website \n", - "XXXX\n", - "Software and website owner, I performed Internet advertising and marketing, to promote this software and website. I worked and XXXX from my home XXXX XXXX XXXX XXXX XXXX , XXXX. I purchased XXXX XXXX XXXX-Software Electronic Book CD and was given a website to promote the software on the internet. The XXXX was given a copy of my website owner certificate document submitted to me when I purchased the software marketing program as well copies of my other school transcripts in addition to XXXX XXXX XXXX for example. XXXX, represented the first initials of my children's names. I wasn't ever paid and I'm still owed the money. Nor did my marketing program have anything to do with any schools, college nor university programs nor did I ever offer or sign any agreement to include it such. Nor did my XXXX XXXX XXXX have anything to do with any other employers, Department of Family and Children, Military, Veteran Organizations or Food Stamp programs, Section 8 nor Indianapolis Housing Authority for example; only me.\n", - "Thank you,\n", - "XXXX XXXX\n", - "3. ACCORDING TO 15 U.S. CODE 6803-DISCLOSURE OF INSTITUTION PRIVACY POLICY, AND ACCORDING TO U.S. CODE 6802- OBLIGATIONS WITH RESPECT TO DISCLOSURES OF PERSONAL INFORMATION. ( b ) OPT OUT ( 1 ) IN GENERAL A FINANCIAL INSTITUTION MAY NOT DISCLOSE NONPUBLIC PERSONAL INFORMATION TO A NONAFFILIATED THIRD PARTY ( TRANSUNION, XXXX, AND XXXX. ) UNLESS- ( A ) SUCH FINANCIAL INSTITUTION CLEARLY AND CONSPICUOUSLY DISCLOSES TO THE CONSUMER, IN WRITING OR IN ELECTRONIC FORM OR OTHER FORM PERMITTED BY THE REGULATIONS PRESCRIBED UNDER SECTION 6804 OF THIS TITLE. ALSO ACCORDING TO THE \" XXXX ACT '', FINANCIAL INSTITUTIONS MUST TELL THEIR CUSTOMERS ABOUT THEIR INFORMATION-SHARING PRACTICES AND EXPLAIN TO CUSTOMERS THEIR RIGHT TO \" OPT OUT '' IF THEY DON'T WANT THEIR INFORMATION SHARED WITH CERTAIN THIRD PARTIES. UNDER THE FDCPA, A COLLECTOR MUST PROVIDE YOU WITH INFORMATION ABOUT THE DEBT IN ITS INITIAL COMMUNICATION OR WITHIN FIVE DAYS AFTER THE INITIAL COMMUNICATION. ALSO, THE FDCPA STATES, \" YOU CAN NOT ATTEMPT TO COLLECT AN DEBT WHILE A PERSON ( THE CONSUMER ) SUPRESS VALIDATION. TRANSUNION, XXXX, XXXX, AND THE ACCOUNTS LISTED BELOW HAVE CLEARLY VIOLATED MY RIGHTS : XXXX ACCOUNT # XXXX, XXXX XXXX XXXX ACCOUNT # XXXXXXXX XXXX XXXX XXXX XXXX ACCOUNT # XXXXXXXX XXXX XXXX XXXX ACCOUNT # XXXX, XXXX XXXX XXXX XXXX ACCOUNT # XXXX, AND XXXX ACCOUNT # XXXX. FAILURE TO RESPOND SATISFACTORILY WITH DELETIONS OF ALL THE ABOVE ACCOUNTS WILL RESULT IN LEGAL ACTIONS BEING TAKEN AGAINST, TRANSUNION, XXXX, XXXX, WHICH I'LL BE SEEKING A {$1000.00} PER VIOLATION FOR DEFAMATION OF CHARACTER ( PER SE ) NEGLIGENT ENABLEMENT OF IDENTITY FRAUD. 15 USC 1681 VIOLATIONS FOR WILLFUL NONCOMPLIANCE-616 CIVIL LIABILITY FOR WILLFUL NONCOPLIANCE. THIS IS THE THIRD TIME I'VE SUBMITTED A COMPLAINT, AND THE REPONSE I GET IS \" YOU CAN NOT LOCATE MY CREDIT REPORT! 
'' THIS IS CLEARLY NEGLIGENCE.\n", - "4. I do not know how this works, but I need it done or somehow corrected. My name is XXXX XXXX, XXXX XXXX XXXX XXXX TN XXXXMy SS XXXX DOB XXXX. I had some issues with my income being affected by the COVID-19PANDEMICSHUTDOWN. I was under the 1 CARESAct, Pub. L. 116-136, section 4021, codified at FCRAsection 623 ( a ) ( 1 ) ( F ) ( i ) ( I ), 15 U.S.C.1681s- 2 ( a ) ( 1 ) ( F ) ( i ) ( I ). I am requesting some accommodations so I care to protect the integrity of my credit file. US DEPT OF ED / XXXX # XXXX, # XXXX accounts are reporting on XXXX, XXXX The was 30,60, 90 DAYS LATEsince requested assistance due to the pandemic. I found a few accounts that I have never done any business with these companies and the accounts do not belong on my report : XXXX XXXX # XXXX, XXXX XXXX XXXX XXXX # XXXX. \n", - "\n", - "I have some issues with the misspelling of my name, my correct spelling is XXXX XXXX. Please remove any other variation of my name they are not correct. The following addresses do not belong to me please delete them : XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXXSC, XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX\n", - "5. I want to know if this is even legal?! How can they disclose information without knowing its a correct email?!\n", - "comment list 2:\n", - "1. Hello, my name is XXXX XXXX, and I am writing to delete the following information in my file. The items I need deleted are listed in the report. I am a victim of identity theft and did not make the charge. I ask that the items be deleted to correct my credit report. I reported the theft of my identity to the Federal Trade Commission and I also have enclosed copies of the Federal Trade Commissions Identity Theft Affidavit. Please delete the items as soon as possible. The accounts are being reported currently open and the accounts need to be closed. \n", - "XXXX account number XXXX opened on XX/XX/2022 for the amount {$530.00} XXXX XXXX XXXX account number XXXX opened on XX/XX/2022 for the amount of {$140.00} The accounts are being reported currently open and need to be closed immediately. \n", - "Based on, 15 U.S. Code 1681c2 a consumer reporting agency shall block the reporting of any information in the file of a consumer that the consumer identifies as information that resulted from an alleged identity theft, not later than 4 business days after the date of receipt. This account should not be furnished on my consumer report. As a consumer I am demanding the deletion of the accounts listed IMMEDIATELY.\n", - "2. To whom it may concern : My personal information was breach in the internet as result accounts had been open in my name, I was advise to fill out an Id theft report to help me deal with this situation, I have listed each one of the accounts that do not belong to me. This is my second request to remove unverified items in my report, but XXXX keep rposting these account with out providing any type of original document as the FCRA provide, you need to provide me with original documents or remove these account immediately.\n", - "3. Ive been Disputting my XXXX XXXX I opened this account and someone got my information and used my card, I contacted XXXX over and over, they removed the negative reporting from my XXXX report but still reporting it negative on my XXXX and Expean this is very unfair to me because Im a victim of identity theft\n", - "4. 
Today, XX/XX/2021, I received three items in the mail, one envelope containing an unsolicited debit card from Navy Federal credit Union and the other two, with a letter each describing The Important Rights on two accounts should these accounts become delinquent under New York law. \n", - "\n", - "First of all, I never applied for these accounts with Navy Federal, not have I authorized anyone to do so on my behalf. I immediately contacted Navy Federal via phone and was told I was most likely a victim of identity theft and that I should monitor my credit and use a credit monitoring service. I was also asked for my email and mailing information in order to receive a letter from them regarding this issue. \n", - "\n", - "My main concern is having someone using my identity to illegally open bank accounts and commit fraud, destroying my credit and finances in the process. This bank is in another state from where I reside. I have not lived in Virginia nor do I intend to do so in the foreseeable future.\n", - "5. My personal information ( including my SSN, Drivers License Info, Addresses, and more ) was stolen from a hacking, and Equifax did n't tell the public about the hack until more than a month after the hacking. During this time, three Equifax executives were caught inside trading. It really shows how Equifax cares about other people!\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# The plain English request we will make of PaLM 2\n", "prompt = (\n", @@ -1465,42 +540,20 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": { "id": "mL5P0_3X04dE" }, - "outputs": [ - { - "data": { - "text/html": [ - "Query job 66e3af22-91cb-400a-92c3-69e7cd12ee01 is DONE. 0 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from bigframes.ml.llm import PaLM2TextGenerator\n", "\n", -<<<<<<< HEAD "q_a_model = PaLM2TextGenerator()" -======= - "# Create a BigQuery Cloud resource connection\n", - "CONN_NAME = \"bqdf-llm\"\n", - "session = bf.get_global_session()\n", - "\n", - "connection = f\"{PROJECT_ID}.{REGION}.{CONN_NAME}\"\n", - "q_a_model = PaLM2TextGenerator(session=session, connection_name=connection)" ->>>>>>> origin/lmm-kmeans-notebook ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": { "id": "ICWHsqAW1FNk" }, @@ -1512,58 +565,11 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": { "id": "gB7e1LXU1pst" }, - "outputs": [ - { - "data": { - "text/html": [ - "Query job 653add17-29be-408c-8882-064217f8556e is DONE. 0 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 8fd16954-853a-45fd-80bc-65b1242429e2 is DONE. 8 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job d9929bcb-26ce-4844-b68e-f4a980b90ede is DONE. 171 Bytes processed. 
Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "' The first comment list is about people complaining about companies or services, while the second comment list is about people reporting identity theft or fraud.'" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Send the request for PaLM 2 to generate a response to our prompt\n", "major_difference = q_a_model.predict(df)\n", @@ -1585,11 +591,7 @@ "source": [ "# Summary and next steps\n", "\n", -<<<<<<< HEAD "You've used the ML and LLM capabilities of BigQuery DataFrames to help analyze and understand a large dataset of unstructured feedback.\n", -======= - "You've used BigQuery DataFrames' integration with LLM models (`bigframes.ml.llm`) to generate code samples, and have tranformed LLM output by creating and using a custom function in BigQuery DataFrames.\n", ->>>>>>> origin/lmm-kmeans-notebook "\n", "Learn more about BigQuery DataFrames in the [documentation](https://cloud.google.com/python/docs/reference/bigframes/latest) and find more sample notebooks in the [GitHub repo](https://github.com/googleapis/python-bigquery-dataframes/tree/main/notebooks)." ] From 81125f9505ad98e89939769a8e1fcf30518705f0 Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Thu, 16 Nov 2023 15:58:14 -0800 Subject: [PATCH 04/26] feat: send warnings on LLM prediction partial failures (#216) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- bigframes/ml/llm.py | 78 ++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 93e2ba825f..78f3369daf 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -17,6 +17,7 @@ from __future__ import annotations from typing import cast, Literal, Optional, Union +import warnings import bigframes from bigframes import clients, constants @@ -24,15 +25,22 @@ from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd -_REMOTE_TEXT_GENERATOR_MODEL_ENDPOINT = "text-bison" -_REMOTE_TEXT_GENERATOR_32K_MODEL_ENDPOINT = "text-bison-32k" -_TEXT_GENERATE_RESULT_COLUMN = "ml_generate_text_llm_result" +_TEXT_GENERATOR_BISON_ENDPOINT = "text-bison" +_TEXT_GENERATOR_BISON_32K_ENDPOINT = "text-bison-32k" +_TEXT_GENERATOR_ENDPOINTS = ( + _TEXT_GENERATOR_BISON_ENDPOINT, + _TEXT_GENERATOR_BISON_32K_ENDPOINT, +) -_REMOTE_EMBEDDING_GENERATOR_MODEL_ENDPOINT = "textembedding-gecko" -_REMOTE_EMBEDDING_GENERATOR_MUlTILINGUAL_MODEL_ENDPOINT = ( - "textembedding-gecko-multilingual" +_EMBEDDING_GENERATOR_GECKO_ENDPOINT = "textembedding-gecko" +_EMBEDDING_GENERATOR_GECKO_MULTILINGUAL_ENDPOINT = "textembedding-gecko-multilingual" +_EMBEDDING_GENERATOR_ENDPOINTS = ( + _EMBEDDING_GENERATOR_GECKO_ENDPOINT, + _EMBEDDING_GENERATOR_GECKO_MULTILINGUAL_ENDPOINT, ) -_EMBED_TEXT_RESULT_COLUMN = "text_embedding" + +_ML_GENERATE_TEXT_STATUS = "ml_generate_text_status" +_ML_EMBED_TEXT_STATUS = "ml_embed_text_status" class PaLM2TextGenerator(base.Predictor): @@ -90,18 +98,16 @@ def _create_bqml_model(self): connection_id=connection_name_parts[2], iam_role="aiplatform.user", ) - if self.model_name == _REMOTE_TEXT_GENERATOR_MODEL_ENDPOINT: - options = { - "endpoint": _REMOTE_TEXT_GENERATOR_MODEL_ENDPOINT, - } - elif self.model_name == _REMOTE_TEXT_GENERATOR_32K_MODEL_ENDPOINT: - options = { - "endpoint": _REMOTE_TEXT_GENERATOR_32K_MODEL_ENDPOINT, - } - else: + + if self.model_name not in _TEXT_GENERATOR_ENDPOINTS: raise ValueError( - f"Model name {self.model_name} is not supported. We only support {_REMOTE_TEXT_GENERATOR_MODEL_ENDPOINT} and {_REMOTE_TEXT_GENERATOR_32K_MODEL_ENDPOINT}." + f"Model name {self.model_name} is not supported. We only support {', '.join(_TEXT_GENERATOR_ENDPOINTS)}." ) + + options = { + "endpoint": self.model_name, + } + return self._bqml_model_factory.create_remote_model( session=self.session, connection_name=self.connection_name, options=options ) @@ -182,7 +188,16 @@ def predict( "top_p": top_p, "flatten_json_output": True, } - return self._bqml_model.generate_text(X, options) + + df = self._bqml_model.generate_text(X, options) + + if (df[_ML_GENERATE_TEXT_STATUS] != "").any(): + warnings.warn( + f"Some predictions failed. Check column {_ML_GENERATE_TEXT_STATUS} for detailed status. 
You may want to filter the failed rows and retry.", + RuntimeWarning, + ) + + return df class PaLM2TextEmbeddingGenerator(base.Predictor): @@ -241,19 +256,15 @@ def _create_bqml_model(self): connection_id=connection_name_parts[2], iam_role="aiplatform.user", ) - if self.model_name == "textembedding-gecko": - options = { - "endpoint": _REMOTE_EMBEDDING_GENERATOR_MODEL_ENDPOINT, - } - elif self.model_name == _REMOTE_EMBEDDING_GENERATOR_MUlTILINGUAL_MODEL_ENDPOINT: - options = { - "endpoint": _REMOTE_EMBEDDING_GENERATOR_MUlTILINGUAL_MODEL_ENDPOINT, - } - else: + + if self.model_name not in _EMBEDDING_GENERATOR_ENDPOINTS: raise ValueError( - f"Model name {self.model_name} is not supported. We only support {_REMOTE_EMBEDDING_GENERATOR_MODEL_ENDPOINT} and {_REMOTE_EMBEDDING_GENERATOR_MUlTILINGUAL_MODEL_ENDPOINT}." + f"Model name {self.model_name} is not supported. We only support {', '.join(_EMBEDDING_GENERATOR_ENDPOINTS)}." ) + options = { + "endpoint": self.model_name, + } return self._bqml_model_factory.create_remote_model( session=self.session, connection_name=self.connection_name, options=options ) @@ -284,4 +295,13 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: options = { "flatten_json_output": True, } - return self._bqml_model.generate_text_embedding(X, options) + + df = self._bqml_model.generate_text_embedding(X, options) + + if (df[_ML_EMBED_TEXT_STATUS] != "").any(): + warnings.warn( + f"Some predictions failed. Check column {_ML_EMBED_TEXT_STATUS} for detailed status. You may want to filter the failed rows and retry.", + RuntimeWarning, + ) + + return df From 52dfad281def82548751a276ce42b087dbb09f9a Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 17 Nov 2023 21:42:14 +0000 Subject: [PATCH 05/26] docs: code samples for `Series.where` and `Series.mask` (#217) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated - `Series.where`: https://screenshot.googleplex.com/9XWHpMnwrzVPF9G - `Series.mask`: https://screenshot.googleplex.com/4cPvvzoVaVzoCDD Fixes internal issue 310981880 🦕 --- .../bigframes_vendored/pandas/core/series.py | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index c6d98075f5..01175dc0ef 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -1696,6 +1696,49 @@ def kurt(self): def where(self, cond, other): """Replace values where the condition is False. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([10, 11, 12, 13, 14]) + >>> s + 0 10 + 1 11 + 2 12 + 3 13 + 4 14 + dtype: Int64 + + You can filter the values in the Series based on a condition. The values + matching the condition would be kept, and not matching would be replaced. + The default replacement value is ``NA``. 
+ + >>> s.where(s % 2 == 0) + 0 10 + 1 + 2 12 + 3 + 4 14 + dtype: Int64 + + You can specify a custom replacement value for non-matching values. + + >>> s.where(s % 2 == 0, -1) + 0 10 + 1 -1 + 2 12 + 3 -1 + 4 14 + dtype: Int64 + >>> s.where(s % 2 == 0, 100*s) + 0 10 + 1 1100 + 2 12 + 3 1300 + 4 14 + dtype: Int64 + Args: cond (bool Series/DataFrame, array-like, or callable): Where cond is True, keep the original value. Where False, replace @@ -1720,6 +1763,77 @@ def where(self, cond, other): def mask(self, cond, other): """Replace values where the condition is True. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([10, 11, 12, 13, 14]) + >>> s + 0 10 + 1 11 + 2 12 + 3 13 + 4 14 + dtype: Int64 + + You can mask the values in the Series based on a condition. The values + matching the condition would be masked. + + >>> s.mask(s % 2 == 0) + 0 + 1 11 + 2 + 3 13 + 4 + dtype: Int64 + + You can specify a custom mask value. + + >>> s.mask(s % 2 == 0, -1) + 0 -1 + 1 11 + 2 -1 + 3 13 + 4 -1 + dtype: Int64 + >>> s.mask(s % 2 == 0, 100*s) + 0 1000 + 1 11 + 2 1200 + 3 13 + 4 1400 + dtype: Int64 + + You can also use a remote function to evaluate the mask condition. This + is useful in situation such as the following, where the mask + condition is evaluated based on a complicated business logic which cannot + be expressed in form of a Series. + + >>> @bpd.remote_function([str], bool, reuse=False) + ... def should_mask(name): + ... hash = 0 + ... for char_ in name: + ... hash += ord(char_) + ... return hash % 2 == 0 + + >>> s = bpd.Series(["Alice", "Bob", "Caroline"]) + >>> s + 0 Alice + 1 Bob + 2 Caroline + dtype: string + >>> s.mask(should_mask) + 0 + 1 Bob + 2 Caroline + dtype: string + >>> s.mask(should_mask, "REDACTED") + 0 REDACTED + 1 Bob + 2 Caroline + dtype: string + Args: cond (bool Series/DataFrame, array-like, or callable): Where cond is False, keep the original value. Where True, replace From a18d40e808ee0822d21715cc3e8f794c418aeebc Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Fri, 17 Nov 2023 14:42:15 -0800 Subject: [PATCH 06/26] fix: avoid unnecessary row_number() on sort key for io (#211) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- bigframes/core/__init__.py | 12 ++++++++---- bigframes/core/compile/compiled.py | 29 ++++++++++++++++++++--------- bigframes/dataframe.py | 12 ++++-------- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index b476961bdc..e19fec8f3f 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -125,14 +125,18 @@ def to_sql( col_id_overrides: typing.Mapping[str, str] = {}, sorted: bool = False, ) -> str: - if sorted or offset_column: - return self._compile_ordered().to_sql( - offset_column=offset_column, + array_value = self + if offset_column: + array_value = self.promote_offsets(offset_column) + if sorted: + return array_value._compile_ordered().to_sql( col_id_overrides=col_id_overrides, sorted=sorted, ) else: - return self._compile_unordered().to_sql(col_id_overrides=col_id_overrides) + return array_value._compile_unordered().to_sql( + col_id_overrides=col_id_overrides + ) def start_query( self, diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 78050ed4f0..461c2c005a 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -1031,31 +1031,42 @@ def _reproject_to_table(self) -> OrderedIR: def to_sql( self, - offset_column: typing.Optional[str] = None, col_id_overrides: typing.Mapping[str, str] = {}, sorted: bool = False, ) -> str: - offsets_id = offset_column or ORDER_ID_COLUMN - sql = ibis_bigquery.Backend().compile( self._to_ibis_expr( - ordering_mode="offset_col" - if (offset_column or sorted) - else "unordered", - order_col_name=offsets_id, + ordering_mode="unordered", col_id_overrides=col_id_overrides, + expose_hidden_cols=sorted, ) ) if sorted: + output_columns = [ + col_id_overrides.get(col) if (col in col_id_overrides) else col + for col in self.column_ids + ] + selection = ", ".join(map(lambda col_id: f"`{col_id}`", output_columns)) + order_by_clause = self._ordering_clause(self._ordering.all_ordering_columns) + sql = textwrap.dedent( - f"SELECT * EXCEPT (`{offsets_id}`)\n" + f"SELECT {selection}\n" "FROM (\n" f"{sql}\n" ")\n" - f"ORDER BY `{offsets_id}`\n" + f"{order_by_clause}\n" ) return typing.cast(str, sql) + def _ordering_clause(self, ordering: Iterable[OrderingColumnReference]) -> str: + parts = [] + for col_ref in ordering: + asc_desc = "ASC" if col_ref.direction.is_ascending else "DESC" + null_clause = "NULLS LAST" if col_ref.na_last else "NULLS FIRST" + part = f"`{col_ref.column_id}` {asc_desc} {null_clause}" + parts.append(part) + return f"ORDER BY {' ,'.join(parts)}" + def _to_ibis_expr( self, *, diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 57b4ca42cf..1f1275e217 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2577,14 +2577,10 @@ def _create_io_query(self, index: bool, ordering_id: Optional[str]) -> str: } if ordering_id is not None: - return array_value.to_sql( - offset_column=ordering_id, - col_id_overrides=id_overrides, - ) - else: - return array_value.to_sql( - col_id_overrides=id_overrides, - ) + array_value = array_value.promote_offsets(ordering_id) + return array_value.to_sql( + col_id_overrides=id_overrides, + ) def _run_io_query( self, From 010486c3494e05d714da6cc7d51514518d9ae1ea Mon Sep 17 00:00:00 2001 
From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com> Date: Fri, 17 Nov 2023 15:38:14 -0800 Subject: [PATCH 07/26] docs: add code samples for df reshaping, function, merge, and join methods (#203) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #310245117 --
[310245117](https://b.corp.google.com/issues/310245117) 🦕
---
 .../bigframes_vendored/pandas/core/frame.py   | 218 +++++++++++++++++-
 1 file changed, 217 insertions(+), 1 deletion(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index b35d0f3b2e..8033c064d7 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -2121,6 +2121,59 @@ def groupby(
         used to group large amounts of data and compute operations on these
         groups.

+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'Animal': ['Falcon', 'Falcon',
+            ...                                'Parrot', 'Parrot'],
+            ...                     'Max Speed': [380., 370., 24., 26.]})
+            >>> df
+               Animal  Max Speed
+            0  Falcon      380.0
+            1  Falcon      370.0
+            2  Parrot       24.0
+            3  Parrot       26.0
+
+            [4 rows x 2 columns]
+
+            >>> df.groupby(['Animal'])['Max Speed'].mean()
+            Animal
+            Falcon    375.0
+            Parrot     25.0
+            Name: Max Speed, dtype: Float64
+
+        We can also choose to include NA in group keys or not by setting `dropna`:
+
+            >>> df = bpd.DataFrame([[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]],
+            ...                    columns=["a", "b", "c"])
+            >>> df.groupby(by=["b"]).sum()
+                 a  c
+            b
+            1.0  2  3
+            2.0  2  5
+
+            [2 rows x 2 columns]
+
+            >>> df.groupby(by=["b"], dropna=False).sum()
+                  a  c
+            b
+            1.0   2  3
+            2.0   2  5
+            <NA>  1  4
+
+            [3 rows x 2 columns]
+
+        We can also choose whether to return an object with group labels by
+        setting `as_index`:
+
+            >>> df.groupby(by=["b"], as_index=False).sum()
+                 b  a  c
+            0  1.0  2  3
+            1  2.0  2  5
+
+            [2 rows x 3 columns]
+
         Args:
             by (str, Sequence[str]):
                 A label or list of labels may be passed to group by the columns
@@ -2224,7 +2277,7 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame:
                 Python function wrapped by ``remote_function`` decorator,
                 returns a single value from a single value.
             na_action (Optional[str], default None):
-                ``{None, 'ignore'}``, default None. If ‘ignore’, propagate NaN
+                ``{None, 'ignore'}``, default None. If `ignore`, propagate NaN
                 values, without passing them to func.

         Returns:
@@ -2240,6 +2293,74 @@ def join(self, other, *, on: Optional[str] = None, how: str) -> DataFrame:

         Join columns with `other` DataFrame on index

+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+        Join two DataFrames by specifying how to handle the operation:
+
+            >>> df1 = bpd.DataFrame({'col1': ['foo', 'bar'], 'col2': [1, 2]}, index=[10, 11])
+            >>> df1
+               col1  col2
+            10  foo     1
+            11  bar     2
+
+            [2 rows x 2 columns]
+
+            >>> df2 = bpd.DataFrame({'col3': ['foo', 'baz'], 'col4': [3, 4]}, index=[11, 22])
+            >>> df2
+               col3  col4
+            11  foo     3
+            22  baz     4
+
+            [2 rows x 2 columns]
+
+            >>> df1.join(df2)
+               col1  col2  col3  col4
+            10  foo     1  <NA>  <NA>
+            11  bar     2   foo     3
+
+            [2 rows x 4 columns]
+
+            >>> df1.join(df2, how="left")
+               col1  col2  col3  col4
+            10  foo     1  <NA>  <NA>
+            11  bar     2   foo     3
+
+            [2 rows x 4 columns]
+
+            >>> df1.join(df2, how="right")
+               col1  col2  col3  col4
+            11   bar     2   foo     3
+            22  <NA>  <NA>   baz     4
+
+            [2 rows x 4 columns]
+
+            >>> df1.join(df2, how="outer")
+               col1  col2  col3  col4
+            10   foo     1  <NA>  <NA>
+            11   bar     2   foo     3
+            22  <NA>  <NA>   baz     4
+
+            [3 rows x 4 columns]
+
+            >>> df1.join(df2, how="inner")
+               col1  col2  col3  col4
+            11  bar     2   foo     3
+
+            [1 rows x 4 columns]
+
+        Another option to join using the key columns is to use the `on` parameter:
+
+            >>> df1.join(df2, on="col1", how="right")
+                  col1  col2 col3  col4
+            <NA>    11  <NA>  foo     3
+            <NA>    22  <NA>  baz     4
+
+            [2 rows x 4 columns]
+
         Args:
             other:
                 DataFrame with an Index similar to the Index of this one.
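A quick editorial aside between hunks, since the `on` parameter shown above is the least obvious variant: `join(..., on=...)` matches a *column* of the calling DataFrame against the *index* of the other DataFrame, rather than joining index to index. The sketch below uses plain pandas as a local stand-in (an assumption for illustration only; the doctests above run under `bigframes.pandas`, which mirrors these pandas semantics, though its output formatting differs):

```python
import pandas as pd

# `on="col1"` matches df1's "col1" values against df2's index,
# not df1's index against df2's index.
df1 = pd.DataFrame({"col1": ["foo", "bar"], "col2": [1, 2]}, index=[10, 11])
df2 = pd.DataFrame({"col3": ["x", "y"], "col4": [3, 4]}, index=["foo", "baz"])

print(df1.join(df2, on="col1"))
#    col1  col2 col3  col4
# 10  foo     1    x   3.0   <- "foo" found in df2's index
# 11  bar     2  NaN   NaN   <- "bar" not in df2's index
```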
@@ -2292,6 +2413,78 @@ def merge(
         rows will be matched against each other. This is different from usual SQL
         join behaviour and can lead to unexpected results.

+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+        Merge DataFrames df1 and df2 by specifying the type of merge:
+
+            >>> df1 = bpd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
+            >>> df1
+                 a  b
+            0  foo  1
+            1  bar  2
+
+            [2 rows x 2 columns]
+
+            >>> df2 = bpd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
+            >>> df2
+                 a  c
+            0  foo  3
+            1  baz  4
+
+            [2 rows x 2 columns]
+
+            >>> df1.merge(df2, how="inner", on="a")
+                 a  b  c
+            0  foo  1  3
+
+            [1 rows x 3 columns]
+
+            >>> df1.merge(df2, how='left', on='a')
+                 a  b     c
+            0  foo  1     3
+            1  bar  2  <NA>
+
+            [2 rows x 3 columns]
+
+        Merge df1 and df2 on the lkey and rkey columns. The value columns have
+        the default suffixes, _x and _y, appended.
+
+            >>> df1 = bpd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
+            ...                      'value': [1, 2, 3, 5]})
+            >>> df1
+              lkey  value
+            0  foo      1
+            1  bar      2
+            2  baz      3
+            3  foo      5
+
+            [4 rows x 2 columns]
+
+            >>> df2 = bpd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'],
+            ...                      'value': [5, 6, 7, 8]})
+            >>> df2
+              rkey  value
+            0  foo      5
+            1  bar      6
+            2  baz      7
+            3  foo      8
+
+            [4 rows x 2 columns]
+
+            >>> df1.merge(df2, left_on='lkey', right_on='rkey')
+              lkey  value_x rkey  value_y
+            0  foo        1  foo        5
+            1  foo        1  foo        8
+            2  bar        2  bar        6
+            3  baz        3  baz        7
+            4  foo        5  foo        5
+            5  foo        5  foo        8
+
+            [6 rows x 4 columns]
+
         Args:
             right:
                 Object to merge with.
@@ -2342,6 +2535,29 @@ def apply(self, func, *, args=(), **kwargs):
         the DataFrame's index (``axis=0``) the final return type
         is inferred from the return type of the applied
         function.

+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
+            >>> df
+               col1  col2
+            0     1     3
+            1     2     4
+
+            [2 rows x 2 columns]
+
+            >>> def square(x):
+            ...     return x * x
+            >>> df1 = df.apply(square)
+            >>> df1
+               col1  col2
+            0     1     9
+            1     4    16
+
+            [2 rows x 2 columns]
+
         Args:
             func (function):
                 Function to apply to each column or row.
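Before moving on to the next patch, a brief cross-check of the corrected `apply` doctest above. This sketch uses plain pandas rather than BigQuery DataFrames (an assumption for local illustration; the bigframes behavior documented above mirrors it for this elementwise case):

```python
import pandas as pd

df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})

def square(x):
    # With the default axis=0, `x` is an entire column (a Series),
    # so this squares every element column by column.
    return x * x

print(df.apply(square))
#    col1  col2
# 0     1     9
# 1     4    16
```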
From dd78acb174545ba292776a642afcec46f8ee4a2a Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Mon, 20 Nov 2023 10:36:14 -0800
Subject: [PATCH 08/26] fix: dedup special character (#209)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: dedup special character

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot
---
 tests/unit/core/test_bf_utils.py                   |  6 +++---
 third_party/bigframes_vendored/pandas/io/common.py | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/unit/core/test_bf_utils.py b/tests/unit/core/test_bf_utils.py
index fc34f35d9c..10ce1fd09e 100644
--- a/tests/unit/core/test_bf_utils.py
+++ b/tests/unit/core/test_bf_utils.py
@@ -25,7 +25,7 @@ def test_get_standardized_ids_columns():
         "0",
         utils.UNNAMED_COLUMN_ID,
         "duplicate",
-        "duplicate.1",
+        "duplicate_1",
         "with_space",
     ]

     assert idx_ids == []
@@ -37,13 +37,13 @@ def test_get_standardized_ids_indexes():

     col_ids, idx_ids = utils.get_standardized_ids(col_labels, idx_labels)

-    assert col_ids == ["duplicate.2"]
+    assert col_ids == ["duplicate_2"]
     assert idx_ids == [
         "string",
         "0",
         utils.UNNAMED_INDEX_ID,
         "duplicate",
-        "duplicate.1",
+        "duplicate_1",
         "with_space",
     ]

diff --git a/third_party/bigframes_vendored/pandas/io/common.py b/third_party/bigframes_vendored/pandas/io/common.py
index 506984e64d..e186f02b5b 100644
--- a/third_party/bigframes_vendored/pandas/io/common.py
+++ b/third_party/bigframes_vendored/pandas/io/common.py
@@ -13,13 +13,13 @@ def dedup_names(
     """
     Rename column names if duplicates exist.

-    Currently the renaming is done by appending a period and an autonumeric,
-    but a custom pattern may be supported in the future.
+    Currently the renaming is done by appending an underscore and an
+    autonumeric, but a custom pattern may be supported in the future.

    Examples
    ```
    dedup_names(["x", "y", "x", "x"], is_potential_multiindex=False)
-   ['x', 'y', 'x.1', 'x.2']
+   ['x', 'y', 'x_1', 'x_2']
    ```
    """
    names = list(names)  # so we can index
@@ -34,9 +34,9 @@ def dedup_names(
         if is_potential_multiindex:
             # for mypy
             assert isinstance(col, tuple)
-            col = col[:-1] + (f"{col[-1]}.{cur_count}",)
+            col = col[:-1] + (f"{col[-1]}_{cur_count}",)
         else:
-            col = f"{col}.{cur_count}"
+            col = f"{col}_{cur_count}"
         cur_count = counts[col]

         names[i] = col

From c88d38e69682f4c620174086b8f16f4780c04811 Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Mon, 20 Nov 2023 23:02:15 +0000
Subject: [PATCH 09/26] docs: add code samples for `index` and `column`
 properties (#212)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a
few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a
  [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose)
  before writing your code!
That way we can discuss the change, evaluate designs, and agree on the general
idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated
  - `DataFrame.columns`: https://screenshot.googleplex.com/3Bwdb482FBfEsi2
  - `DataFrame.index`: https://screenshot.googleplex.com/4iJymH3FxMn8Hhb
  - `Series.index`: https://screenshot.googleplex.com/7MXQcuASbQ3c8s5

Fixes internal issue 310260952 🦕
---
 .../bigframes_vendored/pandas/core/frame.py   | 79 ++++++++++++++++++-
 .../bigframes_vendored/pandas/core/series.py  | 49 +++++++++++-
 2 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 8033c064d7..f448ad7939 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3106,6 +3106,47 @@ def index(self):
         index is used for label-based access and alignment, and can be accessed
         or modified using this attribute.

+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+        You can access the index of a DataFrame via ``index`` property.
+
+            >>> df = bpd.DataFrame({'Name': ['Alice', 'Bob', 'Aritra'],
+            ...                     'Age': [25, 30, 35],
+            ...                     'Location': ['Seattle', 'New York', 'Kona']},
+            ...                    index=([10, 20, 30]))
+            >>> df
+                  Name  Age  Location
+            10   Alice   25   Seattle
+            20     Bob   30  New York
+            30  Aritra   35      Kona
+
+            [3 rows x 3 columns]
+            >>> df.index # doctest: +ELLIPSIS
+            <bigframes.core.indexes.index.Index object at 0x...>
+            >>> df.index.values
+            array([10, 20, 30], dtype=object)
+
+        Let's try setting a new index for the dataframe and see it reflected via
+        the ``index`` property.
+
+            >>> df1 = df.set_index(["Name", "Location"])
+            >>> df1
+                             Age
+            Name   Location
+            Alice  Seattle    25
+            Bob    New York   30
+            Aritra Kona       35
+
+            [3 rows x 1 columns]
+            >>> df1.index # doctest: +ELLIPSIS
+            <bigframes.core.indexes.index.Index object at 0x...>
+            >>> df1.index.values
+            array([('Alice', 'Seattle'), ('Bob', 'New York'), ('Aritra', 'Kona')],
+                  dtype=object)
+
         Returns:
             The index labels of the DataFrame.
         """
@@ -3113,7 +3154,43 @@ def index(self):

     @property
     def columns(self):
-        "The column labels of the DataFrame."
+        """The column labels of the DataFrame.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+        You can access the column labels of a DataFrame via ``columns`` property.
+
+            >>> df = bpd.DataFrame({'Name': ['Alice', 'Bob', 'Aritra'],
+            ...                     'Age': [25, 30, 35],
+            ...                     'Location': ['Seattle', 'New York', 'Kona']},
+            ...                    index=([10, 20, 30]))
+            >>> df
+                  Name  Age  Location
+            10   Alice   25   Seattle
+            20     Bob   30  New York
+            30  Aritra   35      Kona
+
+            [3 rows x 3 columns]
+            >>> df.columns
+            Index(['Name', 'Age', 'Location'], dtype='object')
+
+        You can also set new labels for columns.
+
+            >>> df.columns = ["NewName", "NewAge", "NewLocation"]
+            >>> df
+               NewName  NewAge NewLocation
+            10   Alice      25     Seattle
+            20     Bob      30    New York
+            30  Aritra      35        Kona
+
+            [3 rows x 3 columns]
+            >>> df.columns
+            Index(['NewName', 'NewAge', 'NewLocation'], dtype='object')
+
+        """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

     def value_counts(
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 01175dc0ef..a86765a412 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -44,7 +44,54 @@ def struct(self):

     @property
     def index(self):
-        """The index (axis labels) of the Series."""
+        """The index (axis labels) of the Series.
+
+        The index of a Series is used to label and identify each element of the
+        underlying data. The index can be thought of as an immutable ordered set
+        (technically a multi-set, as it may contain duplicate labels), and is
+        used to index and align data.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+        You can access the index of a Series via ``index`` property.
+
+            >>> df = bpd.DataFrame({'Name': ['Alice', 'Bob', 'Aritra'],
+            ...                     'Age': [25, 30, 35],
+            ...                     'Location': ['Seattle', 'New York', 'Kona']},
+            ...                    index=([10, 20, 30]))
+            >>> s = df["Age"]
+            >>> s
+            10    25
+            20    30
+            30    35
+            Name: Age, dtype: Int64
+            >>> s.index # doctest: +ELLIPSIS
+            <bigframes.core.indexes.index.Index object at 0x...>
+            >>> s.index.values
+            array([10, 20, 30], dtype=object)
+
+        Let's try a multi-index case and see it reflected via the ``index``
+        property.
+
+            >>> df1 = df.set_index(["Name", "Location"])
+            >>> s1 = df1["Age"]
+            >>> s1
+            Name    Location
+            Alice   Seattle     25
+            Bob     New York    30
+            Aritra  Kona        35
+            Name: Age, dtype: Int64
+            >>> s1.index # doctest: +ELLIPSIS
+            <bigframes.core.indexes.index.Index object at 0x...>
+            >>> s1.index.values
+            array([('Alice', 'Seattle'), ('Bob', 'New York'), ('Aritra', 'Kona')],
+                  dtype=object)
+
+        Returns:
+            The index labels of the Series.
+        """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

     @property

From 1d1477158b5a8e84d099e07c6f566182a1abd7fc Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Tue, 21 Nov 2023 16:16:25 +0000
Subject: [PATCH 10/26] test: re-enable `system_prerelease` tests (#221)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* test: re-enable `system_prerelease` tests

* exclude ibis from prerelease install list

* install explicit 6.2.0 version for ibis in prerelease

* add unit_prerelease to pre and post submit e2e tests

* Update noxfile.py

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Tim Swast
Co-authored-by: Owl Bot
---
 .kokoro/continuous/e2e.cfg |  2 +-
 .kokoro/presubmit/e2e.cfg  |  2 +-
 noxfile.py                 | 50 ++++++++++++++++++++++++++++++++------
 3 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/.kokoro/continuous/e2e.cfg b/.kokoro/continuous/e2e.cfg
index d875f36060..2f93a58212 100644
--- a/.kokoro/continuous/e2e.cfg
+++ b/.kokoro/continuous/e2e.cfg
@@ -3,5 +3,5 @@
 # Only run this nox session.
 env_vars: {
   key: "NOX_SESSION"
-  value: "system_noextras e2e notebook samples"
+  value: "unit_prerelease system_prerelease system_noextras e2e notebook samples"
 }
diff --git a/.kokoro/presubmit/e2e.cfg b/.kokoro/presubmit/e2e.cfg
index d875f36060..2f93a58212 100644
--- a/.kokoro/presubmit/e2e.cfg
+++ b/.kokoro/presubmit/e2e.cfg
@@ -3,5 +3,5 @@
 # Only run this nox session.
 env_vars: {
   key: "NOX_SESSION"
-  value: "system_noextras e2e notebook samples"
+  value: "unit_prerelease system_prerelease system_noextras e2e notebook samples"
 }
diff --git a/noxfile.py b/noxfile.py
index da61232fc7..8d6d641fc1 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -494,6 +494,11 @@ def prerelease(session: nox.sessions.Session, tests_path):
         CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
     )

+    # Ignore officially released versions of certain packages specified in
+    # testing/constraints-*.txt and install more recent, pre-release versions
+    # directly
+    already_installed = set()
+
     # PyArrow prerelease packages are published to an alternative PyPI host.
     # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages
     session.install(
@@ -504,6 +509,8 @@ def prerelease(session: nox.sessions.Session, tests_path):
         "--upgrade",
         "pyarrow",
     )
+    already_installed.add("pyarrow")
+
     session.install(
         "--extra-index-url",
         "https://pypi.anaconda.org/scipy-wheels-nightly/simple",
@@ -512,16 +519,48 @@ def prerelease(session: nox.sessions.Session, tests_path):
         "--upgrade",
         "pandas",
     )
+    already_installed.add("pandas")
+
+    # TODO(shobs):
+    # Commit https://github.com/ibis-project/ibis/commit/c20ba7feab6bdea6c299721310e04dbc10551cc2
+    # introduced a breaking change that removed the following:
+    #     ibis.expr.rules.column
+    #     ibis.expr.rules.value
+    #     ibis.expr.rules.any
+    # Let's exclude ibis head from the prerelease install list for now. Instead, use
+    # a working ibis-framework version resolved via setup.py (currently resolves
+    # to version 6.2.0 due to version requirement ">=6.2.0,<7.0.0dev").
+    # We should enable the head back once bigframes supports a version that
+    # includes the above commit.
+    # session.install(
+    #     "--upgrade",
+    #     "-e",  # Use -e so that py.typed file is included.
+    #     "git+https://github.com/ibis-project/ibis.git#egg=ibis-framework",
+    # )
+    session.install("--no-deps", "ibis-framework==6.2.0")
+    already_installed.add("ibis-framework")
+
+    # Workaround https://github.com/googleapis/python-db-dtypes-pandas/issues/178
+    session.install("--no-deps", "db-dtypes")
+    already_installed.add("db-dtypes")
+
+    # Ensure we catch breaking changes in the client libraries early.
+    session.install(
+        "--upgrade",
+        "-e",
+        "git+https://github.com/googleapis/python-bigquery.git#egg=google-cloud-bigquery",
+    )
+    already_installed.add("google-cloud-bigquery")
     session.install(
         "--upgrade",
-        "-e",  # Use -e so that py.typed file is included.
-        "git+https://github.com/ibis-project/ibis.git#egg=ibis-framework",
+        "-e",
+        "git+https://github.com/googleapis/python-bigquery-storage.git#egg=google-cloud-bigquery-storage",
     )
-    # Workaround https://github.com/googleapis/python-db-dtypes-pandas/issues/178
-    session.install("--no-deps", "db-dtypes")
+    already_installed.add("google-cloud-bigquery-storage")

     # Workaround to install pandas-gbq >=0.15.0, which is required by test only.
session.install("--no-deps", "pandas-gbq") + already_installed.add("pandas-gbq") session.install( *set(UNIT_TEST_STANDARD_DEPENDENCIES + SYSTEM_TEST_STANDARD_DEPENDENCIES), @@ -541,9 +580,6 @@ def prerelease(session: nox.sessions.Session, tests_path): constraints_text = constraints_file.read() # Ignore leading whitespace and comment lines. - already_installed = frozenset( - ("db-dtypes", "pandas", "pyarrow", "ibis-framework", "pandas-gbq") - ) deps = [ match.group(1) for match in re.finditer( From ed8876d3439a3b45b65e8789737c3c2e3a7f1adb Mon Sep 17 00:00:00 2001 From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:02:31 -0800 Subject: [PATCH 11/26] feat: add the recent api method for ML component (#225) * feat: add the recent api method for ML component --- bigframes/ml/cluster.py | 2 ++ bigframes/ml/compose.py | 2 ++ bigframes/ml/decomposition.py | 2 ++ bigframes/ml/ensemble.py | 5 +++++ bigframes/ml/forecasting.py | 2 ++ bigframes/ml/imported.py | 3 +++ bigframes/ml/linear_model.py | 3 +++ bigframes/ml/llm.py | 4 +++- bigframes/ml/pipeline.py | 2 ++ bigframes/ml/preprocessing.py | 7 +++++++ tests/unit/session/test_io_bigquery.py | 3 +++ 11 files changed, 34 insertions(+), 1 deletion(-) diff --git a/bigframes/ml/cluster.py b/bigframes/ml/cluster.py index c9f52ba0b6..6b79d356a2 100644 --- a/bigframes/ml/cluster.py +++ b/bigframes/ml/cluster.py @@ -22,11 +22,13 @@ from google.cloud import bigquery import bigframes +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.cluster._kmeans +@log_adapter.class_logger class KMeans( base.UnsupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.cluster._kmeans.KMeans, diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index bf046ff691..ace876dd2d 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -22,6 +22,7 @@ from typing import List, Optional, Tuple, Union from bigframes import constants +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, preprocessing, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.compose._column_transformer @@ -36,6 +37,7 @@ ] +@log_adapter.class_logger class ColumnTransformer( base.Transformer, third_party.bigframes_vendored.sklearn.compose._column_transformer.ColumnTransformer, diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index 7cda7a6993..ef777cb33a 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -22,11 +22,13 @@ from google.cloud import bigquery import bigframes +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.decomposition._pca +@log_adapter.class_logger class PCA( base.UnsupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.decomposition._pca.PCA, diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index fcb3fe5343..1cc9fb3739 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -22,6 +22,7 @@ from google.cloud import bigquery import bigframes +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.ensemble._forest @@ -47,6 +48,7 @@ } +@log_adapter.class_logger class XGBRegressor( base.SupervisedTrainablePredictor, 
third_party.bigframes_vendored.xgboost.sklearn.XGBRegressor, @@ -202,6 +204,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBRegressor: return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger class XGBClassifier( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.xgboost.sklearn.XGBClassifier, @@ -356,6 +359,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBClassifier: return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger class RandomForestRegressor( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.ensemble._forest.RandomForestRegressor, @@ -521,6 +525,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestRegresso return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger class RandomForestClassifier( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.ensemble._forest.RandomForestClassifier, diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index cf23854fa0..995201062b 100644 --- a/bigframes/ml/forecasting.py +++ b/bigframes/ml/forecasting.py @@ -21,10 +21,12 @@ from google.cloud import bigquery import bigframes +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +@log_adapter.class_logger class ARIMAPlus(base.SupervisedTrainablePredictor): """Time Series ARIMA Plus model.""" diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index f6afc9aa38..4ae0a8ea4d 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -21,10 +21,12 @@ from google.cloud import bigquery import bigframes +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +@log_adapter.class_logger class TensorFlowModel(base.Predictor): """Imported TensorFlow model. @@ -101,6 +103,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> TensorFlowModel: return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger class ONNXModel(base.Predictor): """Imported Open Neural Network Exchange (ONNX) model. 
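An aside between file diffs: Patch 11 threads `@log_adapter.class_logger` through every ML class, but the decorator's implementation is not included in this series. For orientation, here is a minimal sketch of the general pattern it suggests — the names and details below are assumptions, not the actual `bigframes.core.log_adapter` code (which, judging by the tests later in this patch, also exposes a `get_and_reset_api_methods()` used when building BigQuery job labels):

```python
import functools

# Hypothetical module-level record of called API methods.
_api_methods: list = []

def class_logger(cls):
    # Wrap every public method of the class so each call records its name.
    # list(...) materializes the items, since we mutate the class while iterating.
    for name, attr in list(vars(cls).items()):
        if callable(attr) and not name.startswith("_"):
            setattr(cls, name, _logged(attr))
    return cls

def _logged(method):
    @functools.wraps(method)
    def wrapper(*args, **kwargs):
        _api_methods.append(method.__name__)  # record usage, then delegate
        return method(*args, **kwargs)
    return wrapper

# Usage sketch:
@class_logger
class KMeans:
    def fit(self, X):
        return self

KMeans().fit(None)
print(_api_methods)  # ['fit']
```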
diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index 433d9fbc38..5ee87b8850 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -23,6 +23,7 @@ import bigframes import bigframes.constants as constants +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.linear_model._base @@ -46,6 +47,7 @@ } +@log_adapter.class_logger class LinearRegression( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.linear_model._base.LinearRegression, @@ -178,6 +180,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> LinearRegression: return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger class LogisticRegression( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.linear_model._logistic.LogisticRegression, diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 78f3369daf..5beb54a32d 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -21,7 +21,7 @@ import bigframes from bigframes import clients, constants -from bigframes.core import blocks +from bigframes.core import blocks, log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd @@ -43,6 +43,7 @@ _ML_EMBED_TEXT_STATUS = "ml_embed_text_status" +@log_adapter.class_logger class PaLM2TextGenerator(base.Predictor): """PaLM2 text generator LLM model. @@ -200,6 +201,7 @@ def predict( return df +@log_adapter.class_logger class PaLM2TextEmbeddingGenerator(base.Predictor): """PaLM2 text embedding generator LLM model. diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py index ad0b3fae11..4ae2bfe555 100644 --- a/bigframes/ml/pipeline.py +++ b/bigframes/ml/pipeline.py @@ -24,11 +24,13 @@ import bigframes import bigframes.constants as constants +from bigframes.core import log_adapter from bigframes.ml import base, compose, forecasting, loader, preprocessing, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.pipeline +@log_adapter.class_logger class Pipeline( base.BaseEstimator, third_party.bigframes_vendored.sklearn.pipeline.Pipeline, diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index 5f44d40218..a403e57e71 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -20,6 +20,7 @@ import typing from typing import Any, cast, List, Literal, Optional, Tuple, Union +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.preprocessing._data @@ -28,6 +29,7 @@ import third_party.bigframes_vendored.sklearn.preprocessing._label +@log_adapter.class_logger class StandardScaler( base.Transformer, third_party.bigframes_vendored.sklearn.preprocessing._data.StandardScaler, @@ -111,6 +113,7 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: ) +@log_adapter.class_logger class MaxAbsScaler( base.Transformer, third_party.bigframes_vendored.sklearn.preprocessing._data.MaxAbsScaler, @@ -194,6 +197,7 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: ) +@log_adapter.class_logger class MinMaxScaler( base.Transformer, third_party.bigframes_vendored.sklearn.preprocessing._data.MinMaxScaler, @@ -277,6 +281,7 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: ) +@log_adapter.class_logger class KBinsDiscretizer( 
 base.Transformer,
     third_party.bigframes_vendored.sklearn.preprocessing._discretization.KBinsDiscretizer,
@@ -395,6 +400,7 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame:
     )


+@log_adapter.class_logger
 class OneHotEncoder(
     base.Transformer,
     third_party.bigframes_vendored.sklearn.preprocessing._encoder.OneHotEncoder,
@@ -524,6 +530,7 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame:
     )


+@log_adapter.class_logger
 class LabelEncoder(
     base.LabelTransformer,
     third_party.bigframes_vendored.sklearn.preprocessing._label.LabelEncoder,
diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py
index e1481d3f05..c87835c412 100644
--- a/tests/unit/session/test_io_bigquery.py
+++ b/tests/unit/session/test_io_bigquery.py
@@ -59,6 +59,7 @@ def test_create_job_configs_labels_length_limit_not_met():


 def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit():
+    log_adapter.get_and_reset_api_methods()
     cur_labels = {
         "bigframes-api": "read_pandas",
         "source": "bigquery-dataframes-temp",
@@ -87,6 +88,7 @@ def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit():


 def test_create_job_configs_labels_length_limit_met_and_labels_is_none():
+    log_adapter.get_and_reset_api_methods()
     df = bpd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
     # Test running methods more than the labels' length limit
     for i in range(66):
@@ -102,6 +104,7 @@ def test_create_job_configs_labels_length_limit_met_and_labels_is_none():


 def test_create_job_configs_labels_length_limit_met():
+    log_adapter.get_and_reset_api_methods()
     cur_labels = {
         "bigframes-api": "read_pandas",
         "source": "bigquery-dataframes-temp",

From d7957fad071d223ef8f6fb8f3de395c865ff60aa Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Tue, 21 Nov 2023 21:26:52 -0800
Subject: [PATCH 12/26] docs: code samples for dataframe.any, dataframe.all
 and dataframe.prod (#223)

* docs: code samples for dataframe.any, dataframe.all and dataframe.prod

* Update examples

* update example output
---
 .../bigframes_vendored/pandas/core/frame.py   | 85 ++++++++++++++++++-
 1 file changed, 84 insertions(+), 1 deletion(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index f448ad7939..b771be3041 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -2584,6 +2584,33 @@ def any(self, *, axis=0, bool_only: bool = False):
         along a Dataframe axis that is True or equivalent (e.g. non-zero or
         non-empty).

+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [True, True], "B": [False, False]})
+            >>> df
+                  A      B
+            0  True  False
+            1  True  False
+
+            [2 rows x 2 columns]
+
+        Checking if each column contains at least one True element (the default
+        behavior without an explicit axis parameter).
+
+            >>> df.any()
+            A     True
+            B    False
+            dtype: boolean
+
+        Checking if each row contains at least one True element.
+
+            >>> df.any(axis=1)
+            0    True
+            1    True
+            dtype: boolean
+
         Args:
             axis ({index (0), columns (1)}):
                 Axis for the function to be applied on.
@@ -2604,6 +2631,33 @@ def all(self, axis=0, *, bool_only: bool = False):
         along a DataFrame axis that is False or equivalent (e.g. zero or
         empty).
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [True, True], "B": [False, False]})
+            >>> df
+                  A      B
+            0  True  False
+            1  True  False
+
+            [2 rows x 2 columns]
+
+        Checking if all values in each column are True (the default behavior
+        without an explicit axis parameter).
+
+            >>> df.all()
+            A     True
+            B    False
+            dtype: boolean
+
+        Checking across rows to see if all values are True.
+
+            >>> df.all(axis=1)
+            0    False
+            1    False
+            dtype: boolean
+
         Args:
             axis ({index (0), columns (1)}):
                 Axis for the function to be applied on.
@@ -2620,8 +2674,37 @@ def prod(self, axis=0, *, numeric_only: bool = False):
         """
         Return the product of the values over the requested axis.

+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [1, 2, 3], "B": [4.5, 5.5, 6.5]})
+            >>> df
+               A    B
+            0  1  4.5
+            1  2  5.5
+            2  3  6.5
+
+            [3 rows x 2 columns]
+
+        Calculating the product of each column (the default behavior without an
+        explicit axis parameter).
+
+            >>> df.prod()
+            A        6.0
+            B    160.875
+            dtype: Float64
+
+        Calculating the product of each row.
+
+            >>> df.prod(axis=1)
+            0     4.5
+            1    11.0
+            2    19.5
+            dtype: Float64
+
         Args:
-            aßxis ({index (0), columns (1)}):
+            axis ({index (0), columns (1)}):
                 Axis for the function to be applied on.
                 For Series this parameter is unused and defaults to 0.
             numeric_only (bool. default False):

From 71844b03cdbfe684320c186a0488c8c7fb4fcd6e Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Wed, 22 Nov 2023 23:46:14 +0000
Subject: [PATCH 13/26] docs: make the code samples reflect default bq
 connection usage (#206)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a
few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a
  [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose)
  before writing your code! That way we can discuss the change, evaluate
  designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes internal issue 305136837 🦕
---
 .../getting_started_bq_dataframes.ipynb      |   4 +-
 .../remote_functions/remote_function.ipynb   | 514 ++++++++----------
 samples/snippets/remote_function.py          |   6 +-
 3 files changed, 241 insertions(+), 283 deletions(-)

diff --git a/notebooks/getting_started/getting_started_bq_dataframes.ipynb b/notebooks/getting_started/getting_started_bq_dataframes.ipynb
index 6cc6acc993..18be5e48fd 100644
--- a/notebooks/getting_started/getting_started_bq_dataframes.ipynb
+++ b/notebooks/getting_started/getting_started_bq_dataframes.ipynb
@@ -802,7 +802,7 @@
    "source": [
     "Running the cell below creates a custom function using the `remote_function` method. This function categorizes a value into one of two buckets: >= 4000 or <4000.\n",
     "\n",
-    "> Note: Creating a function requires a [BigQuery connection](https://cloud.google.com/bigquery/docs/remote-functions#create_a_remote_function). This code assumes a pre-created connection named `bigframes-rf-conn`. If\n",
+    "> Note: Creating a function requires a [BigQuery connection](https://cloud.google.com/bigquery/docs/remote-functions#create_a_remote_function). This code assumes a pre-created connection named `bigframes-default-connection`.
If\n", "the connection is not already created, BigQuery DataFrames attempts to create one assuming the [necessary APIs\n", "and IAM permissions](https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.pandas#bigframes_pandas_remote_function) are set up in the project.\n", "\n", @@ -817,7 +817,7 @@ }, "outputs": [], "source": [ - "@bf.remote_function([float], str, bigquery_connection='bigframes-rf-conn')\n", + "@bf.remote_function([float], str)\n", "def get_bucket(num):\n", " if not num: return \"NA\"\n", " boundary = 4000\n", diff --git a/notebooks/remote_functions/remote_function.ipynb b/notebooks/remote_functions/remote_function.ipynb index 06be0e7293..063c1738b4 100644 --- a/notebooks/remote_functions/remote_function.ipynb +++ b/notebooks/remote_functions/remote_function.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 19, "id": "3613b1cd", "metadata": {}, "outputs": [], @@ -16,24 +16,16 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 20, "id": "f1175247", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shobs/code/bigframes1/venv/lib/python3.10/site-packages/google/auth/_default.py:78: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", - " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 25.4 s, sys: 2.5 s, total: 27.9 s\n", - "Wall time: 2min 31s\n" + "CPU times: user 2.34 s, sys: 307 ms, total: 2.65 s\n", + "Wall time: 17.8 s\n" ] }, { @@ -141,7 +133,7 @@ "9 154 Sure, but what about a solution using O(1) mem... 8" ] }, - "execution_count": 3, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -160,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 21, "id": "fd8a04a3", "metadata": {}, "outputs": [], @@ -191,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 22, "id": "2b5e4568", "metadata": {}, "outputs": [ @@ -199,8 +191,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.22 s, sys: 18.2 ms, total: 4.24 s\n", - "Wall time: 4.26 s\n" + "CPU times: user 3.32 s, sys: 0 ns, total: 3.32 s\n", + "Wall time: 3.32 s\n" ] }, { @@ -319,7 +311,7 @@ "9 154 Sure, but what about a solution using O(1) mem... 8 19" ] }, - "execution_count": 5, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -333,65 +325,25 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 23, "id": "b81feaef", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shobs/code/bigframes1/venv/lib/python3.10/site-packages/google/auth/_default.py:78: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 
\n", - " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", - "/usr/local/google/home/shobs/code/bigframes1/venv/lib/python3.10/site-packages/google/auth/_default.py:78: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", - " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2b1c9d671db14d2ca3be6a0b0c698430", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job 6b0a39de-40a0-4dd4-be88-248bd8ebcd77 is RUNNING. " ] }, "metadata": {}, @@ -399,13 +351,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "09706700e8dd4cf39f65a0d58371c1eb", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job a283cb39-41b1-44cd-a6c3-f2a2c6a55b25 is DONE. 17.2 GB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 4c1d9d3e-be25-4818-b74d-6214164d99ab is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -440,62 +390,62 @@ " \n", " \n", " 0\n", - " 11012908\n", - " you're welcome! according to the docs it shoul...\n", + " 11231597\n", + " In your update, why are some of the system fun...\n", " 0\n", " \n", " \n", " 1\n", - " 11013760\n", - " You *should* be concerned with the disk being ...\n", - " 0\n", + " 49684807\n", + " what you have tried so far . ??\n", + " 1\n", " \n", " \n", " 2\n", - " 11013784\n", - " have you looked at `Integrate` or `NIntegrate`?\n", + " 7623925\n", + " @Michael: It should work. Perhaps you looked i...\n", " 0\n", " \n", " \n", " 3\n", - " 11015512\n", - " sorry, is a typo. The variable name is dist. (...\n", + " 34046685\n", + " Will it work with SQL compact? Please excuse m...\n", " 0\n", " \n", " \n", " 4\n", - " 11016238\n", - " Pfff, I'm having trouble with that formula too...\n", + " 6426146\n", + " do you know the equation to your pdf?\n", " 0\n", " \n", " \n", " 5\n", - " 11016276\n", - " Thanks thinksteep! Does this mean that by usin...\n", + " 60686114\n", + " m sorry but at least you have to think about it.\n", " 0\n", " \n", " \n", " 6\n", - " 11016551\n", - " Jason, thanks for the reply. I've been workin...\n", + " 16631986\n", + " i think also making disable this by only jquer...\n", " 0\n", " \n", " \n", " 7\n", - " 11017973\n", - " I assume an `off` of 0.5 would put be exactly ...\n", + " 16498565\n", + " I am including these files on my header of the...\n", " 0\n", " \n", " \n", " 8\n", - " 11018225\n", - " Thank you very much. I do worry too much abou...\n", + " 26601001\n", + " wrong answer, you didn't understand the logic\n", " 0\n", " \n", " \n", " 9\n", - " 11018370\n", - " @IanClelland, I edited my question a bit. The ...\n", + " 73255842\n", + " Call the setOnClickListener before return row.\n", " 0\n", " \n", " \n", @@ -505,21 +455,21 @@ ], "text/plain": [ " id text score\n", - "0 11012908 you're welcome! according to the docs it shoul... 0\n", - "1 11013760 You *should* be concerned with the disk being ... 0\n", - "2 11013784 have you looked at `Integrate` or `NIntegrate`? 0\n", - "3 11015512 sorry, is a typo. The variable name is dist. (... 0\n", - "4 11016238 Pfff, I'm having trouble with that formula too... 0\n", - "5 11016276 Thanks thinksteep! Does this mean that by usin... 
0\n", - "6 11016551 Jason, thanks for the reply. I've been workin... 0\n", - "7 11017973 I assume an `off` of 0.5 would put be exactly ... 0\n", - "8 11018225 Thank you very much. I do worry too much abou... 0\n", - "9 11018370 @IanClelland, I edited my question a bit. The ... 0\n", + "0 11231597 In your update, why are some of the system fun... 0\n", + "1 49684807 what you have tried so far . ?? 1\n", + "2 7623925 @Michael: It should work. Perhaps you looked i... 0\n", + "3 34046685 Will it work with SQL compact? Please excuse m... 0\n", + "4 6426146 do you know the equation to your pdf? 0\n", + "5 60686114 m sorry but at least you have to think about it. 0\n", + "6 16631986 i think also making disable this by only jquer... 0\n", + "7 16498565 I am including these files on my header of the... 0\n", + "8 26601001 wrong answer, you didn't understand the logic 0\n", + "9 73255842 Call the setOnClickListener before return row. 0\n", "\n", "[10 rows x 3 columns]" ] }, - "execution_count": 6, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -539,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 24, "id": "55ed241e", "metadata": {}, "outputs": [ @@ -549,8 +499,9 @@ "text": [ "Help on function remote_function in module bigframes.pandas:\n", "\n", - "remote_function(input_types: 'List[type]', output_type: 'type', dataset: 'Optional[str]' = None, bigquery_connection: 'Optional[str]' = None, reuse: 'bool' = True)\n", - " Decorator to turn a user defined function into a BigQuery remote function.\n", + "remote_function(input_types: 'List[type]', output_type: 'type', dataset: 'Optional[str]' = None, bigquery_connection: 'Optional[str]' = None, reuse: 'bool' = True, name: 'Optional[str]' = None, packages: 'Optional[Sequence[str]]' = None)\n", + " Decorator to turn a user defined function into a BigQuery remote function. Check out\n", + " the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes.\n", " \n", " .. note::\n", " Please make sure following is setup before using this API:\n", @@ -576,7 +527,7 @@ " * BigQuery Data Editor (roles/bigquery.dataEditor)\n", " * BigQuery Connection Admin (roles/bigquery.connectionAdmin)\n", " * Cloud Functions Developer (roles/cloudfunctions.developer)\n", - " * Service Account User (roles/iam.serviceAccountUser)\n", + " * Service Account User (roles/iam.serviceAccountUser) on the service account `PROJECT_NUMBER-compute@developer.gserviceaccount.com`\n", " * Storage Object Viewer (roles/storage.objectViewer)\n", " * Project IAM Admin (roles/resourcemanager.projectIamAdmin) (Only required if the bigquery connection being used is not pre-created and is created dynamically with user credentials.)\n", " \n", @@ -602,15 +553,25 @@ " Name of the BigQuery connection. You should either have the\n", " connection already created in the `location` you have chosen, or\n", " you should have the Project IAM Admin role to enable the service\n", - " to create the connection for you if you need it.If this parameter is\n", + " to create the connection for you if you need it. 
If this parameter is\n", " not provided then the BigQuery connection from the session is used.\n", " reuse (bool, Optional):\n", " Reuse the remote function if already exists.\n", " `True` by default, which will result in reusing an existing remote\n", - " function (if any) that was previously created for the same udf.\n", - " Setting it to false would force creating a unique remote function.\n", + " function and corresponding cloud function (if any) that was\n", + " previously created for the same udf.\n", + " Setting it to `False` would force creating a unique remote function.\n", " If the required remote function does not exist then it would be\n", " created irrespective of this param.\n", + " name (str, Optional):\n", + " Explicit name of the persisted BigQuery remote function. Use it with\n", + " caution, because two users working in the same project and dataset\n", + " could overwrite each other's remote functions if they use the same\n", + " persistent name.\n", + " packages (str[], Optional):\n", + " Explicit name of the external package dependencies. Each dependency\n", + " is added to the `requirements.txt` as is, and can be of the form\n", + " supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/.\n", " Returns:\n", " callable: A remote function object pointing to the cloud assets created\n", " in the background to support the remote execution. The cloud assets can be\n", @@ -631,49 +592,16 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "c9a8d03d", - "metadata": {}, - "outputs": [], - "source": [ - "# BigQuery DataFrames user is a data scientist and may not have privileges to\n", - "# create a BQ connector and set it up for invoking a cloud function. They\n", - "# should get such a connector created from their cloud admin and use it with\n", - "# BigQuery DataFrames remote functions. 
If the provided connection name does not\n", - "# exist, BigQuery DataFrames will try to create it on the fly assuming the user\n", - "# has sufficient privileges.\n", - "bq_connection_name = 'bigframes-rf-conn'" - ] - }, - { - "cell_type": "code", - "execution_count": 9, + "execution_count": 25, "id": "fbc27f81", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[INFO][2023-08-18 21:23:29,687][bigframes.remote_function] Creating new cloud function: gcloud functions deploy bigframes-b0feb1fbaf8188b64d7e70118d93c5d4 --gen2 --runtime=python310 --project=bigframes-dev --region=us-central1 --source=/tmp/tmpl2ewfnue --entry-point=udf_http --trigger-http --no-allow-unauthenticated\n", - "[INFO][2023-08-18 21:24:43,689][bigframes.remote_function] Successfully created cloud function bigframes-b0feb1fbaf8188b64d7e70118d93c5d4 with uri (https://bigframes-b0feb1fbaf8188b64d7e70118d93c5d4-7krlje3eoq-uc.a.run.app)\n", - "[INFO][2023-08-18 21:24:57,348][bigframes.remote_function] Connector bigframes-rf-conn already exists\n", - "[INFO][2023-08-18 21:24:57,351][bigframes.remote_function] Creating BQ remote function: \n", - " CREATE OR REPLACE FUNCTION `bigframes-dev.bigframes_temp_us`.bigframes_b0feb1fbaf8188b64d7e70118d93c5d4(n INT64)\n", - " RETURNS INT64\n", - " REMOTE WITH CONNECTION `bigframes-dev.us.bigframes-rf-conn`\n", - " OPTIONS (\n", - " endpoint = \"https://bigframes-b0feb1fbaf8188b64d7e70118d93c5d4-7krlje3eoq-uc.a.run.app\"\n", - " )\n", - "[INFO][2023-08-18 21:24:58,300][bigframes.remote_function] Created remote function bigframes-dev.bigframes_temp_us.bigframes_b0feb1fbaf8188b64d7e70118d93c5d4\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ "\n", - "Wall time: 89.0601 s\n" + "Wall time: 76.2628 s\n" ] } ], @@ -684,7 +612,7 @@ "\n", "# User defined function\n", "# https://www.codespeedy.com/find-nth-prime-number-in-python/\n", - "@pd.remote_function([int], int, bigquery_connection=bq_connection_name)\n", + "@pd.remote_function([int], int, reuse=False)\n", "def nth_prime(n):\n", " prime_numbers = [2,3]\n", " i=3\n", @@ -712,7 +640,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 26, "id": "c1c9355f", "metadata": {}, "outputs": [ @@ -720,33 +648,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 16.8 ms, sys: 61 µs, total: 16.8 ms\n", - "Wall time: 17 ms\n" + "CPU times: user 55.8 ms, sys: 182 µs, total: 56 ms\n", + "Wall time: 54.5 ms\n" ] }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2f840ad27c514ed19c759a004b32de33", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job 0f421233-9d02-4746-bb39-86a3b0880aba is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job 4f8d5734-8070-4630-8a59-c05a31d60476 is RUNNING. " ] }, "metadata": {}, @@ -754,13 +666,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "116d6ef3d6b247d3aaafef5fe6b970de", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job c0a2c187-364d-4978-97bc-30352828f624 is DONE. 17.2 GB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job ec057f9e-726b-44f0-a5c0-24c05c7ecfeb is RUNNING. " ] }, "metadata": {}, @@ -796,71 +706,71 @@ " \n", " \n", " 0\n", - " 11012908\n", - " you're welcome! 
according to the docs it shoul...\n", + " 11231597\n", + " In your update, why are some of the system fun...\n", " 0\n", " -1\n", " \n", " \n", " 1\n", - " 11013760\n", - " You *should* be concerned with the disk being ...\n", - " 0\n", - " -1\n", + " 49684807\n", + " what you have tried so far . ??\n", + " 1\n", + " 2\n", " \n", " \n", " 2\n", - " 11013784\n", - " have you looked at `Integrate` or `NIntegrate`?\n", + " 7623925\n", + " @Michael: It should work. Perhaps you looked i...\n", " 0\n", " -1\n", " \n", " \n", " 3\n", - " 11015512\n", - " sorry, is a typo. The variable name is dist. (...\n", + " 34046685\n", + " Will it work with SQL compact? Please excuse m...\n", " 0\n", " -1\n", " \n", " \n", " 4\n", - " 11016238\n", - " Pfff, I'm having trouble with that formula too...\n", + " 6426146\n", + " do you know the equation to your pdf?\n", " 0\n", " -1\n", " \n", " \n", " 5\n", - " 11016276\n", - " Thanks thinksteep! Does this mean that by usin...\n", + " 60686114\n", + " m sorry but at least you have to think about it.\n", " 0\n", " -1\n", " \n", " \n", " 6\n", - " 11016551\n", - " Jason, thanks for the reply. I've been workin...\n", + " 16631986\n", + " i think also making disable this by only jquer...\n", " 0\n", " -1\n", " \n", " \n", " 7\n", - " 11017973\n", - " I assume an `off` of 0.5 would put be exactly ...\n", + " 16498565\n", + " I am including these files on my header of the...\n", " 0\n", " -1\n", " \n", " \n", " 8\n", - " 11018225\n", - " Thank you very much. I do worry too much abou...\n", + " 26601001\n", + " wrong answer, you didn't understand the logic\n", " 0\n", " -1\n", " \n", " \n", " 9\n", - " 11018370\n", - " @IanClelland, I edited my question a bit. The ...\n", + " 73255842\n", + " Call the setOnClickListener before return row.\n", " 0\n", " -1\n", " \n", @@ -871,21 +781,21 @@ ], "text/plain": [ " id text score n_prime\n", - "0 11012908 you're welcome! according to the docs it shoul... 0 -1\n", - "1 11013760 You *should* be concerned with the disk being ... 0 -1\n", - "2 11013784 have you looked at `Integrate` or `NIntegrate`? 0 -1\n", - "3 11015512 sorry, is a typo. The variable name is dist. (... 0 -1\n", - "4 11016238 Pfff, I'm having trouble with that formula too... 0 -1\n", - "5 11016276 Thanks thinksteep! Does this mean that by usin... 0 -1\n", - "6 11016551 Jason, thanks for the reply. I've been workin... 0 -1\n", - "7 11017973 I assume an `off` of 0.5 would put be exactly ... 0 -1\n", - "8 11018225 Thank you very much. I do worry too much abou... 0 -1\n", - "9 11018370 @IanClelland, I edited my question a bit. The ... 0 -1\n", + "0 11231597 In your update, why are some of the system fun... 0 -1\n", + "1 49684807 what you have tried so far . ?? 1 2\n", + "2 7623925 @Michael: It should work. Perhaps you looked i... 0 -1\n", + "3 34046685 Will it work with SQL compact? Please excuse m... 0 -1\n", + "4 6426146 do you know the equation to your pdf? 0 -1\n", + "5 60686114 m sorry but at least you have to think about it. 0 -1\n", + "6 16631986 i think also making disable this by only jquer... 0 -1\n", + "7 16498565 I am including these files on my header of the... 0 -1\n", + "8 26601001 wrong answer, you didn't understand the logic 0 -1\n", + "9 73255842 Call the setOnClickListener before return row. 
0 -1\n", "\n", "[10 rows x 4 columns]" ] }, - "execution_count": 10, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -900,7 +810,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 27, "id": "2701cb81", "metadata": {}, "outputs": [ @@ -908,8 +818,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "bigframes-dev.bigframes_temp_us.bigframes_b0feb1fbaf8188b64d7e70118d93c5d4\n", - "projects/bigframes-dev/locations/us-central1/functions/bigframes-b0feb1fbaf8188b64d7e70118d93c5d4\n" + "shobs-test.bigframes_temp_us.bigframes_343b7b4bb93ca8747dae20c22bdaec8b_p27heyce\n", + "projects/shobs-test/locations/us-central1/functions/bigframes-343b7b4bb93ca8747dae20c22bdaec8b-p27heyce\n" ] } ], @@ -922,7 +832,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 28, "id": "920fa18e", "metadata": {}, "outputs": [ @@ -937,6 +847,42 @@ " \n", " Then it can be applied to a DataFrame or Series.\n", " \n", + " .. note::\n", + " The return type of the function must be explicitly specified in the\n", + " function's original definition even if not otherwise required.\n", + " \n", + " BigQuery Utils provides many public functions under the ``bqutil`` project on Google Cloud Platform project\n", + " (See: https://github.com/GoogleCloudPlatform/bigquery-utils/tree/master/udfs#using-the-udfs).\n", + " You can checkout Community UDFs to use community-contributed functions.\n", + " (See: https://github.com/GoogleCloudPlatform/bigquery-utils/tree/master/udfs/community#community-udfs).\n", + " \n", + " **Examples:**\n", + " \n", + " Use the ``cw_lower_case_ascii_only`` function from Community UDFs.\n", + " (https://github.com/GoogleCloudPlatform/bigquery-utils/blob/master/udfs/community/cw_lower_case_ascii_only.sqlx)\n", + " \n", + " >>> import bigframes.pandas as bpd\n", + " >>> bpd.options.display.progress_bar = None\n", + " \n", + " >>> df = bpd.DataFrame({'id': [1, 2, 3], 'name': ['AURÉLIE', 'CÉLESTINE', 'DAPHNÉ']})\n", + " >>> df\n", + " id name\n", + " 0 1 AURÉLIE\n", + " 1 2 CÉLESTINE\n", + " 2 3 DAPHNÉ\n", + " \n", + " [3 rows x 2 columns]\n", + " \n", + " >>> func = bpd.read_gbq_function(\"bqutil.fn.cw_lower_case_ascii_only\")\n", + " >>> df1 = df.assign(new_name=df['name'].apply(func))\n", + " >>> df1\n", + " id name new_name\n", + " 0 1 AURÉLIE aurÉlie\n", + " 1 2 CÉLESTINE cÉlestine\n", + " 2 3 DAPHNÉ daphnÉ\n", + " \n", + " [3 rows x 3 columns]\n", + " \n", " Args:\n", " function_name (str):\n", " the function's name in BigQuery in the format\n", @@ -965,7 +911,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 29, "id": "a6c9da0a", "metadata": {}, "outputs": [], @@ -978,7 +924,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 30, "id": "d7e7de7f", "metadata": {}, "outputs": [ @@ -986,33 +932,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 10.9 ms, sys: 0 ns, total: 10.9 ms\n", - "Wall time: 11.4 ms\n" + "CPU times: user 70.8 ms, sys: 3.49 ms, total: 74.3 ms\n", + "Wall time: 75.2 ms\n" ] }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "73d1a73593cb4115821ab128c221a48d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job bec5f7d1-3df1-4292-8c68-c396bce7dc5d is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job 02e3bf43-a387-41c7-85c7-4a5366251de7 is RUNNING. 
" ] }, "metadata": {}, @@ -1020,13 +950,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "faf93766ce1e489183c86a9daf5ce7d1", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 4d3da7ed-42e6-4b2b-b656-ac9ef6d2e871 is DONE. 17.2 GB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job fa4329e8-2918-44c4-96c5-d8591364abc9 is RUNNING. " ] }, "metadata": {}, @@ -1063,80 +991,80 @@ " \n", " \n", " 0\n", - " 11012908\n", - " you're welcome! according to the docs it shoul...\n", + " 11231597\n", + " In your update, why are some of the system fun...\n", " 0\n", " -1\n", " -1\n", " \n", " \n", " 1\n", - " 11013760\n", - " You *should* be concerned with the disk being ...\n", - " 0\n", - " -1\n", - " -1\n", + " 49684807\n", + " what you have tried so far . ??\n", + " 1\n", + " 2\n", + " 2\n", " \n", " \n", " 2\n", - " 11013784\n", - " have you looked at `Integrate` or `NIntegrate`?\n", + " 7623925\n", + " @Michael: It should work. Perhaps you looked i...\n", " 0\n", " -1\n", " -1\n", " \n", " \n", " 3\n", - " 11015512\n", - " sorry, is a typo. The variable name is dist. (...\n", + " 34046685\n", + " Will it work with SQL compact? Please excuse m...\n", " 0\n", " -1\n", " -1\n", " \n", " \n", " 4\n", - " 11016238\n", - " Pfff, I'm having trouble with that formula too...\n", + " 6426146\n", + " do you know the equation to your pdf?\n", " 0\n", " -1\n", " -1\n", " \n", " \n", " 5\n", - " 11016276\n", - " Thanks thinksteep! Does this mean that by usin...\n", + " 60686114\n", + " m sorry but at least you have to think about it.\n", " 0\n", " -1\n", " -1\n", " \n", " \n", " 6\n", - " 11016551\n", - " Jason, thanks for the reply. I've been workin...\n", + " 16631986\n", + " i think also making disable this by only jquer...\n", " 0\n", " -1\n", " -1\n", " \n", " \n", " 7\n", - " 11017973\n", - " I assume an `off` of 0.5 would put be exactly ...\n", + " 16498565\n", + " I am including these files on my header of the...\n", " 0\n", " -1\n", " -1\n", " \n", " \n", " 8\n", - " 11018225\n", - " Thank you very much. I do worry too much abou...\n", + " 26601001\n", + " wrong answer, you didn't understand the logic\n", " 0\n", " -1\n", " -1\n", " \n", " \n", " 9\n", - " 11018370\n", - " @IanClelland, I edited my question a bit. The ...\n", + " 73255842\n", + " Call the setOnClickListener before return row.\n", " 0\n", " -1\n", " -1\n", @@ -1148,20 +1076,20 @@ ], "text/plain": [ " id text score \\\n", - "0 11012908 you're welcome! according to the docs it shoul... 0 \n", - "1 11013760 You *should* be concerned with the disk being ... 0 \n", - "2 11013784 have you looked at `Integrate` or `NIntegrate`? 0 \n", - "3 11015512 sorry, is a typo. The variable name is dist. (... 0 \n", - "4 11016238 Pfff, I'm having trouble with that formula too... 0 \n", - "5 11016276 Thanks thinksteep! Does this mean that by usin... 0 \n", - "6 11016551 Jason, thanks for the reply. I've been workin... 0 \n", - "7 11017973 I assume an `off` of 0.5 would put be exactly ... 0 \n", - "8 11018225 Thank you very much. I do worry too much abou... 0 \n", - "9 11018370 @IanClelland, I edited my question a bit. The ... 0 \n", + "0 11231597 In your update, why are some of the system fun... 0 \n", + "1 49684807 what you have tried so far . ?? 1 \n", + "2 7623925 @Michael: It should work. Perhaps you looked i... 0 \n", + "3 34046685 Will it work with SQL compact? Please excuse m... 0 \n", + "4 6426146 do you know the equation to your pdf? 
0 \n", + "5 60686114 m sorry but at least you have to think about it. 0 \n", + "6 16631986 i think also making disable this by only jquer... 0 \n", + "7 16498565 I am including these files on my header of the... 0 \n", + "8 26601001 wrong answer, you didn't understand the logic 0 \n", + "9 73255842 Call the setOnClickListener before return row. 0 \n", "\n", " n_prime n_prime_again \n", "0 -1 -1 \n", - "1 -1 -1 \n", + "1 2 2 \n", "2 -1 -1 \n", "3 -1 -1 \n", "4 -1 -1 \n", @@ -1174,7 +1102,7 @@ "[10 rows x 5 columns]" ] }, - "execution_count": 15, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1186,6 +1114,38 @@ "df = df.assign(n_prime_again=df['score'].apply(nth_prime_existing))\n", "df.head(10)" ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "bafab950", + "metadata": {}, + "outputs": [], + "source": [ + "# Clean up GCP assets created as part of bigframes remote_function\n", + "def cleanup_remote_function_assets(remote_udf, ignore_failures=False):\n", + " \"\"\"Clean up the GCP assets behind a bigframes remote function.\"\"\"\n", + "\n", + " session = pd.get_global_session()\n", + "\n", + " # Clean up BQ remote function\n", + " try:\n", + " session.bqclient.delete_routine(remote_udf.bigframes_remote_function)\n", + " except Exception:\n", + " # By default don't raise exception in cleanup\n", + " if not ignore_failures:\n", + " raise\n", + "\n", + " # Clean up cloud function\n", + " try:\n", + " session.cloudfunctionsclient.delete_function(name=remote_udf.bigframes_cloud_function)\n", + " except Exception:\n", + " # By default don't raise exception in cleanup\n", + " if not ignore_failures:\n", + " raise\n", + "\n", + "cleanup_remote_function_assets(nth_prime)" + ] } ], "metadata": { diff --git a/samples/snippets/remote_function.py b/samples/snippets/remote_function.py index 646d7b0c30..61b7dc092a 100644 --- a/samples/snippets/remote_function.py +++ b/samples/snippets/remote_function.py @@ -38,8 +38,8 @@ def run_remote_function_and_read_gbq_function(project_id: str): # function. It requires a BigQuery connection. If the connection is not # already created, BigQuery DataFrames will attempt to create one assuming # the necessary APIs and IAM permissions are setup in the project. In our - # examples we would be using a pre-created connection named - # `bigframes-rf-conn`. We will also set `reuse=False` to make sure we don't + # examples we will be letting the default connection `bigframes-default-connection` + # be used. We will also set `reuse=False` to make sure we don't # step over someone else creating remote function in the same project from # the exact same source code at the same time. 
Let's try a `pandas`-like use # case in which we want to apply a user defined scalar function to every @@ -49,7 +49,6 @@ def run_remote_function_and_read_gbq_function(project_id: str): @bpd.remote_function( [float], str, - bigquery_connection="bigframes-rf-conn", reuse=False, ) def get_bucket(num): @@ -94,7 +93,6 @@ def get_bucket(num): @bpd.remote_function( [str], str, - bigquery_connection="bigframes-rf-conn", reuse=False, packages=["cryptography"], ) From 3a375e87b64b8fb51370bfec8f2cfdbcd8fe960a Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Wed, 22 Nov 2023 16:52:22 -0800 Subject: [PATCH 14/26] docs: add examples for dataframe.min, dataframe.max and dataframe.sum (#227) * docs: add examples for dataframe.min, dataframe.max and dataframe.sum * update spacing --- .../bigframes_vendored/pandas/core/frame.py | 87 ++++++++++++++++++- 1 file changed, 84 insertions(+), 3 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index b771be3041..e41ac905aa 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2597,7 +2597,7 @@ def any(self, *, axis=0, bool_only: bool = False): [2 rows x 2 columns] - Checking if each column contains at least one True element(the default behavior without an explicit axis parameter). + Checking if each column contains at least one True element (the default behavior without an explicit axis parameter). >>> df.any() A True @@ -2644,7 +2644,7 @@ def all(self, axis=0, *, bool_only: bool = False): [2 rows x 2 columns] - Checking if all values in each column are True(the default behavior without an explicit axis parameter). + Checking if all values in each column are True (the default behavior without an explicit axis parameter). >>> df.all() A True @@ -2688,7 +2688,7 @@ def prod(self, axis=0, *, numeric_only: bool = False): [3 rows x 2 columns] - Calculating the product of each column(the default behavior without an explicit axis parameter). + Calculating the product of each column (the default behavior without an explicit axis parameter). >>> df.prod() A 6.0 @@ -2721,6 +2721,33 @@ def min(self, axis=0, *, numeric_only: bool = False): If you want the *index* of the minimum, use ``idxmin``. This is the equivalent of the ``numpy.ndarray`` method ``argmin``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) + >>> df + A B + 0 1 2 + 1 3 4 + + [2 rows x 2 columns] + + Finding the minimum value in each column (the default behavior without an explicit axis parameter). + + >>> df.min() + A 1.0 + B 2.0 + dtype: Float64 + + Finding the minimum value in each row. + + >>> df.min(axis=1) + 0 1.0 + 1 3.0 + dtype: Float64 + Args: axis ({index (0), columns (1)}): Axis for the function to be applied on. @@ -2739,6 +2766,33 @@ def max(self, axis=0, *, numeric_only: bool = False): If you want the *index* of the maximum, use ``idxmax``. This is the equivalent of the ``numpy.ndarray`` method ``argmax``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) + >>> df + A B + 0 1 2 + 1 3 4 + + [2 rows x 2 columns] + + Finding the maximum value in each column (the default behavior without an explicit axis parameter). 
+ + >>> df.max() + A 3.0 + B 4.0 + dtype: Float64 + + Finding the maximum value in each row. + + >>> df.max(axis=1) + 0 2.0 + 1 4.0 + dtype: Float64 + Args: axis ({index (0), columns (1)}): Axis for the function to be applied on. @@ -2756,6 +2810,33 @@ def sum(self, axis=0, *, numeric_only: bool = False): This is equivalent to the method ``numpy.sum``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) + >>> df + A B + 0 1 2 + 1 3 4 + + [2 rows x 2 columns] + + Calculating the sum of each column (the default behavior without an explicit axis parameter). + + >>> df.sum() + A 4.0 + B 6.0 + dtype: Float64 + + Calculating the sum of each row. + + >>> df.sum(axis=1) + 0 3.0 + 1 7.0 + dtype: Float64 + Args: axis ({index (0), columns (1)}): Axis for the function to be applied on. From b62a07a95cd60f995a48825c9874822d0eb02483 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 24 Nov 2023 00:10:18 +0000 Subject: [PATCH 15/26] docs: code samples for `Series.dot` and `DataFrame.dot` (#226) --- bigframes/dataframe.py | 3 +- bigframes/operations/base.py | 2 +- tests/system/small/test_dataframe.py | 23 ++++++ .../bigframes_vendored/pandas/core/frame.py | 71 +++++++++++++++++++ .../bigframes_vendored/pandas/core/series.py | 15 ++++ 5 files changed, 112 insertions(+), 2 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 1f1275e217..8567296e29 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2797,7 +2797,8 @@ def get_right_id(id): result = result[other_frame.columns] if isinstance(other, bf_series.Series): - result = result[other.name].rename() + # There should be exactly one column in the result + result = result[result.columns[0]].rename() return result diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index d33befe4da..85ce1dd9e6 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -141,7 +141,7 @@ def _apply_binary_op( if isinstance(other, pd.Series): # TODO: Convert to BigQuery DataFrames series raise NotImplementedError( - f"Pandas series not supported supported as operand. {constants.FEEDBACK_LINK}" + f"Pandas series not supported as operand. {constants.FEEDBACK_LINK}" ) if isinstance(other, series.Series): (left, right, block) = self._align(other, how=alignment) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index a0cf25807c..e25e9ce501 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3493,6 +3493,29 @@ def test_df_dot_operator( ) +def test_df_dot_series_inline(): + left = [[1, 2, 3], [2, 5, 7]] + right = [2, 1, 3] + + bf1 = dataframe.DataFrame(left) + bf2 = series.Series(right) + bf_result = bf1.dot(bf2).to_pandas() + + df1 = pd.DataFrame(left) + df2 = pd.Series(right) + pd_result = df1.dot(df2) + + # Patch pandas dtypes for testing parity + # Pandas result is int64 instead of Int64 (nullable) dtype. 
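+    # (BigQuery DataFrames surfaces BigQuery INT64 columns as pandas'
+    # nullable Int64 extension dtype, hence the casts below.)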
+ pd_result = pd_result.astype(pd.Int64Dtype()) + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + def test_df_dot_series( matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df ): diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index e41ac905aa..a1aac5d2b5 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -3485,6 +3485,77 @@ def dot(self, other): The dot method for Series computes the inner product, instead of the matrix product here. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> left = bpd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + >>> left + 0 1 2 3 + 0 0 1 -2 -1 + 1 1 1 1 1 + + [2 rows x 4 columns] + >>> right = bpd.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]]) + >>> right + 0 1 + 0 0 1 + 1 1 2 + 2 -1 -1 + 3 2 0 + + [4 rows x 2 columns] + >>> left.dot(right) + 0 1 + 0 1 4 + 1 2 2 + + [2 rows x 2 columns] + + You can also use the operator ``@`` for the dot product: + + >>> left @ right + 0 1 + 0 1 4 + 1 2 2 + + [2 rows x 2 columns] + + The right input can be a Series, in which case the result will also be a + Series: + + >>> right = bpd.Series([1, 2, -1,0]) + >>> left @ right + 0 4 + 1 2 + dtype: Int64 + + Any user defined index of the left matrix and columns of the right + matrix will reflect in the result. + + >>> left = bpd.DataFrame([[1, 2, 3], [2, 5, 7]], index=["alpha", "beta"]) + >>> left + 0 1 2 + alpha 1 2 3 + beta 2 5 7 + + [2 rows x 3 columns] + >>> right = bpd.DataFrame([[2, 4, 8], [1, 5, 10], [3, 6, 9]], columns=["red", "green", "blue"]) + >>> right + red green blue + 0 2 4 8 + 1 1 5 10 + 2 3 6 9 + + [3 rows x 3 columns] + >>> left.dot(right) + red green blue + alpha 13 32 55 + beta 30 75 129 + + [2 rows x 3 columns] + Args: other (Series or DataFrame): The other object to compute the matrix product with. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index a86765a412..1b751ed83b 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -631,6 +631,21 @@ def dot(self, other) -> Series | np.ndarray: BigQuery Dataframes does not validate this property and will produce incorrect results if indices are not equal. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([0, 1, 2, 3]) + >>> other = bpd.Series([-1, 2, -3, 4]) + >>> s.dot(other) + 8 + + You can also use the operator ``@`` for the dot product: + + >>> s @ other + 8 + Args: other (Series): The other object to compute the dot product with its columns. From f9c6e727e2b901310bb5301da449d616ea85e135 Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Tue, 28 Nov 2023 10:48:21 -0800 Subject: [PATCH 16/26] docs: add examples for dataframe.kurt, dataframe.std, dataframe.count (#232) * docs: add examples for dataframe.kurt, dataframe.std, dataframe.count * update count example * update count example * update examples * update . 
to : --- .../bigframes_vendored/pandas/core/frame.py | 96 +++++++++++++++++-- 1 file changed, 87 insertions(+), 9 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index a1aac5d2b5..a7018ed3a2 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2597,14 +2597,14 @@ def any(self, *, axis=0, bool_only: bool = False): [2 rows x 2 columns] - Checking if each column contains at least one True element (the default behavior without an explicit axis parameter). + Checking if each column contains at least one True element(the default behavior without an explicit axis parameter): >>> df.any() A True B False dtype: boolean - Checking if each row contains at least one True element. + Checking if each row contains at least one True element: >>> df.any(axis=1) 0 True @@ -2644,14 +2644,14 @@ def all(self, axis=0, *, bool_only: bool = False): [2 rows x 2 columns] - Checking if all values in each column are True (the default behavior without an explicit axis parameter). + Checking if all values in each column are True(the default behavior without an explicit axis parameter): >>> df.all() A True B False dtype: boolean - Checking across rows to see if all values are True. + Checking across rows to see if all values are True: >>> df.all(axis=1) 0 False @@ -2688,14 +2688,14 @@ def prod(self, axis=0, *, numeric_only: bool = False): [3 rows x 2 columns] - Calculating the product of each column (the default behavior without an explicit axis parameter). + Calculating the product of each column(the default behavior without an explicit axis parameter): >>> df.prod() A 6.0 B 160.875 dtype: Float64 - Calculating the product of each row. + Calculating the product of each row: >>> df.prod(axis=1) 0 4.5 @@ -2911,11 +2911,37 @@ def skew(self, *, numeric_only: bool = False): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def kurt(self, *, numeric_only: bool = False): - """Return unbiased kurtosis over requested axis. + """Return unbiased kurtosis over columns. Kurtosis obtained using Fisher's definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 2, 3, 4, 5], + ... "B": [3, 4, 3, 2, 1], + ... "C": [2, 2, 3, 2, 2]}) + >>> df + A B C + 0 1 3 2 + 1 2 4 2 + 2 3 3 3 + 3 4 2 2 + 4 5 1 2 + + [5 rows x 3 columns] + + Calculating the kurtosis value of each column: + + >>> df.kurt() + A -1.2 + B -0.177515 + C 5.0 + dtype: Float64 + Args: numeric_only (bool, default False): Include only float, int, boolean columns. @@ -2926,10 +2952,36 @@ def kurt(self, *, numeric_only: bool = False): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def std(self, *, numeric_only: bool = False): - """Return sample standard deviation over requested axis. + """Return sample standard deviation over columns. Normalized by N-1 by default. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 2, 3, 4, 5], + ... "B": [3, 4, 3, 2, 1], + ... "C": [2, 2, 3, 2, 2]}) + >>> df + A B C + 0 1 3 2 + 1 2 4 2 + 2 3 3 3 + 3 4 2 2 + 4 5 1 2 + + [5 rows x 3 columns] + + Calculating the standard deviation of each column: + + >>> df.std() + A 1.581139 + B 1.140175 + C 0.447214 + dtype: Float64 + Args: numeric_only (bool. 
default False):
                Default False. Include only float, int, boolean columns.
 
@@ -2941,11 +2993,37 @@ def std(self, *, numeric_only: bool = False):
 
     def count(self, *, numeric_only: bool = False):
         """
-        Count non-NA cells for each column or row.
+        Count non-NA cells for each column.
 
         The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending
         on `pandas.options.mode.use_inf_as_na`) are considered NA.
 
+        **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+
+        >>> df = bpd.DataFrame({"A": [1, None, 3, 4, 5],
+        ...                     "B": [1, 2, 3, 4, 5],
+        ...                     "C": [None, 3.5, None, 4.5, 5.0]})
+        >>> df
+              A  B     C
+        0   1.0  1  <NA>
+        1  <NA>  2   3.5
+        2   3.0  3  <NA>
+        3   4.0  4   4.5
+        4   5.0  5   5.0
+
+        [5 rows x 3 columns]
+
+        Counting non-NA values for each column:
+
+        >>> df.count()
+        A    4.0
+        B    5.0
+        C    3.0
+        dtype: Float64
+
         Args:
             numeric_only (bool, default False):
                 Include only `float`, `int` or `boolean` data.

From edd0522747eadb74780124fb18ed7face251441d Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Tue, 28 Nov 2023 11:40:28 -0800
Subject: [PATCH 17/26] =?UTF-8?q?docs:=20add=20examples=20for=20dataframe.?=
 =?UTF-8?q?mean,=20dataframe.median,=20dataframe.va=E2=80=A6=20(#228)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* docs: add examples for dataframe.mean, dataframe.median, dataframe.var
and dataframe.skew

* column to columns

* update var example

---
 .../bigframes_vendored/pandas/core/frame.py   | 105 +++++++++++++++++-
 1 file changed, 103 insertions(+), 2 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index a7018ed3a2..2a8972f2e5 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -2852,6 +2852,33 @@ def sum(self, axis=0, *, numeric_only: bool = False):
     def mean(self, axis=0, *, numeric_only: bool = False):
         """Return the mean of the values over the requested axis.
 
+        **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+
+        >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]})
+        >>> df
+           A  B
+        0  1  2
+        1  3  4
+
+        [2 rows x 2 columns]
+
+        Calculating the mean of each column (the default behavior without an explicit axis parameter).
+
+        >>> df.mean()
+        A    2.0
+        B    3.0
+        dtype: Float64
+
+        Calculating the mean of each row.
+
+        >>> df.mean(axis=1)
+        0    1.5
+        1    3.5
+        dtype: Float64
+
         Args:
             axis ({index (0), columns (1)}):
                 Axis for the function to be applied on.
@@ -2865,7 +2892,27 @@ def mean(self, axis=0, *, numeric_only: bool = False):
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
     def median(self, *, numeric_only: bool = False, exact: bool = False):
-        """Return the median of the values over the requested axis.
+        """Return the median of the values over columns.
+
+        **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+
+        >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]})
+        >>> df
+           A  B
+        0  1  2
+        1  3  4
+
+        [2 rows x 2 columns]
+
+        Finding the median value of each column.
+
+        >>> df.median()
+        A    1.0
+        B    2.0
+        dtype: Float64
 
         Args:
             numeric_only (bool, default False):
@@ -2884,6 +2931,34 @@ def var(self, axis=0, *, numeric_only: bool = False):
 
         Normalized by N-1 by default.
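+        (That is, the sample variance is computed as
+        ``sum((x - x.mean())**2) / (N - 1)``.)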
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) + >>> df + A B + 0 1 2 + 1 3 4 + + [2 rows x 2 columns] + + Calculating the variance of each column (the default behavior without an explicit axis parameter). + + >>> df.var() + A 2.0 + B 2.0 + dtype: Float64 + + Calculating the variance of each row. + + >>> df.var(axis=1) + 0 0.5 + 1 0.5 + dtype: Float64 + + Args: axis ({index (0), columns (1)}): Axis for the function to be applied on. @@ -2897,10 +2972,36 @@ def var(self, axis=0, *, numeric_only: bool = False): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def skew(self, *, numeric_only: bool = False): - """Return unbiased skew over requested axis. + """Return unbiased skew over columns. Normalized by N-1. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'A': [1, 2, 3, 4, 5], + ... 'B': [5, 4, 3, 2, 1], + ... 'C': [2, 2, 3, 2, 2]}) + >>> df + A B C + 0 1 5 2 + 1 2 4 2 + 2 3 3 3 + 3 4 2 2 + 4 5 1 2 + + [5 rows x 3 columns] + + Calculating the skewness of each column. + + >>> df.skew() + A 0.0 + B 0.0 + C 2.236068 + dtype: Float64 + Args: numeric_only (bool, default False): Include only float, int, boolean columns. From ae03756f5ee45e0e74e0c0bdd4777e018eba2273 Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Tue, 28 Nov 2023 12:53:51 -0800 Subject: [PATCH 18/26] fix: make to_pandas override enable_downsampling when sampling_method is manually set. (#200) * fix: make to_pandas override enable_downsampling when sampling_method is manually set. * fix: make to_pandas override enable_downsampling when sampling_method is manually set. * fix: make to_pandas override enable_downsampling when sampling_method is manually set. --- bigframes/core/blocks.py | 41 ++++++++++++++++------------ tests/system/small/test_dataframe.py | 11 ++++++++ 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index f1113d938e..34913872e7 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -389,23 +389,6 @@ def to_pandas( ordered: bool = True, ) -> Tuple[pd.DataFrame, bigquery.QueryJob]: """Run query and download results as a pandas DataFrame.""" - if max_download_size is None: - max_download_size = bigframes.options.sampling.max_download_size - if sampling_method is None: - sampling_method = ( - bigframes.options.sampling.sampling_method - if bigframes.options.sampling.sampling_method is not None - else _UNIFORM - ) - if random_state is None: - random_state = bigframes.options.sampling.random_state - - sampling_method = sampling_method.lower() - if sampling_method not in _SAMPLING_METHODS: - raise NotImplementedError( - f"The downsampling method {sampling_method} is not implemented, " - f"please choose from {','.join(_SAMPLING_METHODS)}." - ) df, _, query_job = self._compute_and_count( value_keys=value_keys, @@ -453,6 +436,28 @@ def _compute_and_count( ) -> Tuple[pd.DataFrame, int, bigquery.QueryJob]: """Run query and download results as a pandas DataFrame. Return the total number of results as well.""" # TODO(swast): Allow for dry run and timeout. 
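+        # An explicitly passed sampling_method is treated as an implicit
+        # opt-in to downsampling: it overrides the global
+        # bigframes.options.sampling.enable_downsampling option, while each
+        # unset argument falls back to its global default below.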
+ enable_downsampling = ( + True + if sampling_method is not None + else bigframes.options.sampling.enable_downsampling + ) + + max_download_size = ( + max_download_size or bigframes.options.sampling.max_download_size + ) + + random_state = random_state or bigframes.options.sampling.random_state + + if sampling_method is None: + sampling_method = bigframes.options.sampling.sampling_method or _UNIFORM + sampling_method = sampling_method.lower() + + if sampling_method not in _SAMPLING_METHODS: + raise NotImplementedError( + f"The downsampling method {sampling_method} is not implemented, " + f"please choose from {','.join(_SAMPLING_METHODS)}." + ) + expr = self._apply_value_keys_to_expr(value_keys=value_keys) results_iterator, query_job = expr.start_query( @@ -469,7 +474,7 @@ def _compute_and_count( ) if fraction < 1: - if not bigframes.options.sampling.enable_downsampling: + if not enable_downsampling: raise RuntimeError( f"The data size ({table_size:.2f} MB) exceeds the maximum download limit of " f"{max_download_size} MB. You can:\n\t* Enable downsampling in global options:\n" diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index e25e9ce501..9b9567418b 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3546,3 +3546,14 @@ def test_df_dot_operator_series( bf_result, pd_result, ) + + +def test_to_pandas_downsampling_option_override(session): + df = session.read_gbq("bigframes-dev.bigframes_tests_sys.batting") + download_size = 1 + + df = df.to_pandas(max_download_size=download_size, sampling_method="head") + + total_memory_bytes = df.memory_usage(deep=True).sum() + total_memory_mb = total_memory_bytes / (1024 * 1024) + assert total_memory_mb == pytest.approx(download_size, rel=0.3) From 9d6613d318b558722b7bab12773efdea4bbe9931 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Tue, 28 Nov 2023 13:46:18 -0800 Subject: [PATCH 19/26] feat: add info and memory_usage methods to dataframe (#219) --- .pre-commit-config.yaml | 2 +- bigframes/_config/display_options.py | 4 + bigframes/core/indexes/index.py | 13 ++- bigframes/dataframe.py | 84 +++++++++++++++++++ bigframes/dtypes.py | 13 +++ noxfile.py | 1 + setup.py | 1 + tests/system/small/test_dataframe.py | 42 ++++++++++ .../pandas/core/config_init.py | 11 +++ .../bigframes_vendored/pandas/core/frame.py | 66 +++++++++++++++ 10 files changed, 235 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6e0fd8b98f..517176da89 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,4 +38,4 @@ repos: rev: v1.1.1 hooks: - id: mypy - additional_dependencies: [types-requests] + additional_dependencies: [types-requests, types-tabulate] diff --git a/bigframes/_config/display_options.py b/bigframes/_config/display_options.py index ad3ea3f68c..afa36aa84c 100644 --- a/bigframes/_config/display_options.py +++ b/bigframes/_config/display_options.py @@ -32,6 +32,10 @@ class DisplayOptions: progress_bar: Optional[str] = "auto" repr_mode: Literal["head", "deferred"] = "head" + max_info_columns: int = 100 + max_info_rows: Optional[int] = 200000 + memory_usage: bool = True + @contextlib.contextmanager def pandas_repr(display_options: DisplayOptions): diff --git a/bigframes/core/indexes/index.py b/bigframes/core/indexes/index.py index 6c66c36062..fc7cf167d4 100644 --- a/bigframes/core/indexes/index.py +++ b/bigframes/core/indexes/index.py @@ -155,6 +155,14 @@ def _block(self) -> blocks.Block: def T(self) -> Index: return 
self.transpose()
 
+    def _memory_usage(self) -> int:
+        (n_rows,) = self.shape
+        return sum(
+            self.dtypes.map(
+                lambda dtype: bigframes.dtypes.DTYPE_BYTE_SIZES.get(dtype, 8) * n_rows
+            )
+        )
+
     def transpose(self) -> Index:
         return self
 
@@ -326,7 +334,10 @@ def _apply_aggregation(self, op: agg_ops.AggregateOp) -> typing.Any:
 
     def __getitem__(self, key: int) -> typing.Any:
         if isinstance(key, int):
-            result_pd_df, _ = self._block.slice(key, key + 1, 1).to_pandas()
+            if key != -1:
+                result_pd_df, _ = self._block.slice(key, key + 1, 1).to_pandas()
+            else:  # special case, want [-1:] instead of [-1:0]
+                result_pd_df, _ = self._block.slice(key).to_pandas()
             if result_pd_df.empty:
                 raise IndexError("single positional indexer is out-of-bounds")
             return result_pd_df.index[0]
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 8567296e29..f7796291b9 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -18,6 +18,7 @@
 
 import datetime
 import re
+import sys
 import textwrap
 import typing
 from typing import (
@@ -36,6 +37,7 @@
 import google.cloud.bigquery as bigquery
 import numpy
 import pandas
+import tabulate
 
 import bigframes
 import bigframes._config.display_options as display_options
@@ -350,6 +352,88 @@ def query_job(self) -> Optional[bigquery.QueryJob]:
             self._set_internal_query_job(self._compute_dry_run())
         return self._query_job
 
+    def memory_usage(self, index: bool = True):
+        n_rows, _ = self.shape
+        # Like pandas, treat all variable-size objects as just 8-byte pointers,
+        # ignoring the actual object size.
+        column_sizes = self.dtypes.map(
+            lambda dtype: bigframes.dtypes.DTYPE_BYTE_SIZES.get(dtype, 8) * n_rows
+        )
+        if index:
+            index_size = pandas.Series([self.index._memory_usage()], index=["Index"])
+            column_sizes = pandas.concat([index_size, column_sizes])
+        return column_sizes
+
+    def info(
+        self,
+        verbose: Optional[bool] = None,
+        buf=None,
+        max_cols: Optional[int] = None,
+        memory_usage: Optional[bool] = None,
+        show_counts: Optional[bool] = None,
+    ):
+        obuf = buf or sys.stdout
+
+        n_rows, n_columns = self.shape
+
+        max_cols = (
+            max_cols
+            if max_cols is not None
+            else bigframes.options.display.max_info_columns
+        )
+
+        show_all_columns = verbose if verbose is not None else (n_columns < max_cols)
+
+        obuf.write(f"{type(self)}\n")
+
+        index_type = "MultiIndex" if self.index.nlevels > 1 else "Index"
+
+        # These accesses are kind of expensive, maybe should try to skip?
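+        # Each lookup runs a small slice query; index[-1] relies on the [-1:]
+        # special case added to Index.__getitem__ above.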
+ first_indice = self.index[0] + last_indice = self.index[-1] + obuf.write(f"{index_type}: {n_rows} entries, {first_indice} to {last_indice}\n") + + dtype_strings = self.dtypes.astype("string") + if show_all_columns: + obuf.write(f"Data columns (total {n_columns} columns):\n") + column_info = self.columns.to_frame(name="Column") + + max_rows = bigframes.options.display.max_info_rows + too_many_rows = n_rows > max_rows if max_rows is not None else False + + if show_counts if show_counts is not None else (not too_many_rows): + non_null_counts = self.count().to_pandas() + column_info["Non-Null Count"] = non_null_counts.map( + lambda x: f"{int(x)} non-null" + ) + + column_info["Dtype"] = dtype_strings + + column_info = column_info.reset_index(drop=True) + column_info.index.name = "#" + + column_info_formatted = tabulate.tabulate(column_info, headers="keys") # type: ignore + obuf.write(column_info_formatted) + obuf.write("\n") + + else: # Just number of columns and first, last + obuf.write( + f"Columns: {n_columns} entries, {self.columns[0]} to {self.columns[-1]}\n" + ) + dtype_counts = dtype_strings.value_counts().sort_index(ascending=True).items() + dtype_counts_formatted = ", ".join( + f"{dtype}({count})" for dtype, count in dtype_counts + ) + obuf.write(f"dtypes: {dtype_counts_formatted}\n") + + show_memory = ( + memory_usage + if memory_usage is not None + else bigframes.options.display.memory_usage + ) + if show_memory: + # TODO: Convert to different units (kb, mb, etc.) + obuf.write(f"memory usage: {self.memory_usage().sum()} bytes\n") + def _set_internal_query_job(self, query_job: bigquery.QueryJob): self._query_job = query_job diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index cd35e380c0..774eb74d06 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -143,6 +143,19 @@ # "string" and "string[pyarrow] are accepted" BIGFRAMES_STRING_TO_BIGFRAMES["string[pyarrow]"] = pd.StringDtype(storage="pyarrow") +# For the purposes of dataframe.memory_usage +# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data_type_sizes +DTYPE_BYTE_SIZES = { + pd.BooleanDtype(): 1, + pd.Int64Dtype(): 8, + pd.Float32Dtype(): 8, + pd.StringDtype(): 8, + pd.ArrowDtype(pa.time64("us")): 8, + pd.ArrowDtype(pa.timestamp("us")): 8, + pd.ArrowDtype(pa.timestamp("us", tz="UTC")): 8, + pd.ArrowDtype(pa.date32()): 8, +} + def ibis_dtype_to_bigframes_dtype( ibis_dtype: ibis_dtypes.DataType, diff --git a/noxfile.py b/noxfile.py index 8d6d641fc1..c1fb53f794 100644 --- a/noxfile.py +++ b/noxfile.py @@ -228,6 +228,7 @@ def mypy(session): "types-python-dateutil", "types-requests", "types-setuptools", + "types-tabulate", ] ) | set(SYSTEM_TEST_STANDARD_DEPENDENCIES) diff --git a/setup.py b/setup.py index 29eacb74a9..abf165b3df 100644 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ "requests >=2.27.1", "scikit-learn >=1.2.2", "sqlalchemy >=1.4,<3.0dev", + "tabulate >= 0.9", "ipywidgets >=7.7.1", "humanize >= 4.6.0", ] diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 9b9567418b..9744d3f6e9 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import io
 import operator
 import tempfile
 import typing
@@ -255,6 +256,47 @@ def test_drop_with_custom_column_labels(scalars_dfs):
     assert_pandas_df_equal(bf_result, pd_result)
 
 
+def test_df_memory_usage(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    pd_result = scalars_pandas_df.memory_usage()
+    bf_result = scalars_df.memory_usage()
+
+    pd.testing.assert_series_equal(pd_result, bf_result, rtol=1.5)
+
+
+def test_df_info(scalars_dfs):
+    expected = (
+        "<class 'bigframes.dataframe.DataFrame'>\n"
+        "Index: 9 entries, 0 to 8\n"
+        "Data columns (total 13 columns):\n"
+        "  #  Column         Non-Null Count    Dtype\n"
+        "---  -------------  ----------------  ------------------------------\n"
+        "  0  bool_col       8 non-null        boolean\n"
+        "  1  bytes_col      6 non-null        object\n"
+        "  2  date_col       7 non-null        date32[day][pyarrow]\n"
+        "  3  datetime_col   6 non-null        timestamp[us][pyarrow]\n"
+        "  4  geography_col  4 non-null        geometry\n"
+        "  5  int64_col      8 non-null        Int64\n"
+        "  6  int64_too      9 non-null        Int64\n"
+        "  7  numeric_col    6 non-null        object\n"
+        "  8  float64_col    7 non-null        Float64\n"
+        "  9  rowindex_2     9 non-null        Int64\n"
+        " 10  string_col     8 non-null        string\n"
+        " 11  time_col       6 non-null        time64[us][pyarrow]\n"
+        " 12  timestamp_col  6 non-null        timestamp[us, tz=UTC][pyarrow]\n"
+        "dtypes: Float64(1), Int64(3), boolean(1), date32[day][pyarrow](1), geometry(1), object(2), string(1), time64[us][pyarrow](1), timestamp[us, tz=UTC][pyarrow](1), timestamp[us][pyarrow](1)\n"
+        "memory usage: 945 bytes\n"
+    )
+
+    scalars_df, _ = scalars_dfs
+    bf_result = io.StringIO()
+
+    scalars_df.info(buf=bf_result)
+
+    assert expected == bf_result.getvalue()
+
+
 def test_drop_index(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
 
diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py
index 198654015e..dfb91dfeb8 100644
--- a/third_party/bigframes_vendored/pandas/core/config_init.py
+++ b/third_party/bigframes_vendored/pandas/core/config_init.py
@@ -33,6 +33,17 @@
         Instead estimated bytes processed will be shown. Dataframe and Series
         objects can still be computed with methods that explicitly execute and
         download results.
+    max_info_columns (int):
+        max_info_columns is used in DataFrame.info method to decide if
+        per column information will be printed.
+    max_info_rows (int or None):
+        df.info() will usually show null-counts for each column.
+        For large frames this can be quite slow. max_info_rows and max_info_cols
+        limit this null check only to frames with smaller dimensions than
+        specified.
+    memory_usage (bool):
+        This specifies if the memory usage of a DataFrame should be displayed when
+        df.info() is called. Valid values are True and False.
 """
 
 sampling_options_doc = """
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 2a8972f2e5..099d8b8e66 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -92,6 +92,72 @@ def values(self) -> np.ndarray:
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def info(
+        self,
+        verbose: bool | None = None,
+        buf=None,
+        max_cols: int | None = None,
+        memory_usage: bool | None = None,
+        show_counts: bool | None = None,
+    ) -> None:
+        """
+        Print a concise summary of a DataFrame.
+
+        This method prints information about a DataFrame including
+        the index dtype and columns, non-null values and memory usage.
+
+        Args:
+            verbose (bool, optional):
+                Whether to print the full summary.
By default, the setting in
+                ``pandas.options.display.max_info_columns`` is followed.
+            buf (writable buffer, defaults to sys.stdout):
+                Where to send the output. By default, the output is printed to
+                sys.stdout. Pass a writable buffer if you need to further process
+                the output.
+            max_cols (int, optional):
+                When to switch from the verbose to the truncated output. If the
+                DataFrame has more than `max_cols` columns, the truncated output
+                is used. By default, the setting in
+                ``pandas.options.display.max_info_columns`` is used.
+            memory_usage (bool, optional):
+                Specifies whether total memory usage of the DataFrame
+                elements (including the index) should be displayed. By default,
+                this follows the ``pandas.options.display.memory_usage`` setting.
+                True always shows memory usage. False never shows memory usage.
+                Memory estimation is made based on column dtype and number of rows
+                assuming values consume the same memory amount for corresponding dtypes.
+            show_counts (bool, optional):
+                Whether to show the non-null counts. By default, this is shown
+                only if the DataFrame is smaller than
+                ``pandas.options.display.max_info_rows`` and
+                ``pandas.options.display.max_info_columns``. A value of True always
+                shows the counts, and False never shows the counts.
+
+        Returns:
+            None: This method prints a summary of a DataFrame and returns None."""
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def memory_usage(self, index: bool = True):
+        """
+        Return the memory usage of each column in bytes.
+
+        The memory usage can optionally include the contribution of
+        the index and elements of `object` dtype.
+
+        This value is displayed in `DataFrame.info` by default. This can be
+        suppressed by setting ``pandas.options.display.memory_usage`` to False.
+
+        Args:
+            index (bool, default True):
+                Specifies whether to include the memory usage of the DataFrame's
+                index in returned Series. If ``index=True``, the memory usage of
+                the index is the first item in the output.
+
+        Returns:
+            Series: A Series whose index is the original column names and whose values are the memory usage of each column in bytes.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     # ----------------------------------------------------------------------
     # IO methods (to / from other formats)
     def to_numpy(self, dtype=None, copy=False, na_value=None, **kwargs) -> np.ndarray:

From 69b016eae7ea97d84ceeb22ba09f5472841db072 Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Tue, 28 Nov 2023 23:42:58 +0000
Subject: [PATCH 20/26] fix: use anonymous dataset to create `remote_function`
 (#205)

* fix: use anonymous dataset to create `remote_function`

* update README about anonymous dataset instead of bigframes_temp_location

* remove dataset creation step from remote function

This is because now the dataset is an anonymous dataset that must have
been created previously as part of bigframes session creation.

* restore create_dataset, guarded by get_dataset

---
 README.rst                                 |  7 ++--
 bigframes/remote_function.py               | 19 ++++++---
 bigframes/session/__init__.py              | 14 -------
 tests/system/large/test_remote_function.py | 47 +++++++++++++++++++++-
 tests/system/small/test_remote_function.py | 37 +++++-------------
 5 files changed, 73 insertions(+), 51 deletions(-)

diff --git a/README.rst b/README.rst
index 5ddb4a7639..91dac12751 100644
--- a/README.rst
+++ b/README.rst
@@ -267,10 +267,9 @@ definition. To view and manage connections, do the following:
 
 3.
In the Explorer pane, expand that project and then expand External connections.
 
 BigQuery remote functions are created in the dataset you specify, or
-in a dataset with the name ``bigframes_temp_location``, where location is
-the location used by the BigQuery DataFrames session. For example,
-``bigframes_temp_us_central1``. To view and manage remote functions, do
-the following:
+in a special type of `hidden dataset <https://cloud.google.com/bigquery/docs/datasets#anonymous_datasets>`__
+referred to as an anonymous dataset. To view and manage remote functions created
+in a user provided dataset, do the following:
 
 1. Go to `BigQuery in the Google Cloud Console <https://console.cloud.google.com/bigquery>`__.
 2. Select the project in which you created the remote function.
diff --git a/bigframes/remote_function.py b/bigframes/remote_function.py
index a39cd033f6..7280ac7d42 100644
--- a/bigframes/remote_function.py
+++ b/bigframes/remote_function.py
@@ -188,6 +188,7 @@ def create_bq_remote_function(
         # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_remote_function_2
         bq_function_args = []
         bq_function_return_type = BigQueryType.from_ibis(output_type)
+
         # We are expecting the input type annotations to be 1:1 with the input args
         for idx, name in enumerate(input_args):
             bq_function_args.append(
@@ -204,14 +205,22 @@ def create_bq_remote_function(
 
         logger.info(f"Creating BQ remote function: {create_function_ddl}")
 
-        # Make sure the dataset exists
+        # Make sure the dataset exists. I.e. if it doesn't exist, go ahead and
+        # create it
         dataset = bigquery.Dataset(
             bigquery.DatasetReference.from_string(
                 self._bq_dataset, default_project=self._gcp_project_id
             )
         )
         dataset.location = self._bq_location
-        self._bq_client.create_dataset(dataset, exists_ok=True)
+        try:
+            # This check does not require bigquery.datasets.create IAM
+            # permission. So, if the dataset already exists, then the user can
+            # work without having that permission.
+            self._bq_client.get_dataset(dataset)
+        except google.api_core.exceptions.NotFound:
+            # This requires bigquery.datasets.create IAM permission
+            self._bq_client.create_dataset(dataset, exists_ok=True)
 
         # TODO: Use session._start_query() so we get progress bar
         query_job = self._bq_client.query(create_function_ddl)  # Make an API request.
@@ -610,7 +619,7 @@ def get_routine_reference( raise DatasetMissingError dataset_ref = bigquery.DatasetReference( - bigquery_client.project, session._session_dataset_id + bigquery_client.project, session._anonymous_dataset.dataset_id ) return dataset_ref.routine(routine_ref_str) @@ -778,9 +787,7 @@ def remote_function( dataset, default_project=bigquery_client.project ) else: - dataset_ref = bigquery.DatasetReference.from_string( - session._session_dataset_id, default_project=bigquery_client.project - ) + dataset_ref = session._anonymous_dataset bq_location, cloud_function_region = get_remote_function_locations( bigquery_client.location diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 928123ce74..d2f6137883 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -198,13 +198,6 @@ def cloudfunctionsclient(self): def resourcemanagerclient(self): return self._clients_provider.resourcemanagerclient - @property - def _session_dataset_id(self): - """A dataset for storing temporary objects local to the session - This is a workaround for remote functions that do not - yet support session-temporary instances.""" - return self._session_dataset.dataset_id - @property def _project(self): return self.bqclient.project @@ -229,13 +222,6 @@ def _create_bq_datasets(self): query_destination.dataset_id, ) - # Dataset for storing remote functions, which don't yet - # support proper session temporary storage yet - self._session_dataset = bigquery.Dataset( - f"{self.bqclient.project}.bigframes_temp_{self._location.lower().replace('-', '_')}" - ) - self._session_dataset.location = self._location - def close(self): """No-op. Temporary resources are deleted after 7 days.""" diff --git a/tests/system/large/test_remote_function.py b/tests/system/large/test_remote_function.py index 6ed3e6511a..5cb4df188c 100644 --- a/tests/system/large/test_remote_function.py +++ b/tests/system/large/test_remote_function.py @@ -22,7 +22,7 @@ import textwrap from google.api_core.exceptions import NotFound, ResourceExhausted -from google.cloud import functions_v2 +from google.cloud import bigquery, functions_v2 import pandas import pytest import test_utils.prefixer @@ -1210,3 +1210,48 @@ def square(x): cleanup_remote_function_assets( session.bqclient, session.cloudfunctionsclient, square ) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_anonymous_dataset(session, scalars_dfs): + try: + # This usage of remote_function is expected to create the remote + # function in the bigframes session's anonymous dataset. Use reuse=False + # param to make sure parallel instances of the test don't step over each + # other due to the common anonymous dataset. 
+ @session.remote_function([int], int, reuse=False) + def square(x): + return x * x + + assert ( + bigquery.Routine(square.bigframes_remote_function).dataset_id + == session._anonymous_dataset.dataset_id + ) + + scalars_df, scalars_pandas_df = scalars_dfs + + bf_int64_col = scalars_df["int64_col"] + bf_int64_col_filter = bf_int64_col.notnull() + bf_int64_col_filtered = bf_int64_col[bf_int64_col_filter] + bf_result_col = bf_int64_col_filtered.apply(square) + bf_result = ( + bf_int64_col_filtered.to_frame().assign(result=bf_result_col).to_pandas() + ) + + pd_int64_col = scalars_pandas_df["int64_col"] + pd_int64_col_filter = pd_int64_col.notnull() + pd_int64_col_filtered = pd_int64_col[pd_int64_col_filter] + pd_result_col = pd_int64_col_filtered.apply(lambda x: x * x) + # TODO(shobs): Figure why pandas .apply() changes the dtype, i.e. + # pd_int64_col_filtered.dtype is Int64Dtype() + # pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64. + # For this test let's force the pandas dtype to be same as bigframes' dtype. + pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) + pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) + + assert_pandas_df_equal(bf_result, pd_result) + finally: + # clean up the gcp assets created for the remote function + cleanup_remote_function_assets( + session.bqclient, session.cloudfunctionsclient, square + ) diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py index 3d8532a13b..960a384126 100644 --- a/tests/system/small/test_remote_function.py +++ b/tests/system/small/test_remote_function.py @@ -62,13 +62,12 @@ def bq_cf_connection_location_project_mismatched() -> str: @pytest.fixture(scope="module") -def session_with_bq_connection_and_permanent_dataset( +def session_with_bq_connection( bq_cf_connection, dataset_id_permanent ) -> bigframes.Session: session = bigframes.Session( bigframes.BigQueryOptions(bq_connection=bq_cf_connection) ) - session._session_dataset = bigquery.Dataset(dataset_id_permanent) return session @@ -277,13 +276,11 @@ def square(x): @pytest.mark.flaky(retries=2, delay=120) -def test_remote_function_direct_session_param( - session_with_bq_connection_and_permanent_dataset, scalars_dfs -): +def test_remote_function_direct_session_param(session_with_bq_connection, scalars_dfs): @rf.remote_function( [int], int, - session=session_with_bq_connection_and_permanent_dataset, + session=session_with_bq_connection, ) def square(x): return x * x @@ -313,9 +310,7 @@ def square(x): @pytest.mark.flaky(retries=2, delay=120) -def test_remote_function_via_session_default( - session_with_bq_connection_and_permanent_dataset, scalars_dfs -): +def test_remote_function_via_session_default(session_with_bq_connection, scalars_dfs): # Session has bigquery connection initialized via context. Without an # explicit dataset the default dataset from the session would be used. # Without an explicit bigquery connection, the one present in Session set @@ -323,7 +318,7 @@ def test_remote_function_via_session_default( # the default behavior of reuse=True will take effect. Please note that the # udf is same as the one used in other tests in this file so the underlying # cloud function would be common and quickly reused. 
- @session_with_bq_connection_and_permanent_dataset.remote_function([int], int) + @session_with_bq_connection.remote_function([int], int) def square(x): return x * x @@ -391,15 +386,11 @@ def square(x): @pytest.mark.flaky(retries=2, delay=120) -def test_dataframe_applymap( - session_with_bq_connection_and_permanent_dataset, scalars_dfs -): +def test_dataframe_applymap(session_with_bq_connection, scalars_dfs): def add_one(x): return x + 1 - remote_add_one = session_with_bq_connection_and_permanent_dataset.remote_function( - [int], int - )(add_one) + remote_add_one = session_with_bq_connection.remote_function([int], int)(add_one) scalars_df, scalars_pandas_df = scalars_dfs int64_cols = ["int64_col", "int64_too"] @@ -422,15 +413,11 @@ def add_one(x): @pytest.mark.flaky(retries=2, delay=120) -def test_dataframe_applymap_na_ignore( - session_with_bq_connection_and_permanent_dataset, scalars_dfs -): +def test_dataframe_applymap_na_ignore(session_with_bq_connection, scalars_dfs): def add_one(x): return x + 1 - remote_add_one = session_with_bq_connection_and_permanent_dataset.remote_function( - [int], int - )(add_one) + remote_add_one = session_with_bq_connection.remote_function([int], int)(add_one) scalars_df, scalars_pandas_df = scalars_dfs int64_cols = ["int64_col", "int64_too"] @@ -451,13 +438,11 @@ def add_one(x): @pytest.mark.flaky(retries=2, delay=120) -def test_series_map(session_with_bq_connection_and_permanent_dataset, scalars_dfs): +def test_series_map(session_with_bq_connection, scalars_dfs): def add_one(x): return x + 1 - remote_add_one = session_with_bq_connection_and_permanent_dataset.remote_function( - [int], int - )(add_one) + remote_add_one = session_with_bq_connection.remote_function([int], int)(add_one) scalars_df, scalars_pandas_df = scalars_dfs From f8917abc094e222e0435891d4d184b77bfe67722 Mon Sep 17 00:00:00 2001 From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com> Date: Tue, 28 Nov 2023 18:24:16 -0800 Subject: [PATCH 21/26] fix: update the llm+kmeans notebook with recent change (#236) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes internal issue 313682530 🦕 --- .../bq_dataframes_llm_kmeans.ipynb | 47 +++++-------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb index 8d75950925..5f74046fc0 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb @@ -366,18 +366,6 @@ "predicted_embeddings.head() " ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4H_etYfsEOFP" - }, - "outputs": [], - "source": [ - "# Join the complaints with their embeddings in the same DataFrame\n", - "combined_df = downsampled_issues_df.join(predicted_embeddings)" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -426,30 +414,19 @@ "outputs": [], "source": [ "# Use KMeans clustering to calculate our groups. Will take ~3 minutes.\n", - "cluster_model.fit(combined_df[[\"text_embedding\"]])\n", - "clustered_result = cluster_model.predict(combined_df[[\"text_embedding\"]])\n", + "cluster_model.fit(predicted_embeddings[[\"text_embedding\"]])\n", + "clustered_result = cluster_model.predict(predicted_embeddings)\n", "# Notice the CENTROID_ID column, which is the ID number of the group that\n", "# each complaint belongs to.\n", "clustered_result.head(n=5)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Join the group number to the complaints and their text embeddings\n", - "combined_clustered_result = combined_df.join(clustered_result)\n", - "combined_clustered_result.head(n=5) " - ] - }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Our dataframe combined_clustered_result now has three columns: the complaints, their text embeddings, and an ID from 1-10 (inclusive) indicating which semantically similar group they belong to." + "Our dataframe combined_clustered_result now has three complaint columns: the content, their text embeddings, and an ID from 1-10 (inclusive) indicating which semantically similar group they belong to." ] }, { @@ -480,14 +457,14 @@ "source": [ "# Using bigframes, with syntax identical to pandas,\n", "# filter out the first and second groups\n", - "cluster_1_result = combined_clustered_result[\n", - " combined_clustered_result[\"CENTROID_ID\"] == 1\n", - "][[\"consumer_complaint_narrative\"]]\n", + "cluster_1_result = clustered_result[\n", + " clustered_result[\"CENTROID_ID\"] == 1\n", + "][[\"content\"]]\n", "cluster_1_result_pandas = cluster_1_result.head(5).to_pandas()\n", "\n", - "cluster_2_result = combined_clustered_result[\n", - " combined_clustered_result[\"CENTROID_ID\"] == 2\n", - "][[\"consumer_complaint_narrative\"]]\n", + "cluster_2_result = clustered_result[\n", + " clustered_result[\"CENTROID_ID\"] == 2\n", + "][[\"content\"]]\n", "cluster_2_result_pandas = cluster_2_result.head(5).to_pandas()" ] }, @@ -503,15 +480,15 @@ "prompt1 = 'comment list 1:\\n'\n", "for i in range(5):\n", " prompt1 += str(i + 1) + '. 
' + \\\n",
-    "           cluster_1_result_pandas[\"consumer_complaint_narrative\"].iloc[i] + '\\n'\n",
+    "           cluster_1_result_pandas[\"content\"].iloc[i] + '\\n'\n",
     "\n",
     "prompt2 = 'comment list 2:\\n'\n",
     "for i in range(5):\n",
     "    prompt2 += str(i + 1) + '. ' + \\\n",
-    "           cluster_2_result_pandas[\"consumer_complaint_narrative\"].iloc[i] + '\\n'\n",
+    "           cluster_2_result_pandas[\"content\"].iloc[i] + '\\n'\n",
     "\n",
     "print(prompt1)\n",
-    "print(prompt2)\n"
+    "print(prompt2)"
    ]
   },
   {

From fbc31ab0fb166e6ffab48b3507614fef566ec64c Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Wed, 29 Nov 2023 03:52:20 +0000
Subject: [PATCH 22/26] test: fix prerelease tests (#239)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The heads of google-cloud-bigquery and google-cloud-bigquery-storage
packages are installed with -e. This is leading to `from google.cloud
import bigquery` looking into the folder of the latter and running into
"ImportError: cannot import name 'bigquery' from 'google.cloud'
(.../google-cloud-bigquery-storage/google/cloud/__init__.py)". Removing
-e from the google-cloud-bigquery installation gets rid of this error.

Fixes internal issue 313701211 🦕
---
 noxfile.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/noxfile.py b/noxfile.py
index c1fb53f794..3b10a37fc7 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -548,7 +548,6 @@ def prerelease(session: nox.sessions.Session, tests_path):
     # Ensure we catch breaking changes in the client libraries early.
session.install( "--upgrade", - "-e", "git+https://github.com/googleapis/python-bigquery.git#egg=google-cloud-bigquery", ) already_installed.add("google-cloud-bigquery") From d0d9b84b101eb03c499d85e74dcfc900dedd4137 Mon Sep 17 00:00:00 2001 From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com> Date: Wed, 29 Nov 2023 09:15:41 -0800 Subject: [PATCH 23/26] fix: add df snapshots lookup for `read_gbq` (#229) --- bigframes/pandas/__init__.py | 6 ++ bigframes/session/__init__.py | 56 +++++++++++-------- bigframes/session/_io/bigquery.py | 5 -- tests/system/small/test_session.py | 18 ++++++ tests/unit/session/test_io_bigquery.py | 14 ----- .../bigframes_vendored/pandas/io/gbq.py | 3 + 6 files changed, 59 insertions(+), 43 deletions(-) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index d35f838366..0c2c1f87aa 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -486,6 +486,7 @@ def read_gbq( index_col: Iterable[str] | str = (), col_order: Iterable[str] = (), max_results: Optional[int] = None, + use_cache: bool = True, ) -> bigframes.dataframe.DataFrame: _set_default_session_location_if_possible(query_or_table) return global_session.with_default_session( @@ -494,6 +495,7 @@ def read_gbq( index_col=index_col, col_order=col_order, max_results=max_results, + use_cache=use_cache, ) @@ -516,6 +518,7 @@ def read_gbq_query( index_col: Iterable[str] | str = (), col_order: Iterable[str] = (), max_results: Optional[int] = None, + use_cache: bool = True, ) -> bigframes.dataframe.DataFrame: _set_default_session_location_if_possible(query) return global_session.with_default_session( @@ -524,6 +527,7 @@ def read_gbq_query( index_col=index_col, col_order=col_order, max_results=max_results, + use_cache=use_cache, ) @@ -536,6 +540,7 @@ def read_gbq_table( index_col: Iterable[str] | str = (), col_order: Iterable[str] = (), max_results: Optional[int] = None, + use_cache: bool = True, ) -> bigframes.dataframe.DataFrame: _set_default_session_location_if_possible(query) return global_session.with_default_session( @@ -544,6 +549,7 @@ def read_gbq_table( index_col=index_col, col_order=col_order, max_results=max_results, + use_cache=use_cache, ) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index d2f6137883..84a6eb5638 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -177,6 +177,7 @@ def __init__( # Now that we're starting the session, don't allow the options to be # changed. context._session_started = True + self._df_snapshot: Dict[bigquery.TableReference, datetime.datetime] = {} @property def bqclient(self): @@ -232,6 +233,7 @@ def read_gbq( index_col: Iterable[str] | str = (), col_order: Iterable[str] = (), max_results: Optional[int] = None, + use_cache: bool = True, # Add a verify index argument that fails if the index is not unique. ) -> dataframe.DataFrame: # TODO(b/281571214): Generate prompt to show the progress of read_gbq. 
@@ -242,6 +244,7 @@ def read_gbq( col_order=col_order, max_results=max_results, api_name="read_gbq", + use_cache=use_cache, ) else: # TODO(swast): Query the snapshot table but mark it as a @@ -253,6 +256,7 @@ def read_gbq( col_order=col_order, max_results=max_results, api_name="read_gbq", + use_cache=use_cache, ) def _query_to_destination( @@ -260,6 +264,7 @@ def _query_to_destination( query: str, index_cols: List[str], api_name: str, + use_cache: bool = True, ) -> Tuple[Optional[bigquery.TableReference], Optional[bigquery.QueryJob]]: # If a dry_run indicates this is not a query type job, then don't # bother trying to do a CREATE TEMP TABLE ... AS SELECT ... statement. @@ -284,6 +289,7 @@ def _query_to_destination( job_config = bigquery.QueryJobConfig() job_config.labels["bigframes-api"] = api_name job_config.destination = temp_table + job_config.use_query_cache = use_cache try: # Write to temp table to workaround BigQuery 10 GB query results @@ -305,6 +311,7 @@ def read_gbq_query( index_col: Iterable[str] | str = (), col_order: Iterable[str] = (), max_results: Optional[int] = None, + use_cache: bool = True, ) -> dataframe.DataFrame: """Turn a SQL query into a DataFrame. @@ -362,6 +369,7 @@ def read_gbq_query( col_order=col_order, max_results=max_results, api_name="read_gbq_query", + use_cache=use_cache, ) def _read_gbq_query( @@ -372,6 +380,7 @@ def _read_gbq_query( col_order: Iterable[str] = (), max_results: Optional[int] = None, api_name: str = "read_gbq_query", + use_cache: bool = True, ) -> dataframe.DataFrame: if isinstance(index_col, str): index_cols = [index_col] @@ -379,7 +388,10 @@ def _read_gbq_query( index_cols = list(index_col) destination, query_job = self._query_to_destination( - query, index_cols, api_name=api_name + query, + index_cols, + api_name=api_name, + use_cache=use_cache, ) # If there was no destination table, that means the query must have @@ -403,6 +415,7 @@ def _read_gbq_query( index_col=index_cols, col_order=col_order, max_results=max_results, + use_cache=use_cache, ) def read_gbq_table( @@ -412,6 +425,7 @@ def read_gbq_table( index_col: Iterable[str] | str = (), col_order: Iterable[str] = (), max_results: Optional[int] = None, + use_cache: bool = True, ) -> dataframe.DataFrame: """Turn a BigQuery table into a DataFrame. @@ -434,6 +448,7 @@ def read_gbq_table( col_order=col_order, max_results=max_results, api_name="read_gbq_table", + use_cache=use_cache, ) def _get_snapshot_sql_and_primary_key( @@ -441,6 +456,7 @@ def _get_snapshot_sql_and_primary_key( table_ref: bigquery.table.TableReference, *, api_name: str, + use_cache: bool = True, ) -> Tuple[ibis_types.Table, Optional[Sequence[str]]]: """Create a read-only Ibis table expression representing a table. @@ -448,19 +464,6 @@ def _get_snapshot_sql_and_primary_key( column(s), then return those too so that ordering generation can be avoided. """ - if table_ref.dataset_id.upper() == "_SESSION": - # _SESSION tables aren't supported by the tables.get REST API. - return ( - self.ibis_client.sql( - f"SELECT * FROM `_SESSION`.`{table_ref.table_id}`" - ), - None, - ) - table_expression = self.ibis_client.table( - table_ref.table_id, - database=f"{table_ref.project}.{table_ref.dataset_id}", - ) - # If there are primary keys defined, the query engine assumes these # columns are unique, even if the constraint is not enforced. We make # the same assumption and use these columns as the total ordering keys. 
@@ -481,14 +484,18 @@ def _get_snapshot_sql_and_primary_key( job_config = bigquery.QueryJobConfig() job_config.labels["bigframes-api"] = api_name - current_timestamp = list( - self.bqclient.query( - "SELECT CURRENT_TIMESTAMP() AS `current_timestamp`", - job_config=job_config, - ).result() - )[0][0] + if use_cache and table_ref in self._df_snapshot.keys(): + snapshot_timestamp = self._df_snapshot[table_ref] + else: + snapshot_timestamp = list( + self.bqclient.query( + "SELECT CURRENT_TIMESTAMP() AS `current_timestamp`", + job_config=job_config, + ).result() + )[0][0] + self._df_snapshot[table_ref] = snapshot_timestamp table_expression = self.ibis_client.sql( - bigframes_io.create_snapshot_sql(table_ref, current_timestamp) + bigframes_io.create_snapshot_sql(table_ref, snapshot_timestamp) ) return table_expression, primary_keys @@ -500,12 +507,11 @@ def _read_gbq_table( col_order: Iterable[str] = (), max_results: Optional[int] = None, api_name: str, + use_cache: bool = True, ) -> dataframe.DataFrame: if max_results and max_results <= 0: raise ValueError("`max_results` should be a positive number.") - # TODO(swast): Can we re-use the temp table from other reads in the - # session, if the original table wasn't modified? table_ref = bigquery.table.TableReference.from_string( query, default_project=self.bqclient.project ) @@ -513,7 +519,9 @@ def _read_gbq_table( ( table_expression, total_ordering_cols, - ) = self._get_snapshot_sql_and_primary_key(table_ref, api_name=api_name) + ) = self._get_snapshot_sql_and_primary_key( + table_ref, api_name=api_name, use_cache=use_cache + ) for key in col_order: if key not in table_expression.columns: diff --git a/bigframes/session/_io/bigquery.py b/bigframes/session/_io/bigquery.py index dae73301e7..4770f12089 100644 --- a/bigframes/session/_io/bigquery.py +++ b/bigframes/session/_io/bigquery.py @@ -117,11 +117,6 @@ def create_snapshot_sql( table_ref: bigquery.TableReference, current_timestamp: datetime.datetime ) -> str: """Query a table via 'time travel' for consistent reads.""" - - # If we have a _SESSION table, assume that it's already a copy. Nothing to do here. - if table_ref.dataset_id.upper() == "_SESSION": - return f"SELECT * FROM `_SESSION`.`{table_ref.table_id}`" - # If we have an anonymous query results table, it can't be modified and # there isn't any BigQuery time travel. 
    if table_ref.dataset_id.startswith("_"):
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index 7cd9f1dd59..26c5093b35 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -16,6 +16,7 @@
 import random
 import tempfile
 import textwrap
+import time
 import typing
 from typing import List
 
@@ -308,6 +309,23 @@ def test_read_gbq_w_script_no_select(session, dataset_id: str):
     assert df["statement_type"][0] == "SCRIPT"
 
 
+def test_read_gbq_twice_with_same_timestamp(session, penguins_table_id):
+    df1 = session.read_gbq(penguins_table_id)
+    time.sleep(1)
+    df2 = session.read_gbq(penguins_table_id)
+    df1.columns = [
+        "species1",
+        "island1",
+        "culmen_length_mm1",
+        "culmen_depth_mm1",
+        "flipper_length_mm1",
+        "body_mass_g1",
+        "sex1",
+    ]
+    df3 = df1.join(df2)
+    assert df3 is not None
+
+
 def test_read_gbq_model(session, penguins_linear_model_name):
     model = session.read_gbq_model(penguins_linear_model_name)
     assert isinstance(model, bigframes.ml.linear_model.LinearRegression)
diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py
index c87835c412..3f3bfbe7d3 100644
--- a/tests/unit/session/test_io_bigquery.py
+++ b/tests/unit/session/test_io_bigquery.py
@@ -147,20 +147,6 @@ def test_create_snapshot_sql_doesnt_timetravel_anonymous_datasets():
     assert "`my-test-project`.`_e8166e0cdb`.`anonbb92cd`" in sql
 
 
-def test_create_snapshot_sql_doesnt_timetravel_session_tables():
-    table_ref = bigquery.TableReference.from_string("my-test-project._session.abcdefg")
-
-    sql = bigframes.session._io.bigquery.create_snapshot_sql(
-        table_ref, datetime.datetime.now(datetime.timezone.utc)
-    )
-
-    # We aren't modifying _SESSION tables, so don't use time travel.
-    assert "SYSTEM_TIME" not in sql
-
-    # Don't need the project ID for _SESSION tables.
-    assert "my-test-project" not in sql
-
-
 def test_create_temp_table_default_expiration():
     """Make sure the created table has an expiration."""
     bqclient = mock.create_autospec(bigquery.Client)
diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py
index 2161310b07..eabb48e600 100644
--- a/third_party/bigframes_vendored/pandas/io/gbq.py
+++ b/third_party/bigframes_vendored/pandas/io/gbq.py
@@ -16,6 +16,7 @@ def read_gbq(
     index_col: Iterable[str] | str = (),
     col_order: Iterable[str] = (),
     max_results: Optional[int] = None,
+    use_cache: bool = True,
 ):
     """Loads a DataFrame from BigQuery.
 
@@ -83,6 +84,8 @@
         max_results (Optional[int], default None):
             If set, limit the maximum number of rows to fetch from the
             query results.
+        use_cache (bool, default True):
+            Whether to reuse cached query results and table snapshots. Defaults to True.
 
     Returns:
         bigframes.dataframe.DataFrame: A DataFrame representing results of the query or table.
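
A usage sketch of the snapshot lookup introduced by this patch (illustrative,
not part of the change itself; the public penguins table stands in for any
readable table):

    import bigframes.pandas as bpd

    # Within one session, repeated reads of the same table reuse the snapshot
    # timestamp recorded in Session._df_snapshot, so both DataFrames observe
    # identical data and can be joined consistently.
    df1 = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
    df2 = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")[["body_mass_g"]]
    df2.columns = ["body_mass_g_2"]
    joined = df1.join(df2)

    # Passing use_cache=False bypasses both the BigQuery query cache and the
    # cached snapshot timestamp, forcing a fresh read.
    fresh = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins", use_cache=False)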
From 0bfc4fb117686c734d4a2503d5a6de0e64e9f9b9 Mon Sep 17 00:00:00 2001
From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com>
Date: Wed, 29 Nov 2023 11:44:16 -0800
Subject: [PATCH 24/26] feat: add remote vertex model support (#237)

b/299356085
---
 bigframes/ml/__init__.py               |   1 +
 bigframes/ml/core.py                   |   8 ++
 bigframes/ml/remote.py                 | 157 +++++++++++++++++++++++++
 bigframes/ml/sql.py                    |  26 +++-
 docs/reference/bigframes.ml/index.rst  |   2 +
 docs/reference/bigframes.ml/remote.rst |   7 ++
 docs/templates/toc.yml                 |   6 +
 tests/system/small/ml/conftest.py      |  41 +++++++
 tests/system/small/ml/test_core.py     |  16 +++
 tests/system/small/ml/test_remote.py   |  33 ++++++
 tests/unit/ml/test_sql.py              |  26 ++++
 11 files changed, 319 insertions(+), 4 deletions(-)
 create mode 100644 bigframes/ml/remote.py
 create mode 100644 docs/reference/bigframes.ml/remote.rst
 create mode 100644 tests/system/small/ml/test_remote.py

diff --git a/bigframes/ml/__init__.py b/bigframes/ml/__init__.py
index 55c8709d8d..b2c62ff961 100644
--- a/bigframes/ml/__init__.py
+++ b/bigframes/ml/__init__.py
@@ -26,4 +26,5 @@
     "llm",
     "forecasting",
     "imported",
+    "remote",
 ]
diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index d8135f7085..5aad77a394 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -294,6 +294,8 @@ def create_remote_model(
         self,
         session: bigframes.Session,
         connection_name: str,
+        input: Mapping[str, str] = {},
+        output: Mapping[str, str] = {},
         options: Mapping[str, Union[str, int, float, Iterable[str]]] = {},
     ) -> BqmlModel:
         """Create a session-temporary BQML remote model with the CREATE OR REPLACE MODEL statement
 
         Args:
             connection_name:
                 a BQ connection to talk with Vertex AI, of the format
                 <PROJECT_NUMBER>.<REGION>.<CONNECTION_NAME>.
                 https://cloud.google.com/bigquery/docs/create-cloud-resource-connection
+            input:
+                input schema for general remote models
+            output:
+                output schema for general remote models
             options:
                 a dict of options to configure the model. Generates a BQML OPTIONS clause
 
@@ -311,6 +317,8 @@
         sql = self._model_creation_sql_generator.create_remote_model(
             connection_name=connection_name,
             model_ref=model_ref,
+            input=input,
+            output=output,
             options=options,
         )
diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py
new file mode 100644
index 0000000000..d4c34bbd0d
--- /dev/null
+++ b/bigframes/ml/remote.py
@@ -0,0 +1,157 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""BigFrames general remote models."""
+
+from __future__ import annotations
+
+from typing import Mapping, Optional, Union
+import warnings
+
+import bigframes
+from bigframes import clients
+from bigframes.core import log_adapter
+from bigframes.ml import base, core, globals, utils
+import bigframes.pandas as bpd
+
+_SUPPORTED_DTYPES = (
+    "bool",
+    "string",
+    "int64",
+    "float64",
+    "array<bool>",
+    "array<string>",
+    "array<int64>",
+    "array<float64>",
+)
+
+_REMOTE_MODEL_STATUS = "remote_model_status"
+
+
+@log_adapter.class_logger
+class VertexAIModel(base.BaseEstimator):
+    """Remote model from a Vertex AI HTTPS endpoint. Users must specify the endpoint, the input schema, and the output schema.
+    For how to deploy a model in Vertex AI, see https://cloud.google.com/bigquery/docs/bigquery-ml-remote-model-tutorial#Deploy-Model-on-Vertex-AI.
+
+    Args:
+        endpoint (str):
+            Vertex AI HTTPS endpoint.
+        input ({column_name: column_type}):
+            Input schema. Supported types are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>".
+        output ({column_name: column_type}):
+            Output label schema. Supports the same types as the input.
+        session (bigframes.Session or None):
+            BQ session to create the model. If None, use the global default session.
+        connection_name (str or None):
+            Connection to connect with the remote service. A str of the format <PROJECT_ID>.<LOCATION>.<CONNECTION_ID>.
+            If None, use the default connection in the session context. BigQuery DataFrames will try to create the connection and attach
+            permission if the connection isn't fully set up.
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        input: Mapping[str, str],
+        output: Mapping[str, str],
+        session: Optional[bigframes.Session] = None,
+        connection_name: Optional[str] = None,
+    ):
+        self.endpoint = endpoint
+        self.input = input
+        self.output = output
+        self.session = session or bpd.get_global_session()
+
+        self._bq_connection_manager = clients.BqConnectionManager(
+            self.session.bqconnectionclient, self.session.resourcemanagerclient
+        )
+        connection_name = connection_name or self.session._bq_connection
+        self.connection_name = self._bq_connection_manager.resolve_full_connection_name(
+            connection_name,
+            default_project=self.session._project,
+            default_location=self.session._location,
+        )
+
+        self._bqml_model_factory = globals.bqml_model_factory()
+        self._bqml_model: core.BqmlModel = self._create_bqml_model()
+
+    def _create_bqml_model(self):
+        # Parse and create connection if needed.
+        if not self.connection_name:
+            raise ValueError(
+                "Must provide connection_name, either in constructor or through session options."
+            )
+        connection_name_parts = self.connection_name.split(".")
+        if len(connection_name_parts) != 3:
+            raise ValueError(
+                f"connection_name must be of the format <PROJECT_ID>.<LOCATION>.<CONNECTION_ID>, got {self.connection_name}."
+            )
+        self._bq_connection_manager.create_bq_connection(
+            project_id=connection_name_parts[0],
+            location=connection_name_parts[1],
+            connection_id=connection_name_parts[2],
+            iam_role="aiplatform.user",
+        )
+
+        options = {
+            "endpoint": self.endpoint,
+        }
+
+        def standardize_type(v: str):
+            v = v.lower()
+            v = v.replace("boolean", "bool")
+
+            if v not in _SUPPORTED_DTYPES:
+                raise ValueError(
+                    f"Data type {v} is not supported. We only support {', '.join(_SUPPORTED_DTYPES)}."
+ ) + + return v + + self.input = {k: standardize_type(v) for k, v in self.input.items()} + self.output = {k: standardize_type(v) for k, v in self.output.items()} + + return self._bqml_model_factory.create_remote_model( + session=self.session, + connection_name=self.connection_name, + input=self.input, + output=self.output, + options=options, + ) + + def predict( + self, + X: Union[bpd.DataFrame, bpd.Series], + ) -> bpd.DataFrame: + """Predict the result from the input DataFrame. + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series): + Input DataFrame or Series, which needs to comply with the input parameter of the model. + + Returns: + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. + """ + + (X,) = utils.convert_to_dataframe(X) + + df = self._bqml_model.predict(X) + + # unlike LLM models, the general remote model status is null for successful runs. + if (df[_REMOTE_MODEL_STATUS].notna()).any(): + warnings.warn( + f"Some predictions failed. Check column {_REMOTE_MODEL_STATUS} for detailed status. You may want to filter the failed rows and retry.", + RuntimeWarning, + ) + + return df diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index ab051231fb..1c88eda4ab 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -57,6 +57,12 @@ def build_expressions(self, *expr_sqls: str) -> str: indent_str = " " return "\n" + indent_str + f",\n{indent_str}".join(expr_sqls) + def build_schema(self, **kwargs: str) -> str: + """Encode a dict of values into a formatted schema type items for SQL""" + indent_str = " " + param_strs = [f"{k} {v}" for k, v in kwargs.items()] + return "\n" + indent_str + f",\n{indent_str}".join(param_strs) + def options(self, **kwargs: Union[str, int, float, Iterable[str]]) -> str: """Encode the OPTIONS clause for BQML""" return f"OPTIONS({self.build_parameters(**kwargs)})" @@ -65,6 +71,14 @@ def struct_options(self, **kwargs: Union[int, float]) -> str: """Encode a BQ STRUCT as options.""" return f"STRUCT({self.build_structs(**kwargs)})" + def input(self, **kwargs: str) -> str: + """Encode a BQML INPUT clause.""" + return f"INPUT({self.build_schema(**kwargs)})" + + def output(self, **kwargs: str) -> str: + """Encode a BQML OUTPUT clause.""" + return f"OUTPUT({self.build_schema(**kwargs)})" + # Connection def connection(self, conn_name: str) -> str: """Encode the REMOTE WITH CONNECTION clause for BQML. 
conn_name is of the format <PROJECT_ID>.<LOCATION>.<CONNECTION_ID>."""
@@ -154,15 +168,19 @@ def create_remote_model(
         self,
         connection_name: str,
         model_ref: google.cloud.bigquery.ModelReference,
+        input: Mapping[str, str] = {},
+        output: Mapping[str, str] = {},
         options: Mapping[str, Union[str, int, float, Iterable[str]]] = {},
     ) -> str:
         """Encode the CREATE OR REPLACE MODEL statement for BQML remote model."""
-        options_sql = self.options(**options)
-
         parts = [f"CREATE OR REPLACE MODEL {self._model_id_sql(model_ref)}"]
+        if input:
+            parts.append(self.input(**input))
+        if output:
+            parts.append(self.output(**output))
         parts.append(self.connection(connection_name))
-        if options_sql:
-            parts.append(options_sql)
+        if options:
+            parts.append(self.options(**options))
         return "\n".join(parts)
 
     def create_imported_model(
diff --git a/docs/reference/bigframes.ml/index.rst b/docs/reference/bigframes.ml/index.rst
index f3cbe1174a..1975d62e6d 100644
--- a/docs/reference/bigframes.ml/index.rst
+++ b/docs/reference/bigframes.ml/index.rst
@@ -30,3 +30,5 @@ API Reference
 
     pipeline
     preprocessing
+
+    remote
diff --git a/docs/reference/bigframes.ml/remote.rst b/docs/reference/bigframes.ml/remote.rst
new file mode 100644
index 0000000000..7827acfe92
--- /dev/null
+++ b/docs/reference/bigframes.ml/remote.rst
@@ -0,0 +1,7 @@
+bigframes.ml.remote
+===================
+
+.. automodule:: bigframes.ml.remote
+    :members:
+    :inherited-members:
+    :undoc-members:
diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml
index 9879721d28..58ac1c0efe 100644
--- a/docs/templates/toc.yml
+++ b/docs/templates/toc.yml
@@ -108,6 +108,12 @@
       - name: PaLM2TextEmbeddingGenerator
         uid: bigframes.ml.llm.PaLM2TextEmbeddingGenerator
       name: llm
+    - items:
+      - name: Overview
+        uid: bigframes.ml.remote
+      - name: VertexAIModel
+        uid: bigframes.ml.remote.VertexAIModel
+      name: remote
     - items:
       - name: metrics
         uid: bigframes.ml.metrics
diff --git a/tests/system/small/ml/conftest.py b/tests/system/small/ml/conftest.py
index c11445b79a..c4a1272e44 100644
--- a/tests/system/small/ml/conftest.py
+++ b/tests/system/small/ml/conftest.py
@@ -29,6 +29,7 @@
     imported,
     linear_model,
     llm,
+    remote,
 )
 
 
@@ -247,6 +248,46 @@ def palm2_embedding_generator_multilingual_model(
     )
 
 
+@pytest.fixture(scope="session")
+def linear_remote_model_params() -> dict:
+    # Pre-deployed endpoint of linear reg model in Vertex.
+    # bigframes-test-linreg2 -> bigframes-test-linreg-endpoint2
+    return {
+        "input": {"culmen_length_mm": "float64"},
+        "output": {"predicted_body_mass_g": "array<float64>"},
+        "endpoint": "https://us-central1-aiplatform.googleapis.com/v1/projects/1084210331973/locations/us-central1/endpoints/3193318217619603456",
+    }
+
+
+@pytest.fixture(scope="session")
+def bqml_linear_remote_model(
+    session, bq_connection, linear_remote_model_params
+) -> core.BqmlModel:
+    options = {
+        "endpoint": linear_remote_model_params["endpoint"],
+    }
+    return globals.bqml_model_factory().create_remote_model(
+        session=session,
+        input=linear_remote_model_params["input"],
+        output=linear_remote_model_params["output"],
+        connection_name=bq_connection,
+        options=options,
+    )
+
+
+@pytest.fixture(scope="session")
+def linear_remote_vertex_model(
+    session, bq_connection, linear_remote_model_params
+) -> remote.VertexAIModel:
+    return remote.VertexAIModel(
+        endpoint=linear_remote_model_params["endpoint"],
+        input=linear_remote_model_params["input"],
+        output=linear_remote_model_params["output"],
+        session=session,
+        connection_name=bq_connection,
+    )
+
+
 @pytest.fixture(scope="session")
 def time_series_bqml_arima_plus_model(
     session, time_series_arima_plus_model_name
diff --git a/tests/system/small/ml/test_core.py b/tests/system/small/ml/test_core.py
index be34a4871c..22cbbb1932 100644
--- a/tests/system/small/ml/test_core.py
+++ b/tests/system/small/ml/test_core.py
@@ -289,6 +289,22 @@ def test_model_predict_with_unnamed_index(
     )
 
 
+def test_remote_model_predict(
+    bqml_linear_remote_model: core.BqmlModel, new_penguins_df
+):
+    predictions = bqml_linear_remote_model.predict(new_penguins_df).to_pandas()
+    expected = pd.DataFrame(
+        {"predicted_body_mass_g": [[3739.54], [3675.79], [3619.54]]},
+        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
+    )
+    pd.testing.assert_frame_equal(
+        predictions[["predicted_body_mass_g"]].sort_index(),
+        expected,
+        check_exact=False,
+        rtol=0.1,
+    )
+
+
 @pytest.mark.flaky(retries=2, delay=120)
 def test_model_generate_text(
     bqml_palm2_text_generator_model: core.BqmlModel, llm_text_df
diff --git a/tests/system/small/ml/test_remote.py b/tests/system/small/ml/test_remote.py
new file mode 100644
index 0000000000..e8eb1c85e8
--- /dev/null
+++ b/tests/system/small/ml/test_remote.py
@@ -0,0 +1,33 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pandas as pd
+
+from bigframes.ml import remote
+
+
+def test_remote_linear_vertex_model_predict(
+    linear_remote_vertex_model: remote.VertexAIModel, new_penguins_df
+):
+    predictions = linear_remote_vertex_model.predict(new_penguins_df).to_pandas()
+    expected = pd.DataFrame(
+        {"predicted_body_mass_g": [[3739.54], [3675.79], [3619.54]]},
+        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
+    )
+    pd.testing.assert_frame_equal(
+        predictions[["predicted_body_mass_g"]].sort_index(),
+        expected,
+        check_exact=False,
+        rtol=0.1,
+    )
diff --git a/tests/unit/ml/test_sql.py b/tests/unit/ml/test_sql.py
index ea16722393..9223058540 100644
--- a/tests/unit/ml/test_sql.py
+++ b/tests/unit/ml/test_sql.py
@@ -190,6 +190,32 @@ def test_create_remote_model_produces_correct_sql(
     )
 
 
+def test_create_remote_model_with_params_produces_correct_sql(
+    model_creation_sql_generator: ml_sql.ModelCreationSqlGenerator,
+):
+    sql = model_creation_sql_generator.create_remote_model(
+        connection_name="my_project.us.my_connection",
+        model_ref=bigquery.ModelReference.from_string(
+            "test-proj._anonXYZ.create_remote_model"
+        ),
+        input={"column1": "int64"},
+        output={"result": "array<int64>"},
+        options={"option_key1": "option_value1", "option_key2": 2},
+    )
+    assert (
+        sql
+        == """CREATE OR REPLACE MODEL `test-proj`.`_anonXYZ`.`create_remote_model`
+INPUT(
+  column1 int64)
+OUTPUT(
+  result array<int64>)
+REMOTE WITH CONNECTION `my_project.us.my_connection`
+OPTIONS(
+  option_key1="option_value1",
+  option_key2=2)"""
+    )
+
+
 def test_create_imported_model_produces_correct_sql(
     model_creation_sql_generator: ml_sql.ModelCreationSqlGenerator,
 ):

From 6c899be2989e24f697d72fe1bb92ebbf7dec84cb Mon Sep 17 00:00:00 2001
From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com>
Date: Wed, 29 Nov 2023 13:12:15 -0800
Subject: [PATCH 25/26] chore: release 0.15.0 (#241)

Release-As: 0.15.0

From 8089b15feddaeb9c56a8f976b439315fcfed0301 Mon Sep 17 00:00:00 2001
From: "release-please[bot]"
 <55107282+release-please[bot]@users.noreply.github.com>
Date: Wed, 29 Nov 2023 14:46:09 -0800
Subject: [PATCH 26/26] chore(main): release 0.15.0 (#214)

Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
---
 CHANGELOG.md         | 45 ++++++++++++++++++++++++++++++++++++++++++++
 bigframes/version.py |  2 +-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 091967513a..ef75a017e0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,51 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [0.15.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.14.1...v0.15.0) (2023-11-29)
+
+
+### ⚠ BREAKING CHANGES
+
+* model.predict returns all the columns ([#204](https://github.com/googleapis/python-bigquery-dataframes/issues/204))
+
+### Features
+
+* Add info and memory_usage methods to dataframe ([#219](https://github.com/googleapis/python-bigquery-dataframes/issues/219)) ([9d6613d](https://github.com/googleapis/python-bigquery-dataframes/commit/9d6613d318b558722b7bab12773efdea4bbe9931))
+* Add remote vertex model support ([#237](https://github.com/googleapis/python-bigquery-dataframes/issues/237)) ([0bfc4fb](https://github.com/googleapis/python-bigquery-dataframes/commit/0bfc4fb117686c734d4a2503d5a6de0e64e9f9b9))
+* Add the recent api method for ML component ([#225](https://github.com/googleapis/python-bigquery-dataframes/issues/225))
([ed8876d](https://github.com/googleapis/python-bigquery-dataframes/commit/ed8876d3439a3b45b65e8789737c3c2e3a7f1adb)) +* Model.predict returns all the columns ([#204](https://github.com/googleapis/python-bigquery-dataframes/issues/204)) ([416171a](https://github.com/googleapis/python-bigquery-dataframes/commit/416171a70d91d4a6b71622ba72685147ab7d6186)) +* Send warnings on LLM prediction partial failures ([#216](https://github.com/googleapis/python-bigquery-dataframes/issues/216)) ([81125f9](https://github.com/googleapis/python-bigquery-dataframes/commit/81125f9505ad98e89939769a8e1fcf30518705f0)) + + +### Bug Fixes + +* Add df snapshots lookup for `read_gbq` ([#229](https://github.com/googleapis/python-bigquery-dataframes/issues/229)) ([d0d9b84](https://github.com/googleapis/python-bigquery-dataframes/commit/d0d9b84b101eb03c499d85e74dcfc900dedd4137)) +* Avoid unnecessary row_number() on sort key for io ([#211](https://github.com/googleapis/python-bigquery-dataframes/issues/211)) ([a18d40e](https://github.com/googleapis/python-bigquery-dataframes/commit/a18d40e808ee0822d21715cc3e8f794c418aeebc)) +* Dedup special character ([#209](https://github.com/googleapis/python-bigquery-dataframes/issues/209)) ([dd78acb](https://github.com/googleapis/python-bigquery-dataframes/commit/dd78acb174545ba292776a642afcec46f8ee4a2a)) +* Invalid JSON type of the notebook ([#215](https://github.com/googleapis/python-bigquery-dataframes/issues/215)) ([a729831](https://github.com/googleapis/python-bigquery-dataframes/commit/a7298317ea2604faa6ae31817f1f729d7e0b9818)) +* Make to_pandas override enable_downsampling when sampling_method is manually set. ([#200](https://github.com/googleapis/python-bigquery-dataframes/issues/200)) ([ae03756](https://github.com/googleapis/python-bigquery-dataframes/commit/ae03756f5ee45e0e74e0c0bdd4777e018eba2273)) +* Polish the llm+kmeans notebook ([#208](https://github.com/googleapis/python-bigquery-dataframes/issues/208)) ([e8532b1](https://github.com/googleapis/python-bigquery-dataframes/commit/e8532b1d999d26ea1ebdd30efb8f2c0a93a6a28d)) +* Update the llm+kmeans notebook with recent change ([#236](https://github.com/googleapis/python-bigquery-dataframes/issues/236)) ([f8917ab](https://github.com/googleapis/python-bigquery-dataframes/commit/f8917abc094e222e0435891d4d184b77bfe67722)) +* Use anonymous dataset to create `remote_function` ([#205](https://github.com/googleapis/python-bigquery-dataframes/issues/205)) ([69b016e](https://github.com/googleapis/python-bigquery-dataframes/commit/69b016eae7ea97d84ceeb22ba09f5472841db072)) + + +### Documentation + +* Add code samples for `index` and `column` properties ([#212](https://github.com/googleapis/python-bigquery-dataframes/issues/212)) ([c88d38e](https://github.com/googleapis/python-bigquery-dataframes/commit/c88d38e69682f4c620174086b8f16f4780c04811)) +* Add code samples for df reshaping, function, merge, and join methods ([#203](https://github.com/googleapis/python-bigquery-dataframes/issues/203)) ([010486c](https://github.com/googleapis/python-bigquery-dataframes/commit/010486c3494e05d714da6cc7d51514518d9ae1ea)) +* Add examples for dataframe.kurt, dataframe.std, dataframe.count ([#232](https://github.com/googleapis/python-bigquery-dataframes/issues/232)) ([f9c6e72](https://github.com/googleapis/python-bigquery-dataframes/commit/f9c6e727e2b901310bb5301da449d616ea85e135)) +* Add examples for dataframe.mean, dataframe.median, dataframe.va… ([#228](https://github.com/googleapis/python-bigquery-dataframes/issues/228)) 
([edd0522](https://github.com/googleapis/python-bigquery-dataframes/commit/edd0522747eadb74780124fb18ed7face251441d)) +* Add examples for dataframe.min, dataframe.max and dataframe.sum ([#227](https://github.com/googleapis/python-bigquery-dataframes/issues/227)) ([3a375e8](https://github.com/googleapis/python-bigquery-dataframes/commit/3a375e87b64b8fb51370bfec8f2cfdbcd8fe960a)) +* Code samples for `Series.dot` and `DataFrame.dot` ([#226](https://github.com/googleapis/python-bigquery-dataframes/issues/226)) ([b62a07a](https://github.com/googleapis/python-bigquery-dataframes/commit/b62a07a95cd60f995a48825c9874822d0eb02483)) +* Code samples for `Series.where` and `Series.mask` ([#217](https://github.com/googleapis/python-bigquery-dataframes/issues/217)) ([52dfad2](https://github.com/googleapis/python-bigquery-dataframes/commit/52dfad281def82548751a276ce42b087dbb09f9a)) +* Code samples for dataframe.any, dataframe.all and dataframe.prod ([#223](https://github.com/googleapis/python-bigquery-dataframes/issues/223)) ([d7957fa](https://github.com/googleapis/python-bigquery-dataframes/commit/d7957fad071d223ef8f6fb8f3de395c865ff60aa)) +* Make the code samples reflect default bq connection usage ([#206](https://github.com/googleapis/python-bigquery-dataframes/issues/206)) ([71844b0](https://github.com/googleapis/python-bigquery-dataframes/commit/71844b03cdbfe684320c186a0488c8c7fb4fcd6e)) + + +### Miscellaneous Chores + +* Release 0.15.0 ([#241](https://github.com/googleapis/python-bigquery-dataframes/issues/241)) ([6c899be](https://github.com/googleapis/python-bigquery-dataframes/commit/6c899be2989e24f697d72fe1bb92ebbf7dec84cb)) + ## [0.14.1](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.14.0...v0.14.1) (2023-11-16) diff --git a/bigframes/version.py b/bigframes/version.py index 46e57e5b88..920cb95c3d 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.14.1" +__version__ = "0.15.0"
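
For reference, a minimal usage sketch of the bigframes.ml.remote.VertexAIModel
API released in 0.15.0 above (the endpoint URL, connection name, and schemas
below are illustrative placeholders, not real resources):

    import bigframes.pandas as bpd
    from bigframes.ml import remote

    # Input/output schemas use the supported types: "bool", "string", "int64",
    # "float64", or their "array<...>" forms.
    model = remote.VertexAIModel(
        endpoint=(
            "https://us-central1-aiplatform.googleapis.com/v1/projects/"
            "123456789012/locations/us-central1/endpoints/1234567890123456789"
        ),
        input={"culmen_length_mm": "float64"},
        output={"predicted_body_mass_g": "array<float64>"},
        connection_name="my-project.us.bigframes-default-connection",
    )

    # predict() returns the input columns plus the prediction columns; a
    # non-null remote_model_status marks rows whose remote call failed.
    df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
    predictions = model.predict(df[["culmen_length_mm"]].dropna())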