From 52b7786c3a28da6c29e3ddf12629802215194ad9 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Mon, 5 Aug 2024 16:21:51 -0700 Subject: [PATCH 01/15] fix: Fix caching from generating row numbers in partial ordering mode (#872) --- bigframes/series.py | 2 +- bigframes/session/__init__.py | 4 +++- tests/system/small/test_unordered.py | 15 ++++++++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/bigframes/series.py b/bigframes/series.py index d41553d0d7..069c469a85 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -641,7 +641,7 @@ def head(self, n: int = 5) -> Series: def tail(self, n: int = 5) -> Series: return typing.cast(Series, self.iloc[-n:]) - def peek(self, n: int = 5, *, force: bool = True) -> pandas.DataFrame: + def peek(self, n: int = 5, *, force: bool = True) -> pandas.Series: """ Preview n arbitrary elements from the series without guarantees about row selection or ordering. diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index dc1da488a1..8ff5862bfc 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1997,8 +1997,10 @@ def _cache_with_session_awareness(self, array_value: core.ArrayValue) -> None: ) if len(cluster_cols) > 0: self._cache_with_cluster_cols(core.ArrayValue(target), cluster_cols) - else: + elif self._strictly_ordered: self._cache_with_offsets(core.ArrayValue(target)) + else: + self._cache_with_cluster_cols(core.ArrayValue(target), []) def _simplify_with_caching(self, array_value: core.ArrayValue): """Attempts to handle the complexity by caching duplicated subtrees and breaking the query into pieces.""" diff --git a/tests/system/small/test_unordered.py b/tests/system/small/test_unordered.py index 9f85ec99f9..5e124d73cd 100644 --- a/tests/system/small/test_unordered.py +++ b/tests/system/small/test_unordered.py @@ -19,7 +19,11 @@ import bigframes.exceptions import bigframes.pandas as bpd -from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas +from tests.system.utils import ( + assert_pandas_df_equal, + assert_series_equal, + skip_legacy_pandas, +) def test_unordered_mode_sql_no_hash(unordered_session): @@ -51,6 +55,15 @@ def test_unordered_mode_cache_aggregate(unordered_session): assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) +def test_unordered_mode_series_peek(unordered_session): + pd_series = pd.Series([1, 2, 3, 4, 5, 6], dtype=pd.Int64Dtype()) + bf_series = bpd.Series(pd_series, session=unordered_session) + pd_result = pd_series.groupby(pd_series % 4).sum() + bf_peek = bf_series.groupby(bf_series % 4).sum().peek(2) + + assert_series_equal(bf_peek, pd_result.reindex(bf_peek.index)) + + def test_unordered_mode_single_aggregate(unordered_session): pd_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=pd.Int64Dtype()) bf_df = bpd.DataFrame(pd_df, session=unordered_session) From 6e6f9df55d435afe0b3ade728ca06826e92a6ee6 Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Tue, 6 Aug 2024 12:22:53 -0700 Subject: [PATCH 02/15] docs: update streaming notebook (#887) --- notebooks/streaming/streaming_dataframe.ipynb | 203 +++++++++++------- 1 file changed, 127 insertions(+), 76 deletions(-) diff --git a/notebooks/streaming/streaming_dataframe.ipynb b/notebooks/streaming/streaming_dataframe.ipynb index d4cc255fa5..9b52c2d71e 100644 --- a/notebooks/streaming/streaming_dataframe.ipynb +++ b/notebooks/streaming/streaming_dataframe.ipynb @@ -17,10 +17,22 @@ "cell_type": "code", "execution_count": 1, 
"metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'1.13.0'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import bigframes\n", - "import bigframes.streaming as bst" + "# make sure bigframes version >= 1.12.0\n", + "bigframes.__version__" ] }, { @@ -29,10 +41,46 @@ "metadata": {}, "outputs": [], "source": [ - "bigframes.options._bigquery_options.project = \"bigframes-load-testing\"\n", + "import bigframes.pandas as bpd\n", + "import bigframes.streaming as bst\n", + "bigframes.options._bigquery_options.project = \"bigframes-load-testing\" # Change to your own project ID\n", "job_id_prefix = \"test_streaming_\"" ] }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 65df3a2f-cda8-405d-8b38-20a755f9b9a0 is DONE. 28.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'birds.penguins_bigtable_streaming'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Copy a table from the public dataset for streaming jobs. Any changes to the table can be reflected in the streaming destination.\n", + "df = bpd.read_gbq(\"bigquery-public-data.ml_datasets.penguins\")\n", + "df.to_gbq(\"birds.penguins_bigtable_streaming\", if_exists=\"replace\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -43,13 +91,15 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/session/__init__.py:773: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n", + " warnings.warn(\n", "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/blocks.py:126: NullIndexPreviewWarning: Creating object with Null Index. Null Index is a preview feature.\n", " warnings.warn(\n" ] @@ -61,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -82,7 +132,7 @@ { "data": { "text/html": [ - "Query job d57200dd-e6f1-42c7-876b-7f4a54994ae6 is DONE. 0 Bytes processed. Open Job" + "Query job dd20bd9d-4844-43e4-86ab-95759d7e673a is DONE. 2.7 kB processed. Open Job" ], "text/plain": [ "" @@ -104,7 +154,7 @@ { "data": { "text/html": [ - "Query job 1decce4a-eb32-49f4-8e47-7bda0220037a is DONE. 28.9 kB processed. Open Job" + "Query job 873e44ee-76e9-4254-83d3-04cf36fbd140 is DONE. 28.9 kB processed. 
Open Job" ], "text/plain": [ "" @@ -144,151 +194,151 @@ " 0\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 3875\n", + " 3875.0\n", " \n", " \n", " 1\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 2900\n", + " 2900.0\n", " \n", " \n", " 2\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 3725\n", + " 3725.0\n", " \n", " \n", " 3\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 2975\n", + " 2975.0\n", " \n", " \n", " 4\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 3050\n", + " 3050.0\n", " \n", " \n", " 5\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 2700\n", + " 2700.0\n", " \n", " \n", " 6\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 3900\n", + " 3900.0\n", " \n", " \n", " 7\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 3825\n", + " 3825.0\n", " \n", " \n", " 8\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3775\n", + " 3775.0\n", " \n", " \n", " 9\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 3350\n", + " 3350.0\n", " \n", " \n", " 10\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 3900\n", + " 3900.0\n", " \n", " \n", " 11\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 3650\n", + " 3650.0\n", " \n", " \n", " 12\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 3200\n", + " 3200.0\n", " \n", " \n", " 13\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3650\n", + " 3650.0\n", " \n", " \n", " 14\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 3700\n", + " 3700.0\n", " \n", " \n", " 15\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3800\n", + " 3800.0\n", " \n", " \n", " 16\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3950\n", + " 3950.0\n", " \n", " \n", " 17\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3350\n", + " 3350.0\n", " \n", " \n", " 18\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 3100\n", + " 3100.0\n", " \n", " \n", " 19\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3750\n", + " 3750.0\n", " \n", " \n", " 20\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 3550\n", + " 3550.0\n", " \n", " \n", " 21\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3400\n", + " 3400.0\n", " \n", " \n", " 22\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 3450\n", + " 3450.0\n", " \n", " \n", " 23\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 3600\n", + " 3600.0\n", " \n", " \n", " 24\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3650\n", + " 3650.0\n", " \n", " \n", "\n", @@ -297,37 +347,37 @@ ], "text/plain": [ " species rowkey body_mass_g\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 3875\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 2900\n", - " Adelie Penguin (Pygoscelis adeliae) Biscoe 3725\n", - " Adelie Penguin (Pygoscelis adeliae) Dream 2975\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 3050\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 2700\n", - " Adelie Penguin (Pygoscelis adeliae) Dream 3900\n", - " Adelie Penguin (Pygoscelis adeliae) Biscoe 3825\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3775\n", - " Adelie Penguin (Pygoscelis adeliae) Dream 3350\n", - " Adelie Penguin (Pygoscelis adeliae) Biscoe 3900\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 3650\n", - " Adelie Penguin (Pygoscelis 
adeliae) Biscoe 3200\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3650\n", - " Adelie Penguin (Pygoscelis adeliae) Dream 3700\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3800\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3950\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3350\n", - " Adelie Penguin (Pygoscelis adeliae) Dream 3100\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3750\n", - " Adelie Penguin (Pygoscelis adeliae) Biscoe 3550\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3400\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 3450\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 3600\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3650\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 3875.0\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 2900.0\n", + " Adelie Penguin (Pygoscelis adeliae) Biscoe 3725.0\n", + " Adelie Penguin (Pygoscelis adeliae) Dream 2975.0\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 3050.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 2700.0\n", + " Adelie Penguin (Pygoscelis adeliae) Dream 3900.0\n", + " Adelie Penguin (Pygoscelis adeliae) Biscoe 3825.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3775.0\n", + " Adelie Penguin (Pygoscelis adeliae) Dream 3350.0\n", + " Adelie Penguin (Pygoscelis adeliae) Biscoe 3900.0\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 3650.0\n", + " Adelie Penguin (Pygoscelis adeliae) Biscoe 3200.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3650.0\n", + " Adelie Penguin (Pygoscelis adeliae) Dream 3700.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3800.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3950.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3350.0\n", + " Adelie Penguin (Pygoscelis adeliae) Dream 3100.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3750.0\n", + " Adelie Penguin (Pygoscelis adeliae) Biscoe 3550.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3400.0\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 3450.0\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 3600.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3650.0\n", "...\n", "\n", "[165 rows x 3 columns]" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -351,22 +401,22 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/garrettwu/src/bigframes/bigframes/streaming/dataframe.py:338: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n", + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/streaming/dataframe.py:341: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n", " warnings.warn(\n" ] } ], "source": [ - "job = sdf.to_bigtable(instance=\"streaming-testing-instance\",\n", - " table=\"garrettwu-no-col-family\",\n", - " service_account_email=\"streaming-testing-admin@bigframes-load-testing.iam.gserviceaccount.com\",\n", + "job = sdf.to_bigtable(instance=\"streaming-testing-instance\", # Change to your own Bigtable instance name\n", + " table=\"garrettwu-no-col-family\", # Change to your own Bigtable table name\n", + " service_account_email=\"streaming-testing-admin@bigframes-load-testing.iam.gserviceaccount.com\", # Change to your own service account\n", " app_profile=None,\n", " 
truncate=True,\n", " overwrite=True,\n", @@ -378,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -397,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -406,7 +456,7 @@ "True" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -419,13 +469,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### PubSub\n", - "Create Pubsub streaming job" + "### Pub/Sub\n", + "Create Pub/Sub streaming job" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -438,27 +488,28 @@ } ], "source": [ + "# Pub/Sub requires a single column\n", "sdf = sdf[[\"rowkey\"]]" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/garrettwu/src/bigframes/bigframes/streaming/dataframe.py:453: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n", + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/streaming/dataframe.py:456: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n", " warnings.warn(\n" ] } ], "source": [ "job = sdf.to_pubsub(\n", - " topic=\"penguins\",\n", - " service_account_email=\"streaming-testing@bigframes-load-testing.iam.gserviceaccount.com\",\n", + " topic=\"penguins\", # Change to your own Pub/Sub topic ID\n", + " service_account_email=\"streaming-testing@bigframes-load-testing.iam.gserviceaccount.com\", # Change to your own service account\n", " job_id=None,\n", " job_id_prefix=job_id_prefix,\n", " )" @@ -466,7 +517,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -485,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -494,7 +545,7 @@ "True" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } From eb6080460344aff2fabb7864536ea4fe24c5fbef Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Tue, 6 Aug 2024 15:02:19 -0700 Subject: [PATCH 03/15] perf: Generate SQL with fewer CTEs (#877) --- bigframes/core/compile/compiled.py | 135 +++++++++++++++++------------ bigframes/core/compile/compiler.py | 1 - bigframes/core/window_spec.py | 13 ++- tests/unit/session/test_session.py | 3 +- 4 files changed, 93 insertions(+), 59 deletions(-) diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 538789f9d7..cae527931c 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -24,6 +24,7 @@ import ibis.backends.bigquery as ibis_bigquery import ibis.common.deferred # type: ignore import ibis.expr.datatypes as ibis_dtypes +import ibis.expr.operations as ibis_ops import ibis.expr.types as ibis_types import pandas @@ -36,7 +37,6 @@ from bigframes.core.ordering import ( ascending_over, encode_order_string, - IntegerEncoding, join_orderings, OrderingExpression, RowOrdering, @@ -71,19 +71,16 @@ def __init__( # Allow creating a DataFrame directly from an Ibis table expression. # TODO(swast): Validate that each column references the same table (or # no table for literal values). 
- self._columns = tuple(columns) + self._columns = tuple( + column.resolve(table) + # TODO(https://github.com/ibis-project/ibis/issues/7613): use + # public API to refer to Deferred type. + if isinstance(column, ibis.common.deferred.Deferred) else column + for column in columns + ) # To allow for more efficient lookup by column name, create a # dictionary mapping names to column values. - self._column_names = { - ( - column.resolve(table) - # TODO(https://github.com/ibis-project/ibis/issues/7613): use - # public API to refer to Deferred type. - if isinstance(column, ibis.common.deferred.Deferred) - else column - ).get_name(): column - for column in self._columns - } + self._column_names = {column.get_name(): column for column in self._columns} @property def columns(self) -> typing.Tuple[ibis_types.Value, ...]: @@ -139,10 +136,6 @@ def projection( for expression, id in expression_id_pairs ] result = self._select(tuple(values)) # type: ignore - - # Need to reproject to convert ibis Scalar to ibis Column object - if any(exp_id[0].is_const for exp_id in expression_id_pairs): - result = result._reproject_to_table() return result @abc.abstractmethod @@ -300,8 +293,6 @@ def _to_ibis_expr( ArrayValue objects are sorted, so the following options are available to reflect this in the ibis expression. - * "offset_col": Zero-based offsets are generated as a column, this will - not sort the rows however. * "string_encoded": An ordered string column is provided in output table. * "unordered": No ordering information will be provided in output. Only value columns are projected. @@ -355,6 +346,10 @@ def _to_ibis_expr( return table def filter(self, predicate: ex.Expression) -> UnorderedIR: + if any(map(is_window, map(self._get_ibis_column, predicate.unbound_variables))): + # ibis doesn't support qualify syntax, so create CTE if filtering over window expression + # https://github.com/ibis-project/ibis/issues/9775 + return self._reproject_to_table().filter(predicate) bindings = {col: self._get_ibis_column(col) for col in self.column_ids} condition = op_compiler.compile_expression(predicate, bindings) return self._filter(condition) @@ -785,15 +780,33 @@ def promote_offsets(self, col_id: str) -> OrderedIR: """ # Special case: offsets already exist ordering = self._ordering + # Case 1, already have offsets, just create column from them + if ordering.is_sequential and (ordering.total_order_col is not None): + expr_builder = self.builder() + expr_builder.columns = [ + self._compile_expression( + ordering.total_order_col.scalar_expression + ).name(col_id), + *self.columns, + ] + return expr_builder.build() + # Cannot nest analytic expressions, so reproject to cte first if needed. 
+ # Also ibis cannot window literals, so need to reproject those (even though this is legal in googlesql) + # Seee: https://github.com/ibis-project/ibis/issues/9773 + can_directly_window = not any( + map(lambda x: is_literal(x) or is_window(x), self._ibis_order) + ) + if not can_directly_window: + return self._reproject_to_table().promote_offsets(col_id) - if (not ordering.is_sequential) or (not ordering.total_order_col): - return self._project_offsets().promote_offsets(col_id) + window = ibis.window(order_by=self._ibis_order) + if self._predicates: + window = window.group_by(self._reduced_predicate) + offsets = ibis.row_number().over(window) expr_builder = self.builder() expr_builder.columns = [ - self._compile_expression(ordering.total_order_col.scalar_expression).name( - col_id - ), *self.columns, + offsets.name(col_id), ] return expr_builder.build() @@ -806,7 +819,6 @@ def project_window_op( output_name=None, *, never_skip_nulls=False, - skip_reproject_unsafe: bool = False, ) -> OrderedIR: """ Creates a new expression based on this expression with unary operation applied to one column. @@ -815,8 +827,25 @@ def project_window_op( window_spec: a specification of the window over which to apply the operator output_name: the id to assign to the output of the operator, by default will replace input col if distinct output id not provided never_skip_nulls: will disable null skipping for operators that would otherwise do so - skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection """ + # Cannot nest analytic expressions, so reproject to cte first if needed. + # Also ibis cannot window literals, so need to reproject those (even though this is legal in googlesql) + # See: https://github.com/ibis-project/ibis/issues/9773 + used_exprs = map( + self._get_any_column, [column_name, *window_spec.all_referenced_columns] + ) + can_directly_window = not any( + map(lambda x: is_literal(x) or is_window(x), used_exprs) + ) + if not can_directly_window: + return self._reproject_to_table().project_window_op( + column_name, + op, + window_spec, + output_name, + never_skip_nulls=never_skip_nulls, + ) + column = typing.cast(ibis_types.Column, self._get_ibis_column(column_name)) window = self._ibis_window_from_spec( window_spec, require_total_order=op.uses_total_row_ordering @@ -861,8 +890,7 @@ def project_window_op( window_op = case_statement result = self._set_or_replace_by_id(output_name or column_name, window_op) - # TODO(tbergeron): Automatically track analytic expression usage and defer reprojection until required for valid query generation. - return result._reproject_to_table() if not skip_reproject_unsafe else result + return result def _reproject_to_table(self) -> OrderedIR: table = self._to_ibis_expr( @@ -944,7 +972,7 @@ def _to_ibis_expr( expose_hidden_cols: bool = False, fraction: Optional[float] = None, col_id_overrides: typing.Mapping[str, str] = {}, - ordering_mode: Literal["string_encoded", "offset_col", "unordered"], + ordering_mode: Literal["string_encoded", "unordered"], order_col_name: Optional[str] = ORDER_ID_COLUMN, ): """ @@ -953,8 +981,7 @@ def _to_ibis_expr( ArrayValue objects are sorted, so the following options are available to reflect this in the ibis expression. - * "offset_col": Zero-based offsets are generated as a column, this will - not sort the rows however. 
+ * "string_encoded": An ordered string column is provided in output table. * "unordered": No ordering information will be provided in output. Only value columns are projected. @@ -981,10 +1008,9 @@ def _to_ibis_expr( """ assert ordering_mode in ( "string_encoded", - "offset_col", "unordered", ) - if expose_hidden_cols and ordering_mode in ("ordered_col", "offset_col"): + if expose_hidden_cols and ordering_mode in ("ordered_col"): raise ValueError( f"Cannot expose hidden ordering columns with ordering_mode {ordering_mode}" ) @@ -1034,6 +1060,10 @@ def _to_ibis_expr( return table def filter(self, predicate: ex.Expression) -> OrderedIR: + if any(map(is_window, map(self._get_ibis_column, predicate.unbound_variables))): + # ibis doesn't support qualify syntax, so create CTE if filtering over window expression + # https://github.com/ibis-project/ibis/issues/9775 + return self._reproject_to_table().filter(predicate) bindings = {col: self._get_ibis_column(col) for col in self.column_ids} condition = op_compiler.compile_expression(predicate, bindings) return self._filter(condition) @@ -1174,27 +1204,6 @@ def _bake_ordering(self) -> OrderedIR: predicates=self._predicates, ) - def _project_offsets(self) -> OrderedIR: - """Create a new expression that contains offsets. Should only be executed when - offsets are needed for an operations. Has no effect on expression semantics.""" - if self._ordering.is_sequential: - return self - table = self._to_ibis_expr( - ordering_mode="offset_col", order_col_name=ORDER_ID_COLUMN - ) - columns = [table[column_name] for column_name in self._column_names] - ordering = TotalOrdering( - ordering_value_columns=tuple([ascending_over(ORDER_ID_COLUMN)]), - total_ordering_columns=frozenset([ORDER_ID_COLUMN]), - integer_encoding=IntegerEncoding(True, is_sequential=True), - ) - return OrderedIR( - table, - columns=columns, - hidden_ordering_columns=[table[ORDER_ID_COLUMN]], - ordering=ordering, - ) - def _create_order_columns( self, ordering_mode: str, @@ -1202,9 +1211,7 @@ def _create_order_columns( expose_hidden_cols: bool, ) -> typing.Sequence[ibis_types.Value]: # Generate offsets if current ordering id semantics are not sufficiently strict - if ordering_mode == "offset_col": - return (self._create_offset_column().name(order_col_name),) - elif ordering_mode == "string_encoded": + if ordering_mode == "string_encoded": return (self._create_string_ordering_column().name(order_col_name),) elif expose_hidden_cols: return self._hidden_ordering_columns @@ -1328,6 +1335,22 @@ def build(self) -> OrderedIR: ) +def is_literal(column: ibis_types.Value) -> bool: + # Unfortunately, Literals in ibis are not "Columns"s and therefore can't be aggregated. 
+ return not isinstance(column, ibis_types.Column) + + +def is_window(column: ibis_types.Value) -> bool: + matches = ( + (column) + .op() + .find_topmost( + lambda x: isinstance(x, (ibis_ops.WindowFunction, ibis_ops.Relation)) + ) + ) + return any(isinstance(op, ibis_ops.WindowFunction) for op in matches) + + def _reduce_predicate_list( predicate_list: typing.Collection[ibis_types.BooleanValue], ) -> ibis_types.BooleanValue: diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index c7f8c5ab59..8fb1f7ab3a 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -304,7 +304,6 @@ def compile_window(self, node: nodes.WindowOpNode, ordered: bool = True): node.window_spec, node.output_name, never_skip_nulls=node.never_skip_nulls, - skip_reproject_unsafe=node.skip_reproject_unsafe, ) return result if ordered else result.to_unordered() diff --git a/bigframes/core/window_spec.py b/bigframes/core/window_spec.py index 57c57b451a..f011e2848d 100644 --- a/bigframes/core/window_spec.py +++ b/bigframes/core/window_spec.py @@ -14,7 +14,8 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Optional, Tuple, Union +import itertools +from typing import Optional, Set, Tuple, Union import bigframes.core.ordering as orderings @@ -162,3 +163,13 @@ def row_bounded(self): to calculate deterministically. """ return isinstance(self.bounds, RowsWindowBounds) + + @property + def all_referenced_columns(self) -> Set[str]: + """ + Return list of all variables reference ind the window. + """ + ordering_vars = itertools.chain.from_iterable( + item.scalar_expression.unbound_variables for item in self.ordering + ) + return set(itertools.chain(self.grouping_keys, ordering_vars)) diff --git a/tests/unit/session/test_session.py b/tests/unit/session/test_session.py index 31029abd67..2f7eaa567a 100644 --- a/tests/unit/session/test_session.py +++ b/tests/unit/session/test_session.py @@ -246,7 +246,8 @@ def test_default_index_warning_not_raised_by_read_gbq_index_col_sequential_int64 index_col=bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64, ) - # We expect a window operation because we specificaly requested a sequential index. + # We expect a window operation because we specificaly requested a sequential index and named it. 
+ df.index.name = "named_index" generated_sql = df.sql.casefold() assert "OVER".casefold() in generated_sql assert "ROW_NUMBER()".casefold() in generated_sql From 171da6cb33165b49d46ea6528038342abd89e9fa Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Tue, 6 Aug 2024 15:35:05 -0700 Subject: [PATCH 04/15] docs: add streaming html docs (#884) * docs: add streaming html docs * add beta icon * add beta icon in toc.yml --- bigframes/session/__init__.py | 4 +++- bigframes/streaming/dataframe.py | 11 ++++++++++- docs/reference/bigframes.streaming/dataframe.rst | 6 ++++++ docs/reference/bigframes.streaming/index.rst | 13 +++++++++++++ docs/reference/index.rst | 1 + docs/templates/toc.yml | 7 +++++++ 6 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 docs/reference/bigframes.streaming/dataframe.rst create mode 100644 docs/reference/bigframes.streaming/index.rst diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 8ff5862bfc..2da788292b 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -760,7 +760,9 @@ def read_gbq_table_streaming( ) -> streaming_dataframe.StreamingDataFrame: """Turn a BigQuery table into a StreamingDataFrame. - Note: The bigframes.streaming module is a preview feature, and subject to change. + .. note:: + + The bigframes.streaming module is a preview feature, and subject to change. **Examples:** diff --git a/bigframes/streaming/dataframe.py b/bigframes/streaming/dataframe.py index 64a4898c57..b83ae5d822 100644 --- a/bigframes/streaming/dataframe.py +++ b/bigframes/streaming/dataframe.py @@ -191,7 +191,16 @@ def to_pubsub( @log_adapter.class_logger class StreamingDataFrame(StreamingBase): - __doc__ = _curate_df_doc(dataframe.DataFrame.__doc__) + __doc__ = ( + _curate_df_doc(dataframe.DataFrame.__doc__) + + """ + .. note:: + + The bigframes.streaming module is a preview feature, and subject to change. + + Currently only supports basic projection, filtering and preview operations. + """ + ) # Private constructor _create_key = object() diff --git a/docs/reference/bigframes.streaming/dataframe.rst b/docs/reference/bigframes.streaming/dataframe.rst new file mode 100644 index 0000000000..79ec64961c --- /dev/null +++ b/docs/reference/bigframes.streaming/dataframe.rst @@ -0,0 +1,6 @@ +bigframes.streaming.dataframe +============================= + +.. autoclass:: bigframes.streaming.dataframe.StreamingDataFrame + :members: + :inherited-members: diff --git a/docs/reference/bigframes.streaming/index.rst b/docs/reference/bigframes.streaming/index.rst new file mode 100644 index 0000000000..20a22072e5 --- /dev/null +++ b/docs/reference/bigframes.streaming/index.rst @@ -0,0 +1,13 @@ + +============================ +BigQuery DataFrame Streaming +============================ + +.. automodule:: bigframes.streaming + :members: + :undoc-members: + +.. toctree:: + :maxdepth: 2 + + dataframe diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 387e9b5ced..eb5a774b29 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -11,3 +11,4 @@ packages. 
bigframes.pandas/index bigframes.ml/index bigframes.bigquery/index + bigframes.streaming/index diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml index 95bded9a60..736ffba286 100644 --- a/docs/templates/toc.yml +++ b/docs/templates/toc.yml @@ -203,4 +203,11 @@ - name: BigQuery built-in functions uid: bigframes.bigquery name: bigframes.bigquery + - items: + - name: Overview + uid: bigframes.streaming + - name: StreamingDataFrame + uid: bigframes.streaming.dataframe.StreamingDataFrame + name: bigframes.streaming + status: beta name: BigQuery DataFrames From 8c352ce6991b3635222a285d1eee9a56cd57d0c6 Mon Sep 17 00:00:00 2001 From: mattyopl <90574735+mattyopl@users.noreply.github.com> Date: Tue, 6 Aug 2024 20:31:21 -0400 Subject: [PATCH 05/15] chore: clean up OWNERS (#886) - remove inactive users - add myself Co-authored-by: Matthew Laurence Chen --- OWNERS | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/OWNERS b/OWNERS index f86ad551ef..562ee0f19b 100644 --- a/OWNERS +++ b/OWNERS @@ -1,12 +1,8 @@ -ashleyxu@google.com -bmil@google.com chelsealin@google.com garrettwu@google.com -henryjsolberg@google.com -hormati@google.com huanc@google.com jiaxun@google.com -kemppeterson@google.com +mlaurencechen@google.com shobs@google.com swast@google.com -tbergeron@google.com +tbergeron@google.com \ No newline at end of file From 3dbf84bd1531c1f8d41ba57c2c38b3ba6abfb812 Mon Sep 17 00:00:00 2001 From: Chelsea Lin <124939984+chelsea-lin@users.noreply.github.com> Date: Tue, 6 Aug 2024 18:03:50 -0700 Subject: [PATCH 06/15] feat: bigframes.bigquery.json_extract (#868) * feat: bigframes.bigquery.json_extract * fixing tests --- bigframes/bigquery/__init__.py | 35 ++++++++++++++++++++ bigframes/core/compile/scalar_op_compiler.py | 12 +++++++ bigframes/operations/__init__.py | 16 +++++++++ tests/system/small/bigquery/test_json.py | 27 +++++++++++++++ 4 files changed, 90 insertions(+) diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index ec26d14f33..7c409839b1 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -208,6 +208,41 @@ def json_set( return series +def json_extract( + series: series.Series, + json_path: str, +) -> series.Series: + """Extracts a JSON value and converts it to a SQL JSON-formatted `STRING` or `JSON` + value. This function uses single quotes and brackets to escape invalid JSONPath + characters in JSON keys. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}']) + >>> bbq.json_extract(s, json_path="$.class") + 0 "{\\\"students\\\":[{\\\"id\\\":5},{\\\"id\\\":12}]}" + dtype: string + + Args: + series (bigframes.series.Series): + The Series containing JSON data (as native JSON objects or JSON-formatted strings). + json_path (str): + The JSON path identifying the data that you want to obtain from the input. + + Returns: + bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING. 
+ """ + return series._apply_unary_op(ops.JSONExtract(json_path=json_path)) + + +# Search functions defined from +# https://cloud.google.com/bigquery/docs/reference/standard-sql/search_functions + + def vector_search( base_table: str, column_to_search: str, diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 67d0dac436..32749b32a6 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -922,6 +922,11 @@ def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet): ).to_expr() +@scalar_op_compiler.register_unary_op(ops.JSONExtract, pass_op=True) +def json_extract_op_impl(x: ibis_types.Value, op: ops.JSONExtract): + return json_extract(json_obj=x, json_path=op.json_path) + + ### Binary Ops def short_circuit_nulls(type_override: typing.Optional[ibis_dtypes.DataType] = None): """Wraps a binary operator to generate nulls of the expected type if either input is a null scalar.""" @@ -1549,6 +1554,13 @@ def json_set( """Produces a new SQL JSON value with the specified JSON data inserted or replaced.""" +@ibis.udf.scalar.builtin(name="json_extract") +def json_extract( + json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.str +) -> ibis_dtypes.JSON: + """Extracts a JSON value and converts it to a SQL JSON-formatted STRING or JSON value.""" + + @ibis.udf.scalar.builtin(name="ML.DISTANCE") def vector_distance(vector1, vector2, type: str) -> ibis_dtypes.Float64: """Computes the distance between two vectors using specified type ("EUCLIDEAN", "MANHATTAN", or "COSINE")""" diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 523882c14e..4d4e40643d 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -602,6 +602,22 @@ def output_type(self, *input_types): return dtypes.STRING_DTYPE +## JSON Ops +@dataclasses.dataclass(frozen=True) +class JSONExtract(UnaryOp): + name: typing.ClassVar[str] = "json_extract" + json_path: str + + def output_type(self, *input_types): + input_type = input_types[0] + if not dtypes.is_json_like(input_type): + raise TypeError( + "Input type must be an valid JSON object or JSON-formatted string type." + + f" Received type: {input_type}" + ) + return input_type + + # Binary Ops fillna_op = create_binary_op(name="fillna", type_signature=op_typing.COERCE) maximum_op = create_binary_op(name="maximum", type_signature=op_typing.COERCE) diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py index 9e0c06e0bd..059b8eea87 100644 --- a/tests/system/small/bigquery/test_json.py +++ b/tests/system/small/bigquery/test_json.py @@ -110,3 +110,30 @@ def test_json_set_w_invalid_value_type(): def test_json_set_w_invalid_series_type(): with pytest.raises(TypeError): bbq.json_set(bpd.Series([1, 2]), json_path_value_pairs=[("$.a", 1)]) + + +def test_json_extract_from_json(): + s = _get_series_from_json([{"a": {"b": [1, 2]}}, {"a": {"c": 1}}, {"a": {"b": 0}}]) + actual = bbq.json_extract(s, "$.a.b") + # After the introduction of the JSON type, the output should be a JSON-formatted series. 
+ expected = _get_series_from_json(["[1,2]", None, "0"]) + pd.testing.assert_series_equal( + actual.to_pandas(), + expected.to_pandas(), + ) + + +def test_json_extract_from_string(): + s = bpd.Series(['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}']) + actual = bbq.json_extract(s, "$.a.b") + expected = _get_series_from_json(["[1,2]", None, "0"]) + pd.testing.assert_series_equal( + actual.to_pandas(), + expected.to_pandas(), + check_names=False, + ) + + +def test_json_extract_w_invalid_series_type(): + with pytest.raises(TypeError): + bbq.json_extract(bpd.Series([1, 2]), "$.a") From 0603e62fcdf513ca4207909af4dbd8d036af0b0c Mon Sep 17 00:00:00 2001 From: Chelsea Lin <124939984+chelsea-lin@users.noreply.github.com> Date: Wed, 7 Aug 2024 11:38:58 -0700 Subject: [PATCH 07/15] chore: fix docs build on owlbot.py (#888) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: fix docs build on owlbot.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .kokoro/publish-docs.sh | 3 +++ owlbot.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 233205d580..297b14ac90 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -58,5 +58,8 @@ python3.10 -m docuploader create-metadata \ cat docs.metadata +# Replace toc.yml template file +mv docs/templates/toc.yml docs/_build/html/docfx_yaml/toc.yml + # upload docs python3.10 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" diff --git a/owlbot.py b/owlbot.py index ddc578c3a2..f9d9410d6d 100644 --- a/owlbot.py +++ b/owlbot.py @@ -112,14 +112,14 @@ re.escape("# upload docs") + "\n" + re.escape( - 'python3 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}"' + 'python3.10 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}"' ) ), ( "# Replace toc.yml template file\n" + "mv docs/templates/toc.yml docs/_build/html/docfx_yaml/toc.yml\n\n" + "# upload docs\n" - + 'python3 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}"' + + 'python3.10 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}"' ), ) From 3eb6a17a5823faf5ecba92cb9a554df74477871d Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 8 Aug 2024 12:03:30 -0700 Subject: [PATCH 08/15] docs: fix the `DisplayOptions` doc rendering (#893) --- third_party/bigframes_vendored/pandas/core/config_init.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py index 84ab90a322..4bca3f3c75 100644 --- a/third_party/bigframes_vendored/pandas/core/config_init.py +++ b/third_party/bigframes_vendored/pandas/core/config_init.py @@ -18,6 +18,7 @@ **Examples:** Define Repr mode to "deferred" will prevent job execution in repr. + >>> import bigframes.pandas as bpd >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") @@ -26,25 +27,32 @@ Computation deferred. 
Computation will process 28.9 kB Users can also get a dry run of the job by accessing the query_job property before they've run the job. This will return a dry run instance of the job they can inspect. + >>> df.query_job.total_bytes_processed 28947 User can execute the job by calling .to_pandas() + >>> # df.to_pandas() Reset repr_mode option + >>> bpd.options.display.repr_mode = "head" Can also set the progress_bar option to see the progress bar in terminal, + >>> bpd.options.display.progress_bar = "terminal" notebook, + >>> bpd.options.display.progress_bar = "notebook" or just remove it. + >>> bpd.options.display.progress_bar = None Setting to default value "auto" will detect and show progress bar automatically. + >>> bpd.options.display.progress_bar = "auto" Attributes: From 0c011a8212a8c0824a12c560b6d63048362275e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 8 Aug 2024 14:28:26 -0500 Subject: [PATCH 09/15] test: enable tests on Windows (#857) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test: enable tests on Windows * use windows subdirectory for scripts * maybe fix for windows * fix working directory * use nox so test dependencies are installed * add 3.10 to system tests * disable system tests * add more goto errors * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .kokoro/continuous/windows.cfg | 3 +++ .kokoro/presubmit/windows.cfg | 3 +++ noxfile.py | 6 +++-- scripts/windows/build.bat | 38 ++++++++++++++++++++++++++++++++ scripts/windows/test.bat | 40 ++++++++++++++++++++++++++++++++++ 5 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 .kokoro/continuous/windows.cfg create mode 100644 .kokoro/presubmit/windows.cfg create mode 100644 scripts/windows/build.bat create mode 100644 scripts/windows/test.bat diff --git a/.kokoro/continuous/windows.cfg b/.kokoro/continuous/windows.cfg new file mode 100644 index 0000000000..806986138d --- /dev/null +++ b/.kokoro/continuous/windows.cfg @@ -0,0 +1,3 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +build_file: "python-bigquery-dataframes/scripts/windows/build.bat" diff --git a/.kokoro/presubmit/windows.cfg b/.kokoro/presubmit/windows.cfg new file mode 100644 index 0000000000..806986138d --- /dev/null +++ b/.kokoro/presubmit/windows.cfg @@ -0,0 +1,3 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +build_file: "python-bigquery-dataframes/scripts/windows/build.bat" diff --git a/noxfile.py b/noxfile.py index d69c16e69c..c464b47270 100644 --- a/noxfile.py +++ b/noxfile.py @@ -62,7 +62,8 @@ UNIT_TEST_EXTRAS: List[str] = [] UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {} -SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.12"] +# 3.10 is needed for Windows tests. +SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.12"] SYSTEM_TEST_STANDARD_DEPENDENCIES = [ "jinja2", "mock", @@ -100,7 +101,8 @@ "docfx", "unit", "unit_noextras", - "system", + "system-3.9", + "system-3.12", "cover", ] diff --git a/scripts/windows/build.bat b/scripts/windows/build.bat new file mode 100644 index 0000000000..d599702c98 --- /dev/null +++ b/scripts/windows/build.bat @@ -0,0 +1,38 @@ +@rem Copyright 2024 Google LLC +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. 
+@rem You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +:; Change directory to repo root. +SET script_dir="%~dp0" +cd "%~dp0"\..\.. + +echo "Listing available Python versions' +py -0 || goto :error + +py -3.10 -m pip install --upgrade pip || goto :error +py -3.10 -m pip install --upgrade pip setuptools wheel || goto :error + +echo "Building Wheel" +py -3.10 -m pip wheel . --wheel-dir wheels || goto :error/ + +echo "Built wheel, now running tests." +call "%script_dir%"/test.bat 3.10 || goto :error + +echo "Windows build has completed successfully" + +:; https://stackoverflow.com/a/46813196/101923 +:; exit 0 +exit /b 0 + +:error +exit /b %errorlevel% diff --git a/scripts/windows/test.bat b/scripts/windows/test.bat new file mode 100644 index 0000000000..bcd605bd12 --- /dev/null +++ b/scripts/windows/test.bat @@ -0,0 +1,40 @@ +@rem Copyright 2024 Google LLC +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@rem This test file runs for one Python version at a time, and is intended to +@rem be called from within the build loop. + +:; Change directory to repo root. +SET script_dir="%~dp0" +cd "%~dp0"\..\.. 
+ +set PYTHON_VERSION=%1 +if "%PYTHON_VERSION%"=="" ( + echo "Python version was not provided, using Python 3.10" + set PYTHON_VERSION=3.10 +) + +py -%PYTHON_VERSION%-64 -m pip install nox || goto :error + +py -%PYTHON_VERSION%-64 -m nox -s unit-"%PYTHON_VERSION%" || goto :error + +:; TODO(b/358148440): enable system tests on windows +:; py -%PYTHON_VERSION%-64 -m nox -s system-"%PYTHON_VERSION%" || goto :error + +:; https://stackoverflow.com/a/46813196/101923 +:; exit 0 +exit /b 0 + +:error +exit /b %errorlevel% From 7117e33f2b6bc89ae8ce9b168d98bbcb21c08e52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 8 Aug 2024 16:31:23 -0500 Subject: [PATCH 10/15] chore: require Windows unit tests and re-enable owlbot check (#895) --- .github/sync-repo-settings.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index e098468da6..c2f3673fcc 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -7,8 +7,7 @@ branchProtectionRules: requiresCodeOwnerReviews: true requiresStrictStatusChecks: false requiredStatusCheckContexts: -# TODO(b/347075426): Restore owlbot as required check -# - 'OwlBot Post Processor' + - 'OwlBot Post Processor' - 'conventionalcommits.org' - 'cla/google' - 'docs' @@ -19,6 +18,7 @@ branchProtectionRules: - 'unit (3.12)' - 'cover' - 'Kokoro presubmit' + - 'Kokoro windows' permissionRules: - team: actools-python permission: admin From e0b11bc8c038db7b950b1653ed4cd44a6246c713 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Fri, 9 Aug 2024 16:00:36 -0700 Subject: [PATCH 11/15] perf: Speed up compilation by reducing redundant type normalization (#896) --- bigframes/bigquery/__init__.py | 2 +- bigframes/core/compile/compiled.py | 23 +++-------------------- bigframes/core/compile/ibis_types.py | 1 + tests/system/small/bigquery/test_json.py | 20 +++++++++++--------- 4 files changed, 16 insertions(+), 30 deletions(-) diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 7c409839b1..fb9503dc72 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -224,7 +224,7 @@ def json_extract( >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}']) >>> bbq.json_extract(s, json_path="$.class") - 0 "{\\\"students\\\":[{\\\"id\\\":5},{\\\"id\\\":12}]}" + 0 {"students":[{"id":5},{"id":12}]} dtype: string Args: diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index cae527931c..5492502f21 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -152,12 +152,7 @@ def _get_ibis_column(self, key: str) -> ibis_types.Value: raise ValueError( "Column name {} not in set of values: {}".format(key, self.column_ids) ) - return typing.cast( - ibis_types.Value, - bigframes.core.compile.ibis_types.ibis_value_to_canonical_type( - self._column_names[key] - ), - ) + return typing.cast(ibis_types.Value, self._column_names[key]) def get_column_type(self, key: str) -> bigframes.dtypes.Dtype: ibis_type = typing.cast( @@ -327,12 +322,7 @@ def _to_ibis_expr( if not columns: return ibis.memtable([]) - # Make sure all dtypes are the "canonical" ones for BigFrames. This is - # important for operations like UNION where the schema must match. 
- table = self._table.select( - bigframes.core.compile.ibis_types.ibis_value_to_canonical_type(column) - for column in columns - ) + table = self._table.select(columns) base_table = table if self._reduced_predicate is not None: table = table.filter(base_table[PREDICATE_COLUMN]) @@ -1039,14 +1029,7 @@ def _to_ibis_expr( # Make sure we don't have any unbound (deferred) columns. table = self._table.select(columns) - # Make sure all dtypes are the "canonical" ones for BigFrames. This is - # important for operations like UNION where the schema must match. - table = table.select( - bigframes.core.compile.ibis_types.ibis_value_to_canonical_type( - table[column] - ) - for column in table.columns - ) + table = table.select(table[column] for column in table.columns) base_table = table if self._reduced_predicate is not None: table = table.filter(base_table[PREDICATE_COLUMN]) diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py index f3221f605f..0b3038c9c7 100644 --- a/bigframes/core/compile/ibis_types.py +++ b/bigframes/core/compile/ibis_types.py @@ -208,6 +208,7 @@ def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value: name = value.get_name() if ibis_type.is_json(): value = vendored_ibis_ops.ToJsonString(value).to_expr() + value = value.case().when("null", ibis.null()).else_(value).end() return value.name(name) # Allow REQUIRED fields to be joined with NULLABLE fields. nullable_type = ibis_type.copy(nullable=True) diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py index 059b8eea87..18ccadd9f5 100644 --- a/tests/system/small/bigquery/test_json.py +++ b/tests/system/small/bigquery/test_json.py @@ -23,11 +23,13 @@ def _get_series_from_json(json_data): + # Note: converts None to sql "null" and not to json none. + values = [ + f"JSON '{json.dumps(data)}'" if data is not None else "NULL" + for data in json_data + ] sql = " UNION ALL ".join( - [ - f"SELECT {id} AS id, JSON '{json.dumps(data)}' AS data" - for id, data in enumerate(json_data) - ] + [f"SELECT {id} AS id, {value} AS data" for id, value in enumerate(values)] ) df = bpd.read_gbq(sql).set_index("id").sort_index() return df["data"] @@ -114,19 +116,19 @@ def test_json_set_w_invalid_series_type(): def test_json_extract_from_json(): s = _get_series_from_json([{"a": {"b": [1, 2]}}, {"a": {"c": 1}}, {"a": {"b": 0}}]) - actual = bbq.json_extract(s, "$.a.b") + actual = bbq.json_extract(s, "$.a.b").to_pandas() # After the introduction of the JSON type, the output should be a JSON-formatted series. 
- expected = _get_series_from_json(["[1,2]", None, "0"]) + expected = _get_series_from_json([[1, 2], None, 0]).to_pandas() pd.testing.assert_series_equal( - actual.to_pandas(), - expected.to_pandas(), + actual, + expected, ) def test_json_extract_from_string(): s = bpd.Series(['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}']) actual = bbq.json_extract(s, "$.a.b") - expected = _get_series_from_json(["[1,2]", None, "0"]) + expected = _get_series_from_json([[1, 2], None, 0]) pd.testing.assert_series_equal( actual.to_pandas(), expected.to_pandas(), From 991bb0a25e1e424de38abd065f9d79ab20c24ed2 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Mon, 12 Aug 2024 14:32:53 -0700 Subject: [PATCH 12/15] refactor: reorganize `remote_function` code for readability (#885) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: reorganize `remote_function` code for readability * refactor out RemoteFunctionSession * rename newly introduced modules to make them private symbolically --------- Co-authored-by: Tim Sweña (Swast) --- .../functions/_remote_function_client.py | 476 +++++++ .../functions/_remote_function_session.py | 546 ++++++++ bigframes/functions/_utils.py | 214 +++ bigframes/functions/remote_function.py | 1174 +---------------- bigframes/pandas/__init__.py | 4 +- bigframes/session/__init__.py | 3 +- tests/system/large/test_remote_function.py | 10 +- tests/system/small/test_remote_function.py | 7 +- tests/system/utils.py | 4 +- 9 files changed, 1261 insertions(+), 1177 deletions(-) create mode 100644 bigframes/functions/_remote_function_client.py create mode 100644 bigframes/functions/_remote_function_session.py create mode 100644 bigframes/functions/_utils.py diff --git a/bigframes/functions/_remote_function_client.py b/bigframes/functions/_remote_function_client.py new file mode 100644 index 0000000000..6ef482ecda --- /dev/null +++ b/bigframes/functions/_remote_function_client.py @@ -0,0 +1,476 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from __future__ import annotations + +import inspect +import logging +import os +import random +import shutil +import string +import sys +import tempfile +from typing import cast, Tuple, TYPE_CHECKING + +import requests + +from bigframes import constants +import bigframes.functions.remote_function_template + +if TYPE_CHECKING: + from bigframes.session import Session + +import google.api_core.exceptions +import google.api_core.retry +from google.cloud import bigquery, functions_v2 + +from . 
import _utils + +logger = logging.getLogger(__name__) + + +class RemoteFunctionClient: + # Wait time (in seconds) for an IAM binding to take effect after creation + _iam_wait_seconds = 120 + + def __init__( + self, + gcp_project_id, + cloud_function_region, + cloud_functions_client, + bq_location, + bq_dataset, + bq_client, + bq_connection_id, + bq_connection_manager, + cloud_function_service_account, + cloud_function_kms_key_name, + cloud_function_docker_repository, + *, + session: Session, + ): + self._gcp_project_id = gcp_project_id + self._cloud_function_region = cloud_function_region + self._cloud_functions_client = cloud_functions_client + self._bq_location = bq_location + self._bq_dataset = bq_dataset + self._bq_client = bq_client + self._bq_connection_id = bq_connection_id + self._bq_connection_manager = bq_connection_manager + self._cloud_function_service_account = cloud_function_service_account + self._cloud_function_kms_key_name = cloud_function_kms_key_name + self._cloud_function_docker_repository = cloud_function_docker_repository + self._session = session + + def create_bq_remote_function( + self, + input_args, + input_types, + output_type, + endpoint, + bq_function_name, + max_batching_rows, + ): + """Create a BigQuery remote function given the artifacts of a user defined + function and the http endpoint of a corresponding cloud function.""" + if self._bq_connection_manager: + self._bq_connection_manager.create_bq_connection( + self._gcp_project_id, + self._bq_location, + self._bq_connection_id, + "run.invoker", + ) + + # Create BQ function + # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_remote_function_2 + bq_function_args = [] + bq_function_return_type = output_type + + # We are expecting the input type annotations to be 1:1 with the input args + for name, type_ in zip(input_args, input_types): + bq_function_args.append(f"{name} {type_}") + + remote_function_options = { + "endpoint": endpoint, + "max_batching_rows": max_batching_rows, + } + + remote_function_options_str = ", ".join( + [ + f'{key}="{val}"' if isinstance(val, str) else f"{key}={val}" + for key, val in remote_function_options.items() + if val is not None + ] + ) + + create_function_ddl = f""" + CREATE OR REPLACE FUNCTION `{self._gcp_project_id}.{self._bq_dataset}`.{bq_function_name}({','.join(bq_function_args)}) + RETURNS {bq_function_return_type} + REMOTE WITH CONNECTION `{self._gcp_project_id}.{self._bq_location}.{self._bq_connection_id}` + OPTIONS ({remote_function_options_str})""" + + logger.info(f"Creating BQ remote function: {create_function_ddl}") + + # Make sure the dataset exists. I.e. if it doesn't exist, go ahead and + # create it + dataset = bigquery.Dataset( + bigquery.DatasetReference.from_string( + self._bq_dataset, default_project=self._gcp_project_id + ) + ) + dataset.location = self._bq_location + try: + # This check does not require bigquery.datasets.create IAM + # permission. So, if the data set already exists, then user can work + # without having that permission. + self._bq_client.get_dataset(dataset) + except google.api_core.exceptions.NotFound: + # This requires bigquery.datasets.create IAM permission + self._bq_client.create_dataset(dataset, exists_ok=True) + + # TODO(swast): plumb through the original, user-facing api_name. 
+ _, query_job = self._session._start_query(create_function_ddl) + logger.info(f"Created remote function {query_job.ddl_target_routine}") + + def get_cloud_function_fully_qualified_parent(self): + "Get the fully qualilfied parent for a cloud function." + return self._cloud_functions_client.common_location_path( + self._gcp_project_id, self._cloud_function_region + ) + + def get_cloud_function_fully_qualified_name(self, name): + "Get the fully qualilfied name for a cloud function." + return self._cloud_functions_client.function_path( + self._gcp_project_id, self._cloud_function_region, name + ) + + def get_remote_function_fully_qualilfied_name(self, name): + "Get the fully qualilfied name for a BQ remote function." + return f"{self._gcp_project_id}.{self._bq_dataset}.{name}" + + def get_cloud_function_endpoint(self, name): + """Get the http endpoint of a cloud function if it exists.""" + fully_qualified_name = self.get_cloud_function_fully_qualified_name(name) + try: + response = self._cloud_functions_client.get_function( + name=fully_qualified_name + ) + return response.service_config.uri + except google.api_core.exceptions.NotFound: + pass + return None + + def generate_cloud_function_code( + self, + def_, + directory, + *, + input_types: Tuple[str], + output_type: str, + package_requirements=None, + is_row_processor=False, + ): + """Generate the cloud function code for a given user defined function. + + Args: + input_types (tuple[str]): + Types of the input arguments in BigQuery SQL data type names. + output_type (str): + Types of the output scalar as a BigQuery SQL data type name. + """ + + # requirements.txt + if package_requirements: + requirements_txt = os.path.join(directory, "requirements.txt") + with open(requirements_txt, "w") as f: + f.write("\n".join(package_requirements)) + + # main.py + entry_point = bigframes.functions.remote_function_template.generate_cloud_function_main_code( + def_, + directory, + input_types=input_types, + output_type=output_type, + is_row_processor=is_row_processor, + ) + return entry_point + + def create_cloud_function( + self, + def_, + cf_name, + *, + input_types: Tuple[str], + output_type: str, + package_requirements=None, + timeout_seconds=600, + max_instance_count=None, + is_row_processor=False, + vpc_connector=None, + memory_mib=1024, + ): + """Create a cloud function from the given user defined function. + + Args: + input_types (tuple[str]): + Types of the input arguments in BigQuery SQL data type names. + output_type (str): + Types of the output scalar as a BigQuery SQL data type name. + """ + + # Build and deploy folder structure containing cloud function + with tempfile.TemporaryDirectory() as directory: + entry_point = self.generate_cloud_function_code( + def_, + directory, + package_requirements=package_requirements, + input_types=input_types, + output_type=output_type, + is_row_processor=is_row_processor, + ) + archive_path = shutil.make_archive(directory, "zip", directory) + + # We are creating cloud function source code from the currently running + # python version. Use the same version to deploy. This is necessary + # because cloudpickle serialization done in one python version and + # deserialization done in another python version doesn't work. + # TODO(shobs): Figure out how to achieve version compatibility, specially + # when pickle (internally used by cloudpickle) guarantees that: + # https://docs.python.org/3/library/pickle.html#:~:text=The%20pickle%20serialization%20format%20is,unique%20breaking%20change%20language%20boundary. 
+ python_version = "python{}{}".format( + sys.version_info.major, sys.version_info.minor + ) + + # Determine an upload URL for user code + upload_url_request = functions_v2.GenerateUploadUrlRequest( + kms_key_name=self._cloud_function_kms_key_name + ) + upload_url_request.parent = self.get_cloud_function_fully_qualified_parent() + upload_url_response = self._cloud_functions_client.generate_upload_url( + request=upload_url_request + ) + + # Upload the code to GCS + with open(archive_path, "rb") as f: + response = requests.put( + upload_url_response.upload_url, + data=f, + headers={"content-type": "application/zip"}, + ) + if response.status_code != 200: + raise RuntimeError( + "Failed to upload user code. code={}, reason={}, text={}".format( + response.status_code, response.reason, response.text + ) + ) + + # Deploy Cloud Function + create_function_request = functions_v2.CreateFunctionRequest() + create_function_request.parent = ( + self.get_cloud_function_fully_qualified_parent() + ) + create_function_request.function_id = cf_name + function = functions_v2.Function() + function.name = self.get_cloud_function_fully_qualified_name(cf_name) + function.build_config = functions_v2.BuildConfig() + function.build_config.runtime = python_version + function.build_config.entry_point = entry_point + function.build_config.source = functions_v2.Source() + function.build_config.source.storage_source = functions_v2.StorageSource() + function.build_config.source.storage_source.bucket = ( + upload_url_response.storage_source.bucket + ) + function.build_config.source.storage_source.object_ = ( + upload_url_response.storage_source.object_ + ) + function.build_config.docker_repository = ( + self._cloud_function_docker_repository + ) + function.service_config = functions_v2.ServiceConfig() + if memory_mib is not None: + function.service_config.available_memory = f"{memory_mib}Mi" + if timeout_seconds is not None: + if timeout_seconds > 1200: + raise ValueError( + "BigQuery remote function can wait only up to 20 minutes" + ", see for more details " + "https://cloud.google.com/bigquery/quotas#remote_function_limits." + ) + function.service_config.timeout_seconds = timeout_seconds + if max_instance_count is not None: + function.service_config.max_instance_count = max_instance_count + if vpc_connector is not None: + function.service_config.vpc_connector = vpc_connector + function.service_config.service_account_email = ( + self._cloud_function_service_account + ) + function.kms_key_name = self._cloud_function_kms_key_name + create_function_request.function = function + + # Create the cloud function and wait for it to be ready to use + try: + operation = self._cloud_functions_client.create_function( + request=create_function_request + ) + operation.result() + + # Cleanup + os.remove(archive_path) + except google.api_core.exceptions.AlreadyExists: + # If a cloud function with the same name already exists, let's + # update it + update_function_request = functions_v2.UpdateFunctionRequest() + update_function_request.function = function + operation = self._cloud_functions_client.update_function( + request=update_function_request + ) + operation.result() + + # Fetch the endpoint of the just created function + endpoint = self.get_cloud_function_endpoint(cf_name) + if not endpoint: + raise ValueError( + f"Couldn't fetch the http endpoint. 
{constants.FEEDBACK_LINK}" + ) + + logger.info( + f"Successfully created cloud function {cf_name} with uri ({endpoint})" + ) + return endpoint + + def provision_bq_remote_function( + self, + def_, + input_types, + output_type, + reuse, + name, + package_requirements, + max_batching_rows, + cloud_function_timeout, + cloud_function_max_instance_count, + is_row_processor, + cloud_function_vpc_connector, + cloud_function_memory_mib, + ): + """Provision a BigQuery remote function.""" + # Augment user package requirements with any internal package + # requirements + package_requirements = _utils._get_updated_package_requirements( + package_requirements, is_row_processor + ) + + # Compute a unique hash representing the user code + function_hash = _utils._get_hash(def_, package_requirements) + + # If reuse of any existing function with the same name (indicated by the + # same hash of its source code) is not intended, then attach a unique + # suffix to the intended function name to make it unique. + uniq_suffix = None + if not reuse: + # use 4 digits as a unique suffix which should suffice for + # uniqueness per session + uniq_suffix = "".join( + random.choices(string.ascii_lowercase + string.digits, k=4) + ) + + # Derive the name of the cloud function underlying the intended BQ + # remote function. Use the session id to identify the GCF for unnamed + # functions. The named remote functions are treated as a persistant + # artifacts, so let's keep them independent of session id, which also + # makes their naming more stable for the same udf code + session_id = None if name else self._session.session_id + cloud_function_name = _utils.get_cloud_function_name( + function_hash, session_id, uniq_suffix + ) + cf_endpoint = self.get_cloud_function_endpoint(cloud_function_name) + + # Create the cloud function if it does not exist + if not cf_endpoint: + cf_endpoint = self.create_cloud_function( + def_, + cloud_function_name, + input_types=input_types, + output_type=output_type, + package_requirements=package_requirements, + timeout_seconds=cloud_function_timeout, + max_instance_count=cloud_function_max_instance_count, + is_row_processor=is_row_processor, + vpc_connector=cloud_function_vpc_connector, + memory_mib=cloud_function_memory_mib, + ) + else: + logger.info(f"Cloud function {cloud_function_name} already exists.") + + # Derive the name of the remote function + remote_function_name = name + if not remote_function_name: + remote_function_name = _utils.get_remote_function_name( + function_hash, self._session.session_id, uniq_suffix + ) + rf_endpoint, rf_conn = self.get_remote_function_specs(remote_function_name) + + # Create the BQ remote function in following circumstances: + # 1. It does not exist + # 2. It exists but the existing remote function has different + # configuration than intended + created_new = False + if not rf_endpoint or ( + rf_endpoint != cf_endpoint or rf_conn != self._bq_connection_id + ): + input_args = inspect.getargs(def_.__code__).args + if len(input_args) != len(input_types): + raise ValueError( + "Exactly one type should be provided for every input arg." 
+ ) + self.create_bq_remote_function( + input_args, + input_types, + output_type, + cf_endpoint, + remote_function_name, + max_batching_rows, + ) + + created_new = True + else: + logger.info(f"Remote function {remote_function_name} already exists.") + + return remote_function_name, cloud_function_name, created_new + + def get_remote_function_specs(self, remote_function_name): + """Check whether a remote function already exists for the udf.""" + http_endpoint = None + bq_connection = None + routines = self._bq_client.list_routines( + f"{self._gcp_project_id}.{self._bq_dataset}" + ) + try: + for routine in routines: + routine = cast(bigquery.Routine, routine) + if routine.reference.routine_id == remote_function_name: + rf_options = routine.remote_function_options + if rf_options: + http_endpoint = rf_options.endpoint + bq_connection = rf_options.connection + if bq_connection: + bq_connection = os.path.basename(bq_connection) + break + except google.api_core.exceptions.NotFound: + # The dataset might not exist, in which case the http_endpoint doesn't, either. + # Note: list_routines doesn't make an API request until we iterate on the response object. + pass + return (http_endpoint, bq_connection) diff --git a/bigframes/functions/_remote_function_session.py b/bigframes/functions/_remote_function_session.py new file mode 100644 index 0000000000..0ab19ca353 --- /dev/null +++ b/bigframes/functions/_remote_function_session.py @@ -0,0 +1,546 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from __future__ import annotations + +import collections.abc +import inspect +import sys +import threading +from typing import Any, cast, Dict, Mapping, Optional, Sequence, TYPE_CHECKING, Union +import warnings + +import google.api_core.exceptions +from google.cloud import ( + bigquery, + bigquery_connection_v1, + functions_v2, + resourcemanager_v3, +) + +from bigframes import clients, constants + +if TYPE_CHECKING: + from bigframes.session import Session + +import bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes +import ibis +import pandas + +from . import _remote_function_client as rf_client +from . 
import _utils + + +class RemoteFunctionSession: + """Session to manage remote functions.""" + + def __init__(self): + # Session level mapping of remote function artifacts + self._temp_artifacts: Dict[str, str] = dict() + + # Lock to synchronize the update of the session artifacts + self._artifacts_lock = threading.Lock() + + def _update_temp_artifacts(self, bqrf_routine: str, gcf_path: str): + """Update remote function artifacts in the current session.""" + with self._artifacts_lock: + self._temp_artifacts[bqrf_routine] = gcf_path + + def clean_up( + self, + bqclient: bigquery.Client, + gcfclient: functions_v2.FunctionServiceClient, + session_id: str, + ): + """Delete remote function artifacts in the current session.""" + with self._artifacts_lock: + for bqrf_routine, gcf_path in self._temp_artifacts.items(): + # Let's accept the possibility that the remote function may have + # been deleted directly by the user + bqclient.delete_routine(bqrf_routine, not_found_ok=True) + + # Let's accept the possibility that the cloud function may have + # been deleted directly by the user + try: + gcfclient.delete_function(name=gcf_path) + except google.api_core.exceptions.NotFound: + pass + + self._temp_artifacts.clear() + + # Inspired by @udf decorator implemented in ibis-bigquery package + # https://github.com/ibis-project/ibis-bigquery/blob/main/ibis_bigquery/udf/__init__.py + # which has moved as @js to the ibis package + # https://github.com/ibis-project/ibis/blob/master/ibis/backends/bigquery/udf/__init__.py + def remote_function( + self, + input_types: Union[None, type, Sequence[type]] = None, + output_type: Optional[type] = None, + session: Optional[Session] = None, + bigquery_client: Optional[bigquery.Client] = None, + bigquery_connection_client: Optional[ + bigquery_connection_v1.ConnectionServiceClient + ] = None, + cloud_functions_client: Optional[functions_v2.FunctionServiceClient] = None, + resource_manager_client: Optional[resourcemanager_v3.ProjectsClient] = None, + dataset: Optional[str] = None, + bigquery_connection: Optional[str] = None, + reuse: bool = True, + name: Optional[str] = None, + packages: Optional[Sequence[str]] = None, + cloud_function_service_account: Optional[str] = None, + cloud_function_kms_key_name: Optional[str] = None, + cloud_function_docker_repository: Optional[str] = None, + max_batching_rows: Optional[int] = 1000, + cloud_function_timeout: Optional[int] = 600, + cloud_function_max_instances: Optional[int] = None, + cloud_function_vpc_connector: Optional[str] = None, + cloud_function_memory_mib: Optional[int] = 1024, + ): + """Decorator to turn a user defined function into a BigQuery remote function. + + .. deprecated:: 0.0.1 + This is an internal method. Please use :func:`bigframes.pandas.remote_function` instead. + + .. note:: + Please make sure following is setup before using this API: + + 1. 
Have the below APIs enabled for your project: + + * BigQuery Connection API + * Cloud Functions API + * Cloud Run API + * Cloud Build API + * Artifact Registry API + * Cloud Resource Manager API + + This can be done from the cloud console (change `PROJECT_ID` to yours): + https://console.cloud.google.com/apis/enableflow?apiid=bigqueryconnection.googleapis.com,cloudfunctions.googleapis.com,run.googleapis.com,cloudbuild.googleapis.com,artifactregistry.googleapis.com,cloudresourcemanager.googleapis.com&project=PROJECT_ID + + Or from the gcloud CLI: + + `$ gcloud services enable bigqueryconnection.googleapis.com cloudfunctions.googleapis.com run.googleapis.com cloudbuild.googleapis.com artifactregistry.googleapis.com cloudresourcemanager.googleapis.com` + + 2. Have following IAM roles enabled for you: + + * BigQuery Data Editor (roles/bigquery.dataEditor) + * BigQuery Connection Admin (roles/bigquery.connectionAdmin) + * Cloud Functions Developer (roles/cloudfunctions.developer) + * Service Account User (roles/iam.serviceAccountUser) on the service account `PROJECT_NUMBER-compute@developer.gserviceaccount.com` + * Storage Object Viewer (roles/storage.objectViewer) + * Project IAM Admin (roles/resourcemanager.projectIamAdmin) (Only required if the bigquery connection being used is not pre-created and is created dynamically with user credentials.) + + 3. Either the user has setIamPolicy privilege on the project, or a BigQuery connection is pre-created with necessary IAM role set: + + 1. To create a connection, follow https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_connection + 2. To set up IAM, follow https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#grant_permission_on_function + + Alternatively, the IAM could also be setup via the gcloud CLI: + + `$ gcloud projects add-iam-policy-binding PROJECT_ID --member="serviceAccount:CONNECTION_SERVICE_ACCOUNT_ID" --role="roles/run.invoker"`. + + Args: + input_types (None, type, or sequence(type)): + For scalar user defined function it should be the input type or + sequence of input types. For row processing user defined function, + type `Series` should be specified. + output_type (Optional[type]): + Data type of the output in the user defined function. + session (bigframes.Session, Optional): + BigQuery DataFrames session to use for getting default project, + dataset and BigQuery connection. + bigquery_client (google.cloud.bigquery.Client, Optional): + Client to use for BigQuery operations. If this param is not provided + then bigquery client from the session would be used. + bigquery_connection_client (google.cloud.bigquery_connection_v1.ConnectionServiceClient, Optional): + Client to use for BigQuery connection operations. If this param is + not provided then bigquery connection client from the session would + be used. + cloud_functions_client (google.cloud.functions_v2.FunctionServiceClient, Optional): + Client to use for cloud functions operations. If this param is not + provided then the functions client from the session would be used. + resource_manager_client (google.cloud.resourcemanager_v3.ProjectsClient, Optional): + Client to use for cloud resource management operations, e.g. for + getting and setting IAM roles on cloud resources. If this param is + not provided then resource manager client from the session would be + used. + dataset (str, Optional.): + Dataset in which to create a BigQuery remote function. It should be in + `.` or `` format. 
If this + parameter is not provided then session dataset id is used. + bigquery_connection (str, Optional): + Name of the BigQuery connection in the form of `CONNECTION_ID` or + `LOCATION.CONNECTION_ID` or `PROJECT_ID.LOCATION.CONNECTION_ID`. + If this param is not provided then the bigquery connection from the session + would be used. If it is pre created in the same location as the + `bigquery_client.location` then it would be used, otherwise it is created + dynamically using the `bigquery_connection_client` assuming the user has necessary + priviliges. The PROJECT_ID should be the same as the BigQuery connection project. + reuse (bool, Optional): + Reuse the remote function if already exists. + `True` by default, which will result in reusing an existing remote + function and corresponding cloud function that was previously + created (if any) for the same udf. + Please note that for an unnamed (i.e. created without an explicit + `name` argument) remote function, the BigQuery DataFrames + session id is attached in the cloud artifacts names. So for the + effective reuse across the sessions it is recommended to create + the remote function with an explicit `name`. + Setting it to `False` would force creating a unique remote function. + If the required remote function does not exist then it would be + created irrespective of this param. + name (str, Optional): + Explicit name of the persisted BigQuery remote function. Use it with + caution, because two users working in the same project and dataset + could overwrite each other's remote functions if they use the same + persistent name. When an explicit name is provided, any session + specific clean up (``bigframes.session.Session.close``/ + ``bigframes.pandas.close_session``/ + ``bigframes.pandas.reset_session``/ + ``bigframes.pandas.clean_up_by_session_id``) does not clean up + the function, and leaves it for the user to manage the function + and the associated cloud function directly. + packages (str[], Optional): + Explicit name of the external package dependencies. Each dependency + is added to the `requirements.txt` as is, and can be of the form + supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/. + cloud_function_service_account (str, Optional): + Service account to use for the cloud functions. If not provided then + the default service account would be used. See + https://cloud.google.com/functions/docs/securing/function-identity + for more details. Please make sure the service account has the + necessary IAM permissions configured as described in + https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration. + cloud_function_kms_key_name (str, Optional): + Customer managed encryption key to protect cloud functions and + related data at rest. This is of the format + projects/PROJECT_ID/locations/LOCATION/keyRings/KEYRING/cryptoKeys/KEY. + Read https://cloud.google.com/functions/docs/securing/cmek for + more details including granting necessary service accounts + access to the key. + cloud_function_docker_repository (str, Optional): + Docker repository created with the same encryption key as + `cloud_function_kms_key_name` to store encrypted artifacts + created to support the cloud function. This is of the format + projects/PROJECT_ID/locations/LOCATION/repositories/REPOSITORY_NAME. + For more details see + https://cloud.google.com/functions/docs/securing/cmek#before_you_begin. 
+ max_batching_rows (int, Optional): + The maximum number of rows to be batched for processing in the + BQ remote function. Default value is 1000. A lower number can be + passed to avoid timeouts in case the user code is too complex to + process large number of rows fast enough. A higher number can be + used to increase throughput in case the user code is fast enough. + `None` can be passed to let BQ remote functions service apply + default batching. See for more details + https://cloud.google.com/bigquery/docs/remote-functions#limiting_number_of_rows_in_a_batch_request. + cloud_function_timeout (int, Optional): + The maximum amount of time (in seconds) BigQuery should wait for + the cloud function to return a response. See for more details + https://cloud.google.com/functions/docs/configuring/timeout. + Please note that even though the cloud function (2nd gen) itself + allows seeting up to 60 minutes of timeout, BigQuery remote + function can wait only up to 20 minutes, see for more details + https://cloud.google.com/bigquery/quotas#remote_function_limits. + By default BigQuery DataFrames uses a 10 minute timeout. `None` + can be passed to let the cloud functions default timeout take effect. + cloud_function_max_instances (int, Optional): + The maximumm instance count for the cloud function created. This + can be used to control how many cloud function instances can be + active at max at any given point of time. Lower setting can help + control the spike in the billing. Higher setting can help + support processing larger scale data. When not specified, cloud + function's default setting applies. For more details see + https://cloud.google.com/functions/docs/configuring/max-instances. + cloud_function_vpc_connector (str, Optional): + The VPC connector you would like to configure for your cloud + function. This is useful if your code needs access to data or + service(s) that are on a VPC network. See for more details + https://cloud.google.com/functions/docs/networking/connecting-vpc. + cloud_function_memory_mib (int, Optional): + The amounts of memory (in mebibytes) to allocate for the cloud + function (2nd gen) created. This also dictates a corresponding + amount of allocated CPU for the function. By default a memory of + 1024 MiB is set for the cloud functions created to support + BigQuery DataFrames remote function. If you want to let the + default memory of cloud functions be allocated, pass `None`. See + for more details + https://cloud.google.com/functions/docs/configuring/memory. + """ + # Some defaults may be used from the session if not provided otherwise + import bigframes.exceptions as bf_exceptions + import bigframes.pandas as bpd + import bigframes.series as bf_series + import bigframes.session + + session = cast(bigframes.session.Session, session or bpd.get_global_session()) + + # A BigQuery client is required to perform BQ operations + if not bigquery_client: + bigquery_client = session.bqclient + if not bigquery_client: + raise ValueError( + "A bigquery client must be provided, either directly or via session. " + f"{constants.FEEDBACK_LINK}" + ) + + # A BigQuery connection client is required to perform BQ connection operations + if not bigquery_connection_client: + bigquery_connection_client = session.bqconnectionclient + if not bigquery_connection_client: + raise ValueError( + "A bigquery connection client must be provided, either directly or via session. 
" + f"{constants.FEEDBACK_LINK}" + ) + + # A cloud functions client is required to perform cloud functions operations + if not cloud_functions_client: + cloud_functions_client = session.cloudfunctionsclient + if not cloud_functions_client: + raise ValueError( + "A cloud functions client must be provided, either directly or via session. " + f"{constants.FEEDBACK_LINK}" + ) + + # A resource manager client is required to get/set IAM operations + if not resource_manager_client: + resource_manager_client = session.resourcemanagerclient + if not resource_manager_client: + raise ValueError( + "A resource manager client must be provided, either directly or via session. " + f"{constants.FEEDBACK_LINK}" + ) + + # BQ remote function must be persisted, for which we need a dataset + # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#:~:text=You%20cannot%20create%20temporary%20remote%20functions. + if dataset: + dataset_ref = bigquery.DatasetReference.from_string( + dataset, default_project=bigquery_client.project + ) + else: + dataset_ref = session._anonymous_dataset + + bq_location, cloud_function_region = _utils.get_remote_function_locations( + bigquery_client.location + ) + + # A connection is required for BQ remote function + # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_remote_function + if not bigquery_connection: + bigquery_connection = session._bq_connection # type: ignore + + bigquery_connection = clients.resolve_full_bq_connection_name( + bigquery_connection, + default_project=dataset_ref.project, + default_location=bq_location, + ) + # Guaranteed to be the form of .. + ( + gcp_project_id, + bq_connection_location, + bq_connection_id, + ) = bigquery_connection.split(".") + if gcp_project_id.casefold() != dataset_ref.project.casefold(): + raise ValueError( + "The project_id does not match BigQuery connection gcp_project_id: " + f"{dataset_ref.project}." + ) + if bq_connection_location.casefold() != bq_location.casefold(): + raise ValueError( + "The location does not match BigQuery connection location: " + f"{bq_location}." + ) + + # If any CMEK is intended then check that a docker repository is also specified + if ( + cloud_function_kms_key_name is not None + and cloud_function_docker_repository is None + ): + raise ValueError( + "cloud_function_docker_repository must be specified with cloud_function_kms_key_name." + " For more details see https://cloud.google.com/functions/docs/securing/cmek#before_you_begin" + ) + + bq_connection_manager = session.bqconnectionmanager + + def wrapper(func): + nonlocal input_types, output_type + + if not callable(func): + raise TypeError("f must be callable, got {}".format(func)) + + if sys.version_info >= (3, 10): + # Add `eval_str = True` so that deferred annotations are turned into their + # corresponding type objects. Need Python 3.10 for eval_str parameter. + # https://docs.python.org/3/library/inspect.html#inspect.signature + signature_kwargs: Mapping[str, Any] = {"eval_str": True} + else: + signature_kwargs = {} + + signature = inspect.signature( + func, + **signature_kwargs, + ) + + # Try to get input types via type annotations. + if input_types is None: + input_types = [] + for parameter in signature.parameters.values(): + if (param_type := parameter.annotation) is inspect.Signature.empty: + raise ValueError( + "'input_types' was not set and parameter " + f"'{parameter.name}' is missing a type annotation. " + "Types are required to use @remote_function." 
+ ) + input_types.append(param_type) + elif not isinstance(input_types, collections.abc.Sequence): + input_types = [input_types] + + if output_type is None: + if ( + output_type := signature.return_annotation + ) is inspect.Signature.empty: + raise ValueError( + "'output_type' was not set and function is missing a " + "return type annotation. Types are required to use " + "@remote_function." + ) + + # The function will actually be receiving a pandas Series, but allow both + # BigQuery DataFrames and pandas object types for compatibility. + is_row_processor = False + if len(input_types) == 1 and ( + (input_type := input_types[0]) == bf_series.Series + or input_type == pandas.Series + ): + warnings.warn( + "input_types=Series is in preview.", + stacklevel=1, + category=bf_exceptions.PreviewWarning, + ) + + # we will model the row as a json serialized string containing the data + # and the metadata representing the row + input_types = [str] + is_row_processor = True + elif isinstance(input_types, type): + input_types = [input_types] + + # TODO(b/340898611): fix type error + ibis_signature = _utils.ibis_signature_from_python_signature( + signature, input_types, output_type # type: ignore + ) + + remote_function_client = rf_client.RemoteFunctionClient( + dataset_ref.project, + cloud_function_region, + cloud_functions_client, + bq_location, + dataset_ref.dataset_id, + bigquery_client, + bq_connection_id, + bq_connection_manager, + cloud_function_service_account, + cloud_function_kms_key_name, + cloud_function_docker_repository, + session=session, # type: ignore + ) + + # In the unlikely case where the user is trying to re-deploy the same + # function, cleanup the attributes we add below, first. This prevents + # the pickle from having dependencies that might not otherwise be + # present such as ibis or pandas. 
+ def try_delattr(attr): + try: + delattr(func, attr) + except AttributeError: + pass + + try_delattr("bigframes_cloud_function") + try_delattr("bigframes_remote_function") + try_delattr("input_dtypes") + try_delattr("output_dtype") + try_delattr("is_row_processor") + try_delattr("ibis_node") + + ( + rf_name, + cf_name, + created_new, + ) = remote_function_client.provision_bq_remote_function( + func, + input_types=tuple( + third_party_ibis_bqtypes.BigQueryType.from_ibis(type_) + for type_ in ibis_signature.input_types + ), + output_type=third_party_ibis_bqtypes.BigQueryType.from_ibis( + ibis_signature.output_type + ), + reuse=reuse, + name=name, + package_requirements=packages, + max_batching_rows=max_batching_rows, + cloud_function_timeout=cloud_function_timeout, + cloud_function_max_instance_count=cloud_function_max_instances, + is_row_processor=is_row_processor, + cloud_function_vpc_connector=cloud_function_vpc_connector, + cloud_function_memory_mib=cloud_function_memory_mib, + ) + + # TODO: Move ibis logic to compiler step + node = ibis.udf.scalar.builtin( + func, + name=rf_name, + schema=f"{dataset_ref.project}.{dataset_ref.dataset_id}", + signature=(ibis_signature.input_types, ibis_signature.output_type), + ) + func.bigframes_cloud_function = ( + remote_function_client.get_cloud_function_fully_qualified_name(cf_name) + ) + func.bigframes_remote_function = ( + remote_function_client.get_remote_function_fully_qualilfied_name( + rf_name + ) + ) + func.input_dtypes = tuple( + [ + bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype( + input_type + ) + for input_type in ibis_signature.input_types + ] + ) + func.output_dtype = ( + bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype( + ibis_signature.output_type + ) + ) + func.is_row_processor = is_row_processor + func.ibis_node = node + + # If a new remote function was created, update the cloud artifacts + # created in the session. This would be used to clean up any + # resources in the session. Note that we need to do this only for + # the case where an explicit name was not provided by the user and + # we used an internal name. For the cases where the user provided an + # explicit name, we are assuming that the user wants to persist them + # with that name and would directly manage their lifecycle. + if created_new and (not name): + self._update_temp_artifacts( + func.bigframes_remote_function, func.bigframes_cloud_function + ) + return func + + return wrapper diff --git a/bigframes/functions/_utils.py b/bigframes/functions/_utils.py new file mode 100644 index 0000000000..537473bed8 --- /dev/null +++ b/bigframes/functions/_utils.py @@ -0,0 +1,214 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
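# A minimal usage sketch of the remote_function decorator defined above,
# assuming a configured BigQuery DataFrames session with the APIs, IAM roles
# and BigQuery connection described in the docstring already set up. The
# "squarer" function and the sample data are hypothetical; type annotations
# stand in for explicit input_types/output_type.
import bigframes.pandas as bpd

@bpd.remote_function(reuse=True, max_batching_rows=1000)
def squarer(x: int) -> int:
    return x * x

df = bpd.DataFrame({"a": [1, 2, 3]})
squared = df["a"].apply(squarer).to_pandas()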
+ + +import hashlib +import inspect +from typing import cast, List, NamedTuple, Optional, Sequence, Set + +import cloudpickle +import google.api_core.exceptions +from google.cloud import bigquery, functions_v2 +import ibis.expr.datatypes.core +import numpy +import pandas +import pyarrow + +import bigframes.core.compile.ibis_types + +# Naming convention for the remote function artifacts +_BIGFRAMES_REMOTE_FUNCTION_PREFIX = "bigframes" +_BQ_FUNCTION_NAME_SEPERATOR = "_" +_GCF_FUNCTION_NAME_SEPERATOR = "-" + +# Protocol version 4 is available in python version 3.4 and above +# https://docs.python.org/3/library/pickle.html#data-stream-format +_pickle_protocol_version = 4 + + +def get_remote_function_locations(bq_location): + """Get BQ location and cloud functions region given a BQ client.""" + # TODO(shobs, b/274647164): Find the best way to determine default location. + # For now let's assume that if no BQ location is set in the client then it + # defaults to US multi region + bq_location = bq_location.lower() if bq_location else "us" + + # Cloud function should be in the same region as the bigquery remote function + cloud_function_region = bq_location + + # BigQuery has multi region but cloud functions does not. + # Any region in the multi region that supports cloud functions should work + # https://cloud.google.com/functions/docs/locations + if bq_location == "us": + cloud_function_region = "us-central1" + elif bq_location == "eu": + cloud_function_region = "europe-west1" + + return bq_location, cloud_function_region + + +def _get_updated_package_requirements( + package_requirements=None, is_row_processor=False +): + requirements = [f"cloudpickle=={cloudpickle.__version__}"] + if is_row_processor: + # bigframes remote function will send an entire row of data as json, + # which would be converted to a pandas series and processed + # Ensure numpy versions match to avoid unpickling problems. See + # internal issue b/347934471. + requirements.append(f"numpy=={numpy.__version__}") + requirements.append(f"pandas=={pandas.__version__}") + requirements.append(f"pyarrow=={pyarrow.__version__}") + + if package_requirements: + requirements.extend(package_requirements) + + requirements = sorted(requirements) + return requirements + + +def _clean_up_by_session_id( + bqclient: bigquery.Client, + gcfclient: functions_v2.FunctionServiceClient, + dataset: bigquery.DatasetReference, + session_id: str, +): + """Delete remote function artifacts for a session id, where the session id + was not necessarily created in the current runtime. This is useful if the + user worked with a BigQuery DataFrames session previously and remembered the + session id, and now wants to clean up its temporary resources at a later + point in time. 
+ """ + + # First clean up the BQ remote functions and then the underlying + # cloud functions, so that at no point we are left with a remote function + # that is pointing to a cloud function that does not exist + + endpoints_to_be_deleted: Set[str] = set() + match_prefix = "".join( + [ + _BIGFRAMES_REMOTE_FUNCTION_PREFIX, + _BQ_FUNCTION_NAME_SEPERATOR, + session_id, + _BQ_FUNCTION_NAME_SEPERATOR, + ] + ) + for routine in bqclient.list_routines(dataset): + routine = cast(bigquery.Routine, routine) + + # skip past the routines not belonging to the given session id, or + # non-remote-function routines + if ( + routine.type_ != bigquery.RoutineType.SCALAR_FUNCTION + or not cast(str, routine.routine_id).startswith(match_prefix) + or not routine.remote_function_options + or not routine.remote_function_options.endpoint + ): + continue + + # Let's forgive the edge case possibility that the BQ remote function + # may have been deleted at the same time directly by the user + bqclient.delete_routine(routine, not_found_ok=True) + endpoints_to_be_deleted.add(routine.remote_function_options.endpoint) + + # Now clean up the cloud functions + bq_location = bqclient.get_dataset(dataset).location + bq_location, gcf_location = get_remote_function_locations(bq_location) + parent_path = gcfclient.common_location_path( + project=dataset.project, location=gcf_location + ) + for gcf in gcfclient.list_functions(parent=parent_path): + # skip past the cloud functions not attached to any BQ remote function + # belonging to the given session id + if gcf.service_config.uri not in endpoints_to_be_deleted: + continue + + # Let's forgive the edge case possibility that the cloud function + # may have been deleted at the same time directly by the user + try: + gcfclient.delete_function(name=gcf.name) + except google.api_core.exceptions.NotFound: + pass + + +def _get_hash(def_, package_requirements=None): + "Get hash (32 digits alphanumeric) of a function." + # There is a known cell-id sensitivity of the cloudpickle serialization in + # notebooks https://github.com/cloudpipe/cloudpickle/issues/538. Because of + # this, if a cell contains a udf decorated with @remote_function, a unique + # cloudpickle code is generated every time the cell is run, creating new + # cloud artifacts every time. This is slow and wasteful. + # A workaround of the same can be achieved by replacing the filename in the + # code object to a static value + # https://github.com/cloudpipe/cloudpickle/issues/120#issuecomment-338510661. + # + # To respect the user code/environment let's make this modification on a + # copy of the udf, not on the original udf itself. + def_copy = cloudpickle.loads(cloudpickle.dumps(def_)) + def_copy.__code__ = def_copy.__code__.replace( + co_filename="bigframes_place_holder_filename" + ) + + def_repr = cloudpickle.dumps(def_copy, protocol=_pickle_protocol_version) + if package_requirements: + for p in sorted(package_requirements): + def_repr += p.encode() + return hashlib.md5(def_repr).hexdigest() + + +def routine_ref_to_string_for_query(routine_ref: bigquery.RoutineReference) -> str: + return f"`{routine_ref.project}.{routine_ref.dataset_id}`.{routine_ref.routine_id}" + + +def get_cloud_function_name(function_hash, session_id=None, uniq_suffix=None): + "Get a name for the cloud function for the given user defined function." 
+ parts = [_BIGFRAMES_REMOTE_FUNCTION_PREFIX] + if session_id: + parts.append(session_id) + parts.append(function_hash) + if uniq_suffix: + parts.append(uniq_suffix) + return _GCF_FUNCTION_NAME_SEPERATOR.join(parts) + + +def get_remote_function_name(function_hash, session_id, uniq_suffix=None): + "Get a name for the BQ remote function for the given user defined function." + parts = [_BIGFRAMES_REMOTE_FUNCTION_PREFIX, session_id, function_hash] + if uniq_suffix: + parts.append(uniq_suffix) + return _BQ_FUNCTION_NAME_SEPERATOR.join(parts) + + +class IbisSignature(NamedTuple): + parameter_names: List[str] + input_types: List[Optional[ibis.expr.datatypes.core.DataType]] + output_type: ibis.expr.datatypes.core.DataType + + +def ibis_signature_from_python_signature( + signature: inspect.Signature, + input_types: Sequence[type], + output_type: type, +) -> IbisSignature: + + return IbisSignature( + parameter_names=list(signature.parameters.keys()), + input_types=[ + bigframes.core.compile.ibis_types.ibis_type_from_python_type(t) + for t in input_types + ], + output_type=bigframes.core.compile.ibis_types.ibis_type_from_python_type( + output_type + ), + ) diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index b3c6aee1b3..7e9df74e76 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -14,664 +14,29 @@ from __future__ import annotations -import collections.abc -import hashlib -import inspect import logging -import os -import random -import shutil -import string -import sys -import tempfile -import threading -from typing import ( - Any, - cast, - Dict, - List, - Mapping, - NamedTuple, - Optional, - Sequence, - Set, - Tuple, - TYPE_CHECKING, - Union, -) +from typing import cast, Optional, TYPE_CHECKING import warnings import ibis -import numpy -import pandas -import pyarrow -import requests if TYPE_CHECKING: from bigframes.session import Session -import bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes -import cloudpickle import google.api_core.exceptions import google.api_core.retry -from google.cloud import ( - bigquery, - bigquery_connection_v1, - functions_v2, - resourcemanager_v3, -) +from google.cloud import bigquery import google.iam.v1 -from ibis.expr.datatypes.core import DataType as IbisDataType -from bigframes import clients import bigframes.constants as constants import bigframes.core.compile.ibis_types import bigframes.dtypes import bigframes.functions.remote_function_template -logger = logging.getLogger(__name__) - -# Naming convention for the remote function artifacts -_BIGFRAMES_REMOTE_FUNCTION_PREFIX = "bigframes" -_BQ_FUNCTION_NAME_SEPERATOR = "_" -_GCF_FUNCTION_NAME_SEPERATOR = "-" - -# Protocol version 4 is available in python version 3.4 and above -# https://docs.python.org/3/library/pickle.html#data-stream-format -_pickle_protocol_version = 4 - - -def _clean_up_by_session_id( - bqclient: bigquery.Client, - gcfclient: functions_v2.FunctionServiceClient, - dataset: bigquery.DatasetReference, - session_id: str, -): - """Delete remote function artifacts for a session id, where the session id - was not necessarily created in the current runtime. This is useful if the - user worked with a BigQuery DataFrames session previously and remembered the - session id, and now wants to clean up its temporary resources at a later - point in time. 
- """ - - # First clean up the BQ remote functions and then the underlying - # cloud functions, so that at no point we are left with a remote function - # that is pointing to a cloud function that does not exist - - endpoints_to_be_deleted: Set[str] = set() - match_prefix = "".join( - [ - _BIGFRAMES_REMOTE_FUNCTION_PREFIX, - _BQ_FUNCTION_NAME_SEPERATOR, - session_id, - _BQ_FUNCTION_NAME_SEPERATOR, - ] - ) - for routine in bqclient.list_routines(dataset): - routine = cast(bigquery.Routine, routine) - - # skip past the routines not belonging to the given session id, or - # non-remote-function routines - if ( - routine.type_ != bigquery.RoutineType.SCALAR_FUNCTION - or not cast(str, routine.routine_id).startswith(match_prefix) - or not routine.remote_function_options - or not routine.remote_function_options.endpoint - ): - continue - - # Let's forgive the edge case possibility that the BQ remote function - # may have been deleted at the same time directly by the user - bqclient.delete_routine(routine, not_found_ok=True) - endpoints_to_be_deleted.add(routine.remote_function_options.endpoint) - - # Now clean up the cloud functions - bq_location = bqclient.get_dataset(dataset).location - bq_location, gcf_location = get_remote_function_locations(bq_location) - parent_path = gcfclient.common_location_path( - project=dataset.project, location=gcf_location - ) - for gcf in gcfclient.list_functions(parent=parent_path): - # skip past the cloud functions not attached to any BQ remote function - # belonging to the given session id - if gcf.service_config.uri not in endpoints_to_be_deleted: - continue - - # Let's forgive the edge case possibility that the cloud function - # may have been deleted at the same time directly by the user - try: - gcfclient.delete_function(name=gcf.name) - except google.api_core.exceptions.NotFound: - pass - - -def get_remote_function_locations(bq_location): - """Get BQ location and cloud functions region given a BQ client.""" - # TODO(shobs, b/274647164): Find the best way to determine default location. - # For now let's assume that if no BQ location is set in the client then it - # defaults to US multi region - bq_location = bq_location.lower() if bq_location else "us" - - # Cloud function should be in the same region as the bigquery remote function - cloud_function_region = bq_location - - # BigQuery has multi region but cloud functions does not. - # Any region in the multi region that supports cloud functions should work - # https://cloud.google.com/functions/docs/locations - if bq_location == "us": - cloud_function_region = "us-central1" - elif bq_location == "eu": - cloud_function_region = "europe-west1" - - return bq_location, cloud_function_region - - -def _get_hash(def_, package_requirements=None): - "Get hash (32 digits alphanumeric) of a function." - # There is a known cell-id sensitivity of the cloudpickle serialization in - # notebooks https://github.com/cloudpipe/cloudpickle/issues/538. Because of - # this, if a cell contains a udf decorated with @remote_function, a unique - # cloudpickle code is generated every time the cell is run, creating new - # cloud artifacts every time. This is slow and wasteful. - # A workaround of the same can be achieved by replacing the filename in the - # code object to a static value - # https://github.com/cloudpipe/cloudpickle/issues/120#issuecomment-338510661. - # - # To respect the user code/environment let's make this modification on a - # copy of the udf, not on the original udf itself. 
- def_copy = cloudpickle.loads(cloudpickle.dumps(def_)) - def_copy.__code__ = def_copy.__code__.replace( - co_filename="bigframes_place_holder_filename" - ) - - def_repr = cloudpickle.dumps(def_copy, protocol=_pickle_protocol_version) - if package_requirements: - for p in sorted(package_requirements): - def_repr += p.encode() - return hashlib.md5(def_repr).hexdigest() - - -def _get_updated_package_requirements( - package_requirements=None, is_row_processor=False -): - requirements = [f"cloudpickle=={cloudpickle.__version__}"] - if is_row_processor: - # bigframes remote function will send an entire row of data as json, - # which would be converted to a pandas series and processed - # Ensure numpy versions match to avoid unpickling problems. See - # internal issue b/347934471. - requirements.append(f"numpy=={numpy.__version__}") - requirements.append(f"pandas=={pandas.__version__}") - requirements.append(f"pyarrow=={pyarrow.__version__}") - - if package_requirements: - requirements.extend(package_requirements) - - requirements = sorted(requirements) - return requirements - - -def routine_ref_to_string_for_query(routine_ref: bigquery.RoutineReference) -> str: - return f"`{routine_ref.project}.{routine_ref.dataset_id}`.{routine_ref.routine_id}" - - -class IbisSignature(NamedTuple): - parameter_names: List[str] - input_types: List[Optional[IbisDataType]] - output_type: IbisDataType - - -def get_cloud_function_name(function_hash, session_id=None, uniq_suffix=None): - "Get a name for the cloud function for the given user defined function." - parts = [_BIGFRAMES_REMOTE_FUNCTION_PREFIX] - if session_id: - parts.append(session_id) - parts.append(function_hash) - if uniq_suffix: - parts.append(uniq_suffix) - return _GCF_FUNCTION_NAME_SEPERATOR.join(parts) - - -def get_remote_function_name(function_hash, session_id, uniq_suffix=None): - "Get a name for the BQ remote function for the given user defined function." 
- parts = [_BIGFRAMES_REMOTE_FUNCTION_PREFIX, session_id, function_hash] - if uniq_suffix: - parts.append(uniq_suffix) - return _BQ_FUNCTION_NAME_SEPERATOR.join(parts) - - -class RemoteFunctionClient: - # Wait time (in seconds) for an IAM binding to take effect after creation - _iam_wait_seconds = 120 - - def __init__( - self, - gcp_project_id, - cloud_function_region, - cloud_functions_client, - bq_location, - bq_dataset, - bq_client, - bq_connection_id, - bq_connection_manager, - cloud_function_service_account, - cloud_function_kms_key_name, - cloud_function_docker_repository, - *, - session: Session, - ): - self._gcp_project_id = gcp_project_id - self._cloud_function_region = cloud_function_region - self._cloud_functions_client = cloud_functions_client - self._bq_location = bq_location - self._bq_dataset = bq_dataset - self._bq_client = bq_client - self._bq_connection_id = bq_connection_id - self._bq_connection_manager = bq_connection_manager - self._cloud_function_service_account = cloud_function_service_account - self._cloud_function_kms_key_name = cloud_function_kms_key_name - self._cloud_function_docker_repository = cloud_function_docker_repository - self._session = session - - def create_bq_remote_function( - self, - input_args, - input_types, - output_type, - endpoint, - bq_function_name, - max_batching_rows, - ): - """Create a BigQuery remote function given the artifacts of a user defined - function and the http endpoint of a corresponding cloud function.""" - if self._bq_connection_manager: - self._bq_connection_manager.create_bq_connection( - self._gcp_project_id, - self._bq_location, - self._bq_connection_id, - "run.invoker", - ) - - # Create BQ function - # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_remote_function_2 - bq_function_args = [] - bq_function_return_type = output_type - - # We are expecting the input type annotations to be 1:1 with the input args - for name, type_ in zip(input_args, input_types): - bq_function_args.append(f"{name} {type_}") - - remote_function_options = { - "endpoint": endpoint, - "max_batching_rows": max_batching_rows, - } - - remote_function_options_str = ", ".join( - [ - f'{key}="{val}"' if isinstance(val, str) else f"{key}={val}" - for key, val in remote_function_options.items() - if val is not None - ] - ) - - create_function_ddl = f""" - CREATE OR REPLACE FUNCTION `{self._gcp_project_id}.{self._bq_dataset}`.{bq_function_name}({','.join(bq_function_args)}) - RETURNS {bq_function_return_type} - REMOTE WITH CONNECTION `{self._gcp_project_id}.{self._bq_location}.{self._bq_connection_id}` - OPTIONS ({remote_function_options_str})""" - - logger.info(f"Creating BQ remote function: {create_function_ddl}") - - # Make sure the dataset exists. I.e. if it doesn't exist, go ahead and - # create it - dataset = bigquery.Dataset( - bigquery.DatasetReference.from_string( - self._bq_dataset, default_project=self._gcp_project_id - ) - ) - dataset.location = self._bq_location - try: - # This check does not require bigquery.datasets.create IAM - # permission. So, if the data set already exists, then user can work - # without having that permission. - self._bq_client.get_dataset(dataset) - except google.api_core.exceptions.NotFound: - # This requires bigquery.datasets.create IAM permission - self._bq_client.create_dataset(dataset, exists_ok=True) - - # TODO(swast): plumb through the original, user-facing api_name. 
- _, query_job = self._session._start_query(create_function_ddl) - logger.info(f"Created remote function {query_job.ddl_target_routine}") - - def get_cloud_function_fully_qualified_parent(self): - "Get the fully qualilfied parent for a cloud function." - return self._cloud_functions_client.common_location_path( - self._gcp_project_id, self._cloud_function_region - ) - - def get_cloud_function_fully_qualified_name(self, name): - "Get the fully qualilfied name for a cloud function." - return self._cloud_functions_client.function_path( - self._gcp_project_id, self._cloud_function_region, name - ) - - def get_remote_function_fully_qualilfied_name(self, name): - "Get the fully qualilfied name for a BQ remote function." - return f"{self._gcp_project_id}.{self._bq_dataset}.{name}" - - def get_cloud_function_endpoint(self, name): - """Get the http endpoint of a cloud function if it exists.""" - fully_qualified_name = self.get_cloud_function_fully_qualified_name(name) - try: - response = self._cloud_functions_client.get_function( - name=fully_qualified_name - ) - return response.service_config.uri - except google.api_core.exceptions.NotFound: - pass - return None - - def generate_cloud_function_code( - self, - def_, - directory, - *, - input_types: Tuple[str], - output_type: str, - package_requirements=None, - is_row_processor=False, - ): - """Generate the cloud function code for a given user defined function. - - Args: - input_types (tuple[str]): - Types of the input arguments in BigQuery SQL data type names. - output_type (str): - Types of the output scalar as a BigQuery SQL data type name. - """ - - # requirements.txt - if package_requirements: - requirements_txt = os.path.join(directory, "requirements.txt") - with open(requirements_txt, "w") as f: - f.write("\n".join(package_requirements)) - - # main.py - entry_point = bigframes.functions.remote_function_template.generate_cloud_function_main_code( - def_, - directory, - input_types=input_types, - output_type=output_type, - is_row_processor=is_row_processor, - ) - return entry_point - - def create_cloud_function( - self, - def_, - cf_name, - *, - input_types: Tuple[str], - output_type: str, - package_requirements=None, - timeout_seconds=600, - max_instance_count=None, - is_row_processor=False, - vpc_connector=None, - memory_mib=1024, - ): - """Create a cloud function from the given user defined function. - - Args: - input_types (tuple[str]): - Types of the input arguments in BigQuery SQL data type names. - output_type (str): - Types of the output scalar as a BigQuery SQL data type name. - """ - - # Build and deploy folder structure containing cloud function - with tempfile.TemporaryDirectory() as directory: - entry_point = self.generate_cloud_function_code( - def_, - directory, - package_requirements=package_requirements, - input_types=input_types, - output_type=output_type, - is_row_processor=is_row_processor, - ) - archive_path = shutil.make_archive(directory, "zip", directory) - - # We are creating cloud function source code from the currently running - # python version. Use the same version to deploy. This is necessary - # because cloudpickle serialization done in one python version and - # deserialization done in another python version doesn't work. - # TODO(shobs): Figure out how to achieve version compatibility, specially - # when pickle (internally used by cloudpickle) guarantees that: - # https://docs.python.org/3/library/pickle.html#:~:text=The%20pickle%20serialization%20format%20is,unique%20breaking%20change%20language%20boundary. 
- python_version = "python{}{}".format( - sys.version_info.major, sys.version_info.minor - ) - - # Determine an upload URL for user code - upload_url_request = functions_v2.GenerateUploadUrlRequest( - kms_key_name=self._cloud_function_kms_key_name - ) - upload_url_request.parent = self.get_cloud_function_fully_qualified_parent() - upload_url_response = self._cloud_functions_client.generate_upload_url( - request=upload_url_request - ) - - # Upload the code to GCS - with open(archive_path, "rb") as f: - response = requests.put( - upload_url_response.upload_url, - data=f, - headers={"content-type": "application/zip"}, - ) - if response.status_code != 200: - raise RuntimeError( - "Failed to upload user code. code={}, reason={}, text={}".format( - response.status_code, response.reason, response.text - ) - ) - - # Deploy Cloud Function - create_function_request = functions_v2.CreateFunctionRequest() - create_function_request.parent = ( - self.get_cloud_function_fully_qualified_parent() - ) - create_function_request.function_id = cf_name - function = functions_v2.Function() - function.name = self.get_cloud_function_fully_qualified_name(cf_name) - function.build_config = functions_v2.BuildConfig() - function.build_config.runtime = python_version - function.build_config.entry_point = entry_point - function.build_config.source = functions_v2.Source() - function.build_config.source.storage_source = functions_v2.StorageSource() - function.build_config.source.storage_source.bucket = ( - upload_url_response.storage_source.bucket - ) - function.build_config.source.storage_source.object_ = ( - upload_url_response.storage_source.object_ - ) - function.build_config.docker_repository = ( - self._cloud_function_docker_repository - ) - function.service_config = functions_v2.ServiceConfig() - if memory_mib is not None: - function.service_config.available_memory = f"{memory_mib}Mi" - if timeout_seconds is not None: - if timeout_seconds > 1200: - raise ValueError( - "BigQuery remote function can wait only up to 20 minutes" - ", see for more details " - "https://cloud.google.com/bigquery/quotas#remote_function_limits." - ) - function.service_config.timeout_seconds = timeout_seconds - if max_instance_count is not None: - function.service_config.max_instance_count = max_instance_count - if vpc_connector is not None: - function.service_config.vpc_connector = vpc_connector - function.service_config.service_account_email = ( - self._cloud_function_service_account - ) - function.kms_key_name = self._cloud_function_kms_key_name - create_function_request.function = function - - # Create the cloud function and wait for it to be ready to use - try: - operation = self._cloud_functions_client.create_function( - request=create_function_request - ) - operation.result() - - # Cleanup - os.remove(archive_path) - except google.api_core.exceptions.AlreadyExists: - # If a cloud function with the same name already exists, let's - # update it - update_function_request = functions_v2.UpdateFunctionRequest() - update_function_request.function = function - operation = self._cloud_functions_client.update_function( - request=update_function_request - ) - operation.result() - - # Fetch the endpoint of the just created function - endpoint = self.get_cloud_function_endpoint(cf_name) - if not endpoint: - raise ValueError( - f"Couldn't fetch the http endpoint. 
{constants.FEEDBACK_LINK}" - ) - - logger.info( - f"Successfully created cloud function {cf_name} with uri ({endpoint})" - ) - return endpoint - - def provision_bq_remote_function( - self, - def_, - input_types, - output_type, - reuse, - name, - package_requirements, - max_batching_rows, - cloud_function_timeout, - cloud_function_max_instance_count, - is_row_processor, - cloud_function_vpc_connector, - cloud_function_memory_mib, - ): - """Provision a BigQuery remote function.""" - # Augment user package requirements with any internal package - # requirements - package_requirements = _get_updated_package_requirements( - package_requirements, is_row_processor - ) - - # Compute a unique hash representing the user code - function_hash = _get_hash(def_, package_requirements) - - # If reuse of any existing function with the same name (indicated by the - # same hash of its source code) is not intended, then attach a unique - # suffix to the intended function name to make it unique. - uniq_suffix = None - if not reuse: - # use 4 digits as a unique suffix which should suffice for - # uniqueness per session - uniq_suffix = "".join( - random.choices(string.ascii_lowercase + string.digits, k=4) - ) - - # Derive the name of the cloud function underlying the intended BQ - # remote function. Use the session id to identify the GCF for unnamed - # functions. The named remote functions are treated as a persistant - # artifacts, so let's keep them independent of session id, which also - # makes their naming more stable for the same udf code - session_id = None if name else self._session.session_id - cloud_function_name = get_cloud_function_name( - function_hash, session_id, uniq_suffix - ) - cf_endpoint = self.get_cloud_function_endpoint(cloud_function_name) - - # Create the cloud function if it does not exist - if not cf_endpoint: - cf_endpoint = self.create_cloud_function( - def_, - cloud_function_name, - input_types=input_types, - output_type=output_type, - package_requirements=package_requirements, - timeout_seconds=cloud_function_timeout, - max_instance_count=cloud_function_max_instance_count, - is_row_processor=is_row_processor, - vpc_connector=cloud_function_vpc_connector, - memory_mib=cloud_function_memory_mib, - ) - else: - logger.info(f"Cloud function {cloud_function_name} already exists.") - - # Derive the name of the remote function - remote_function_name = name - if not remote_function_name: - remote_function_name = get_remote_function_name( - function_hash, self._session.session_id, uniq_suffix - ) - rf_endpoint, rf_conn = self.get_remote_function_specs(remote_function_name) - - # Create the BQ remote function in following circumstances: - # 1. It does not exist - # 2. It exists but the existing remote function has different - # configuration than intended - created_new = False - if not rf_endpoint or ( - rf_endpoint != cf_endpoint or rf_conn != self._bq_connection_id - ): - input_args = inspect.getargs(def_.__code__).args - if len(input_args) != len(input_types): - raise ValueError( - "Exactly one type should be provided for every input arg." - ) - self.create_bq_remote_function( - input_args, - input_types, - output_type, - cf_endpoint, - remote_function_name, - max_batching_rows, - ) - - created_new = True - else: - logger.info(f"Remote function {remote_function_name} already exists.") - - return remote_function_name, cloud_function_name, created_new +from . import _remote_function_session as rf_session +from . 
import _utils - def get_remote_function_specs(self, remote_function_name): - """Check whether a remote function already exists for the udf.""" - http_endpoint = None - bq_connection = None - routines = self._bq_client.list_routines( - f"{self._gcp_project_id}.{self._bq_dataset}" - ) - try: - for routine in routines: - routine = cast(bigquery.Routine, routine) - if routine.reference.routine_id == remote_function_name: - rf_options = routine.remote_function_options - if rf_options: - http_endpoint = rf_options.endpoint - bq_connection = rf_options.connection - if bq_connection: - bq_connection = os.path.basename(bq_connection) - break - except google.api_core.exceptions.NotFound: - # The dataset might not exist, in which case the http_endpoint doesn't, either. - # Note: list_routines doesn't make an API request until we iterate on the response object. - pass - return (http_endpoint, bq_connection) +logger = logging.getLogger(__name__) class UnsupportedTypeError(ValueError): @@ -680,34 +45,16 @@ def __init__(self, type_, supported_types): self.supported_types = supported_types -def ibis_signature_from_python_signature( - signature: inspect.Signature, - input_types: Sequence[type], - output_type: type, -) -> IbisSignature: - - return IbisSignature( - parameter_names=list(signature.parameters.keys()), - input_types=[ - bigframes.core.compile.ibis_types.ibis_type_from_python_type(t) - for t in input_types - ], - output_type=bigframes.core.compile.ibis_types.ibis_type_from_python_type( - output_type - ), - ) - - class ReturnTypeMissingError(ValueError): pass # TODO: Move this to compile folder -def ibis_signature_from_routine(routine: bigquery.Routine) -> IbisSignature: +def ibis_signature_from_routine(routine: bigquery.Routine) -> _utils.IbisSignature: if not routine.return_type: raise ReturnTypeMissingError - return IbisSignature( + return _utils.IbisSignature( parameter_names=[arg.name for arg in routine.arguments], input_types=[ bigframes.core.compile.ibis_types.ibis_type_from_type_kind( @@ -748,515 +95,12 @@ def get_routine_reference( return dataset_ref.routine(routine_ref_str) -class _RemoteFunctionSession: - """Session to manage remote functions.""" - - def __init__(self): - # Session level mapping of remote function artifacts - self._temp_artifacts: Dict[str, str] = dict() - - # Lock to synchronize the update of the session artifacts - self._artifacts_lock = threading.Lock() - - def _update_temp_artifacts(self, bqrf_routine: str, gcf_path: str): - """Update remote function artifacts in the current session.""" - with self._artifacts_lock: - self._temp_artifacts[bqrf_routine] = gcf_path - - def clean_up( - self, - bqclient: bigquery.Client, - gcfclient: functions_v2.FunctionServiceClient, - session_id: str, - ): - """Delete remote function artifacts in the current session.""" - with self._artifacts_lock: - for bqrf_routine, gcf_path in self._temp_artifacts.items(): - # Let's accept the possibility that the remote function may have - # been deleted directly by the user - bqclient.delete_routine(bqrf_routine, not_found_ok=True) - - # Let's accept the possibility that the cloud function may have - # been deleted directly by the user - try: - gcfclient.delete_function(name=gcf_path) - except google.api_core.exceptions.NotFound: - pass - - self._temp_artifacts.clear() - - # Inspired by @udf decorator implemented in ibis-bigquery package - # https://github.com/ibis-project/ibis-bigquery/blob/main/ibis_bigquery/udf/__init__.py - # which has moved as @js to the ibis package - # 
https://github.com/ibis-project/ibis/blob/master/ibis/backends/bigquery/udf/__init__.py - def remote_function( - self, - input_types: Union[None, type, Sequence[type]] = None, - output_type: Optional[type] = None, - session: Optional[Session] = None, - bigquery_client: Optional[bigquery.Client] = None, - bigquery_connection_client: Optional[ - bigquery_connection_v1.ConnectionServiceClient - ] = None, - cloud_functions_client: Optional[functions_v2.FunctionServiceClient] = None, - resource_manager_client: Optional[resourcemanager_v3.ProjectsClient] = None, - dataset: Optional[str] = None, - bigquery_connection: Optional[str] = None, - reuse: bool = True, - name: Optional[str] = None, - packages: Optional[Sequence[str]] = None, - cloud_function_service_account: Optional[str] = None, - cloud_function_kms_key_name: Optional[str] = None, - cloud_function_docker_repository: Optional[str] = None, - max_batching_rows: Optional[int] = 1000, - cloud_function_timeout: Optional[int] = 600, - cloud_function_max_instances: Optional[int] = None, - cloud_function_vpc_connector: Optional[str] = None, - cloud_function_memory_mib: Optional[int] = 1024, - ): - """Decorator to turn a user defined function into a BigQuery remote function. - - .. deprecated:: 0.0.1 - This is an internal method. Please use :func:`bigframes.pandas.remote_function` instead. - - .. note:: - Please make sure following is setup before using this API: - - 1. Have the below APIs enabled for your project: - - * BigQuery Connection API - * Cloud Functions API - * Cloud Run API - * Cloud Build API - * Artifact Registry API - * Cloud Resource Manager API - - This can be done from the cloud console (change `PROJECT_ID` to yours): - https://console.cloud.google.com/apis/enableflow?apiid=bigqueryconnection.googleapis.com,cloudfunctions.googleapis.com,run.googleapis.com,cloudbuild.googleapis.com,artifactregistry.googleapis.com,cloudresourcemanager.googleapis.com&project=PROJECT_ID - - Or from the gcloud CLI: - - `$ gcloud services enable bigqueryconnection.googleapis.com cloudfunctions.googleapis.com run.googleapis.com cloudbuild.googleapis.com artifactregistry.googleapis.com cloudresourcemanager.googleapis.com` - - 2. Have following IAM roles enabled for you: - - * BigQuery Data Editor (roles/bigquery.dataEditor) - * BigQuery Connection Admin (roles/bigquery.connectionAdmin) - * Cloud Functions Developer (roles/cloudfunctions.developer) - * Service Account User (roles/iam.serviceAccountUser) on the service account `PROJECT_NUMBER-compute@developer.gserviceaccount.com` - * Storage Object Viewer (roles/storage.objectViewer) - * Project IAM Admin (roles/resourcemanager.projectIamAdmin) (Only required if the bigquery connection being used is not pre-created and is created dynamically with user credentials.) - - 3. Either the user has setIamPolicy privilege on the project, or a BigQuery connection is pre-created with necessary IAM role set: - - 1. To create a connection, follow https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_connection - 2. To set up IAM, follow https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#grant_permission_on_function - - Alternatively, the IAM could also be setup via the gcloud CLI: - - `$ gcloud projects add-iam-policy-binding PROJECT_ID --member="serviceAccount:CONNECTION_SERVICE_ACCOUNT_ID" --role="roles/run.invoker"`. 
- - Args: - input_types (None, type, or sequence(type)): - For scalar user defined function it should be the input type or - sequence of input types. For row processing user defined function, - type `Series` should be specified. - output_type (Optional[type]): - Data type of the output in the user defined function. - session (bigframes.Session, Optional): - BigQuery DataFrames session to use for getting default project, - dataset and BigQuery connection. - bigquery_client (google.cloud.bigquery.Client, Optional): - Client to use for BigQuery operations. If this param is not provided - then bigquery client from the session would be used. - bigquery_connection_client (google.cloud.bigquery_connection_v1.ConnectionServiceClient, Optional): - Client to use for BigQuery connection operations. If this param is - not provided then bigquery connection client from the session would - be used. - cloud_functions_client (google.cloud.functions_v2.FunctionServiceClient, Optional): - Client to use for cloud functions operations. If this param is not - provided then the functions client from the session would be used. - resource_manager_client (google.cloud.resourcemanager_v3.ProjectsClient, Optional): - Client to use for cloud resource management operations, e.g. for - getting and setting IAM roles on cloud resources. If this param is - not provided then resource manager client from the session would be - used. - dataset (str, Optional.): - Dataset in which to create a BigQuery remote function. It should be in - `.` or `` format. If this - parameter is not provided then session dataset id is used. - bigquery_connection (str, Optional): - Name of the BigQuery connection in the form of `CONNECTION_ID` or - `LOCATION.CONNECTION_ID` or `PROJECT_ID.LOCATION.CONNECTION_ID`. - If this param is not provided then the bigquery connection from the session - would be used. If it is pre created in the same location as the - `bigquery_client.location` then it would be used, otherwise it is created - dynamically using the `bigquery_connection_client` assuming the user has necessary - priviliges. The PROJECT_ID should be the same as the BigQuery connection project. - reuse (bool, Optional): - Reuse the remote function if already exists. - `True` by default, which will result in reusing an existing remote - function and corresponding cloud function that was previously - created (if any) for the same udf. - Please note that for an unnamed (i.e. created without an explicit - `name` argument) remote function, the BigQuery DataFrames - session id is attached in the cloud artifacts names. So for the - effective reuse across the sessions it is recommended to create - the remote function with an explicit `name`. - Setting it to `False` would force creating a unique remote function. - If the required remote function does not exist then it would be - created irrespective of this param. - name (str, Optional): - Explicit name of the persisted BigQuery remote function. Use it with - caution, because two users working in the same project and dataset - could overwrite each other's remote functions if they use the same - persistent name. When an explicit name is provided, any session - specific clean up (``bigframes.session.Session.close``/ - ``bigframes.pandas.close_session``/ - ``bigframes.pandas.reset_session``/ - ``bigframes.pandas.clean_up_by_session_id``) does not clean up - the function, and leaves it for the user to manage the function - and the associated cloud function directly. 
- packages (str[], Optional): - Explicit name of the external package dependencies. Each dependency - is added to the `requirements.txt` as is, and can be of the form - supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/. - cloud_function_service_account (str, Optional): - Service account to use for the cloud functions. If not provided then - the default service account would be used. See - https://cloud.google.com/functions/docs/securing/function-identity - for more details. Please make sure the service account has the - necessary IAM permissions configured as described in - https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration. - cloud_function_kms_key_name (str, Optional): - Customer managed encryption key to protect cloud functions and - related data at rest. This is of the format - projects/PROJECT_ID/locations/LOCATION/keyRings/KEYRING/cryptoKeys/KEY. - Read https://cloud.google.com/functions/docs/securing/cmek for - more details including granting necessary service accounts - access to the key. - cloud_function_docker_repository (str, Optional): - Docker repository created with the same encryption key as - `cloud_function_kms_key_name` to store encrypted artifacts - created to support the cloud function. This is of the format - projects/PROJECT_ID/locations/LOCATION/repositories/REPOSITORY_NAME. - For more details see - https://cloud.google.com/functions/docs/securing/cmek#before_you_begin. - max_batching_rows (int, Optional): - The maximum number of rows to be batched for processing in the - BQ remote function. Default value is 1000. A lower number can be - passed to avoid timeouts in case the user code is too complex to - process large number of rows fast enough. A higher number can be - used to increase throughput in case the user code is fast enough. - `None` can be passed to let BQ remote functions service apply - default batching. See for more details - https://cloud.google.com/bigquery/docs/remote-functions#limiting_number_of_rows_in_a_batch_request. - cloud_function_timeout (int, Optional): - The maximum amount of time (in seconds) BigQuery should wait for - the cloud function to return a response. See for more details - https://cloud.google.com/functions/docs/configuring/timeout. - Please note that even though the cloud function (2nd gen) itself - allows seeting up to 60 minutes of timeout, BigQuery remote - function can wait only up to 20 minutes, see for more details - https://cloud.google.com/bigquery/quotas#remote_function_limits. - By default BigQuery DataFrames uses a 10 minute timeout. `None` - can be passed to let the cloud functions default timeout take effect. - cloud_function_max_instances (int, Optional): - The maximumm instance count for the cloud function created. This - can be used to control how many cloud function instances can be - active at max at any given point of time. Lower setting can help - control the spike in the billing. Higher setting can help - support processing larger scale data. When not specified, cloud - function's default setting applies. For more details see - https://cloud.google.com/functions/docs/configuring/max-instances. - cloud_function_vpc_connector (str, Optional): - The VPC connector you would like to configure for your cloud - function. This is useful if your code needs access to data or - service(s) that are on a VPC network. See for more details - https://cloud.google.com/functions/docs/networking/connecting-vpc. 
- cloud_function_memory_mib (int, Optional): - The amounts of memory (in mebibytes) to allocate for the cloud - function (2nd gen) created. This also dictates a corresponding - amount of allocated CPU for the function. By default a memory of - 1024 MiB is set for the cloud functions created to support - BigQuery DataFrames remote function. If you want to let the - default memory of cloud functions be allocated, pass `None`. See - for more details - https://cloud.google.com/functions/docs/configuring/memory. - """ - # Some defaults may be used from the session if not provided otherwise - import bigframes.exceptions as bf_exceptions - import bigframes.pandas as bpd - import bigframes.series as bf_series - import bigframes.session - - session = cast(bigframes.session.Session, session or bpd.get_global_session()) - - # A BigQuery client is required to perform BQ operations - if not bigquery_client: - bigquery_client = session.bqclient - if not bigquery_client: - raise ValueError( - "A bigquery client must be provided, either directly or via session. " - f"{constants.FEEDBACK_LINK}" - ) - - # A BigQuery connection client is required to perform BQ connection operations - if not bigquery_connection_client: - bigquery_connection_client = session.bqconnectionclient - if not bigquery_connection_client: - raise ValueError( - "A bigquery connection client must be provided, either directly or via session. " - f"{constants.FEEDBACK_LINK}" - ) - - # A cloud functions client is required to perform cloud functions operations - if not cloud_functions_client: - cloud_functions_client = session.cloudfunctionsclient - if not cloud_functions_client: - raise ValueError( - "A cloud functions client must be provided, either directly or via session. " - f"{constants.FEEDBACK_LINK}" - ) - - # A resource manager client is required to get/set IAM operations - if not resource_manager_client: - resource_manager_client = session.resourcemanagerclient - if not resource_manager_client: - raise ValueError( - "A resource manager client must be provided, either directly or via session. " - f"{constants.FEEDBACK_LINK}" - ) - - # BQ remote function must be persisted, for which we need a dataset - # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#:~:text=You%20cannot%20create%20temporary%20remote%20functions. - if dataset: - dataset_ref = bigquery.DatasetReference.from_string( - dataset, default_project=bigquery_client.project - ) - else: - dataset_ref = session._anonymous_dataset - - bq_location, cloud_function_region = get_remote_function_locations( - bigquery_client.location - ) - - # A connection is required for BQ remote function - # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_remote_function - if not bigquery_connection: - bigquery_connection = session._bq_connection # type: ignore - - bigquery_connection = clients.resolve_full_bq_connection_name( - bigquery_connection, - default_project=dataset_ref.project, - default_location=bq_location, - ) - # Guaranteed to be the form of .. - ( - gcp_project_id, - bq_connection_location, - bq_connection_id, - ) = bigquery_connection.split(".") - if gcp_project_id.casefold() != dataset_ref.project.casefold(): - raise ValueError( - "The project_id does not match BigQuery connection gcp_project_id: " - f"{dataset_ref.project}." - ) - if bq_connection_location.casefold() != bq_location.casefold(): - raise ValueError( - "The location does not match BigQuery connection location: " - f"{bq_location}." 
- ) - - # If any CMEK is intended then check that a docker repository is also specified - if ( - cloud_function_kms_key_name is not None - and cloud_function_docker_repository is None - ): - raise ValueError( - "cloud_function_docker_repository must be specified with cloud_function_kms_key_name." - " For more details see https://cloud.google.com/functions/docs/securing/cmek#before_you_begin" - ) - - bq_connection_manager = session.bqconnectionmanager - - def wrapper(func): - nonlocal input_types, output_type - - if not callable(func): - raise TypeError("f must be callable, got {}".format(func)) - - if sys.version_info >= (3, 10): - # Add `eval_str = True` so that deferred annotations are turned into their - # corresponding type objects. Need Python 3.10 for eval_str parameter. - # https://docs.python.org/3/library/inspect.html#inspect.signature - signature_kwargs: Mapping[str, Any] = {"eval_str": True} - else: - signature_kwargs = {} - - signature = inspect.signature( - func, - **signature_kwargs, - ) - - # Try to get input types via type annotations. - if input_types is None: - input_types = [] - for parameter in signature.parameters.values(): - if (param_type := parameter.annotation) is inspect.Signature.empty: - raise ValueError( - "'input_types' was not set and parameter " - f"'{parameter.name}' is missing a type annotation. " - "Types are required to use @remote_function." - ) - input_types.append(param_type) - elif not isinstance(input_types, collections.abc.Sequence): - input_types = [input_types] - - if output_type is None: - if ( - output_type := signature.return_annotation - ) is inspect.Signature.empty: - raise ValueError( - "'output_type' was not set and function is missing a " - "return type annotation. Types are required to use " - "@remote_function." - ) - - # The function will actually be receiving a pandas Series, but allow both - # BigQuery DataFrames and pandas object types for compatibility. - is_row_processor = False - if len(input_types) == 1 and ( - (input_type := input_types[0]) == bf_series.Series - or input_type == pandas.Series - ): - warnings.warn( - "input_types=Series is in preview.", - stacklevel=1, - category=bf_exceptions.PreviewWarning, - ) - - # we will model the row as a json serialized string containing the data - # and the metadata representing the row - input_types = [str] - is_row_processor = True - elif isinstance(input_types, type): - input_types = [input_types] - - # TODO(b/340898611): fix type error - ibis_signature = ibis_signature_from_python_signature( - signature, input_types, output_type # type: ignore - ) - - remote_function_client = RemoteFunctionClient( - dataset_ref.project, - cloud_function_region, - cloud_functions_client, - bq_location, - dataset_ref.dataset_id, - bigquery_client, - bq_connection_id, - bq_connection_manager, - cloud_function_service_account, - cloud_function_kms_key_name, - cloud_function_docker_repository, - session=session, # type: ignore - ) - - # In the unlikely case where the user is trying to re-deploy the same - # function, cleanup the attributes we add below, first. This prevents - # the pickle from having dependencies that might not otherwise be - # present such as ibis or pandas. 
- def try_delattr(attr): - try: - delattr(func, attr) - except AttributeError: - pass - - try_delattr("bigframes_cloud_function") - try_delattr("bigframes_remote_function") - try_delattr("input_dtypes") - try_delattr("output_dtype") - try_delattr("is_row_processor") - try_delattr("ibis_node") - - ( - rf_name, - cf_name, - created_new, - ) = remote_function_client.provision_bq_remote_function( - func, - input_types=tuple( - third_party_ibis_bqtypes.BigQueryType.from_ibis(type_) - for type_ in ibis_signature.input_types - ), - output_type=third_party_ibis_bqtypes.BigQueryType.from_ibis( - ibis_signature.output_type - ), - reuse=reuse, - name=name, - package_requirements=packages, - max_batching_rows=max_batching_rows, - cloud_function_timeout=cloud_function_timeout, - cloud_function_max_instance_count=cloud_function_max_instances, - is_row_processor=is_row_processor, - cloud_function_vpc_connector=cloud_function_vpc_connector, - cloud_function_memory_mib=cloud_function_memory_mib, - ) - - # TODO: Move ibis logic to compiler step - node = ibis.udf.scalar.builtin( - func, - name=rf_name, - schema=f"{dataset_ref.project}.{dataset_ref.dataset_id}", - signature=(ibis_signature.input_types, ibis_signature.output_type), - ) - func.bigframes_cloud_function = ( - remote_function_client.get_cloud_function_fully_qualified_name(cf_name) - ) - func.bigframes_remote_function = ( - remote_function_client.get_remote_function_fully_qualilfied_name( - rf_name - ) - ) - func.input_dtypes = tuple( - [ - bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype( - input_type - ) - for input_type in ibis_signature.input_types - ] - ) - func.output_dtype = ( - bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype( - ibis_signature.output_type - ) - ) - func.is_row_processor = is_row_processor - func.ibis_node = node - - # If a new remote function was created, update the cloud artifacts - # created in the session. This would be used to clean up any - # resources in the session. Note that we need to do this only for - # the case where an explicit name was not provided by the user and - # we used an internal name. For the cases where the user provided an - # explicit name, we are assuming that the user wants to persist them - # with that name and would directly manage their lifecycle. 
- if created_new and (not name): - self._update_temp_artifacts( - func.bigframes_remote_function, func.bigframes_cloud_function - ) - return func - - return wrapper - - def remote_function(*args, **kwargs): - remote_function_session = _RemoteFunctionSession() + remote_function_session = rf_session.RemoteFunctionSession() return remote_function_session.remote_function(*args, **kwargs) -remote_function.__doc__ = _RemoteFunctionSession.remote_function.__doc__ +remote_function.__doc__ = rf_session.RemoteFunctionSession.remote_function.__doc__ def read_gbq_function( diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 21f75eb82c..08d808572d 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -63,7 +63,7 @@ import bigframes.core.tools import bigframes.dataframe import bigframes.enums -import bigframes.functions.remote_function as bigframes_rf +import bigframes.functions._utils as functions_utils import bigframes.operations as ops import bigframes.series import bigframes.session @@ -817,7 +817,7 @@ def clean_up_by_session_id( session.bqclient, dataset, session_id ) - bigframes_rf._clean_up_by_session_id( + functions_utils._clean_up_by_session_id( session.bqclient, session.cloudfunctionsclient, dataset, session_id ) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 2da788292b..8cef869a32 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -95,6 +95,7 @@ import bigframes.dtypes import bigframes.exceptions import bigframes.formatting_helpers as formatting_helpers +import bigframes.functions._remote_function_session as bigframes_rf_session import bigframes.functions.remote_function as bigframes_rf import bigframes.session._io.bigquery as bf_io_bigquery import bigframes.session._io.bigquery.read_gbq_table as bf_read_gbq_table @@ -316,7 +317,7 @@ def __init__( ) self._allow_ambiguity = not self._strictly_ordered - self._remote_function_session = bigframes_rf._RemoteFunctionSession() + self._remote_function_session = bigframes_rf_session.RemoteFunctionSession() @property def bqclient(self): diff --git a/tests/system/large/test_remote_function.py b/tests/system/large/test_remote_function.py index 095f7059cd..d6eefc1e31 100644 --- a/tests/system/large/test_remote_function.py +++ b/tests/system/large/test_remote_function.py @@ -31,7 +31,7 @@ import bigframes.dataframe import bigframes.dtypes import bigframes.exceptions -import bigframes.functions.remote_function as bigframes_rf +import bigframes.functions._utils as functions_utils import bigframes.pandas as bpd import bigframes.series from tests.system.utils import ( @@ -595,9 +595,11 @@ def add_one(x): add_one_uniq, add_one_uniq_dir = make_uniq_udf(add_one) # Expected cloud function name for the unique udf - package_requirements = bigframes_rf._get_updated_package_requirements() - add_one_uniq_hash = bigframes_rf._get_hash(add_one_uniq, package_requirements) - add_one_uniq_cf_name = bigframes_rf.get_cloud_function_name( + package_requirements = functions_utils._get_updated_package_requirements() + add_one_uniq_hash = functions_utils._get_hash( + add_one_uniq, package_requirements + ) + add_one_uniq_cf_name = functions_utils.get_cloud_function_name( add_one_uniq_hash, session.session_id ) diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py index 8ecf9eb368..db573efa40 100644 --- a/tests/system/small/test_remote_function.py +++ b/tests/system/small/test_remote_function.py @@ -23,6 +23,7 @@ import 
bigframes import bigframes.dtypes import bigframes.exceptions +from bigframes.functions import _utils as rf_utils from bigframes.functions import remote_function as rf from tests.system.utils import assert_pandas_df_equal @@ -89,12 +90,12 @@ def get_rf_name(func, package_requirements=None, is_row_processor=False): """Get a remote function name for testing given a udf.""" # Augment user package requirements with any internal package # requirements - package_requirements = rf._get_updated_package_requirements( + package_requirements = rf_utils._get_updated_package_requirements( package_requirements, is_row_processor ) # Compute a unique hash representing the user code - function_hash = rf._get_hash(func, package_requirements) + function_hash = rf_utils._get_hash(func, package_requirements) return f"bigframes_{function_hash}" @@ -714,7 +715,7 @@ def test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id): src = {"x": [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]} - routine_ref_str = rf.routine_ref_to_string_for_query(routine.reference) + routine_ref_str = rf_utils.routine_ref_to_string_for_query(routine.reference) direct_sql = " UNION ALL ".join( [f"SELECT {x} AS x, {routine_ref_str}({x}) AS y" for x in src["x"]] ) diff --git a/tests/system/utils.py b/tests/system/utils.py index 9fbf191a3a..e9054d04c9 100644 --- a/tests/system/utils.py +++ b/tests/system/utils.py @@ -26,7 +26,7 @@ import pyarrow as pa # type: ignore import pytest -from bigframes.functions import remote_function +import bigframes.functions._utils as functions_utils import bigframes.pandas ML_REGRESSION_METRICS = [ @@ -340,7 +340,7 @@ def get_cloud_functions( not name or not name_prefix ), "Either 'name' or 'name_prefix' can be passed but not both." - _, location = remote_function.get_remote_function_locations(location) + _, location = functions_utils.get_remote_function_locations(location) parent = f"projects/{project}/locations/{location}" request = functions_v2.ListFunctionsRequest(parent=parent) page_result = functions_client.list_functions(request=request) From 6dff860758bd5de08f0692703f27906e1efbe7e6 Mon Sep 17 00:00:00 2001 From: Chelsea Lin <124939984+chelsea-lin@users.noreply.github.com> Date: Mon, 12 Aug 2024 21:15:52 -0700 Subject: [PATCH 13/15] chore: update owlbot script to prevent silent failures at s.replace (#889) * chore: update owlbot script to prevent silent failures at s.replace * fix errors * removing s.replace for CONTRIBUTING.rst because it was excluded from templated files --- owlbot.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/owlbot.py b/owlbot.py index f9d9410d6d..b29384d462 100644 --- a/owlbot.py +++ b/owlbot.py @@ -61,7 +61,7 @@ # ---------------------------------------------------------------------------- # Encourage sharring all relevant versions in bug reports. -s.replace( +assert 1 == s.replace( [".github/ISSUE_TEMPLATE/bug_report.md"], re.escape("#### Steps to reproduce\n"), textwrap.dedent( @@ -90,7 +90,7 @@ ) # Make sure build includes all necessary files. -s.replace( +assert 1 == s.replace( ["MANIFEST.in"], re.escape("recursive-include google"), "recursive-include third_party/bigframes_vendored *\nrecursive-include bigframes", @@ -98,7 +98,7 @@ # Even though BigQuery DataFrames isn't technically a client library, we are # opting into Cloud RAD for docs hosting. 
-s.replace( +assert 1 == s.replace( [".kokoro/docs/common.cfg"], re.escape('value: "docs-staging-v2-staging"'), 'value: "docs-staging-v2"', @@ -106,7 +106,7 @@ # Use a custom table of contents since the default one isn't organized well # enough for the number of classes we have. -s.replace( +assert 1 == s.replace( [".kokoro/publish-docs.sh"], ( re.escape("# upload docs") @@ -124,19 +124,12 @@ ) # Fixup the documentation. -s.replace( +assert 1 == s.replace( ["docs/conf.py"], re.escape("Google Cloud Client Libraries for bigframes"), "BigQuery DataFrames provides DataFrame APIs on the BigQuery engine", ) -# Update the contributing guide to reflect some differences in this repo. -s.replace( - ["CONTRIBUTING.rst"], - re.escape("blacken"), - "format", -) - # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- From e027b7e9d29f628d058611106014a1790459958c Mon Sep 17 00:00:00 2001 From: Chelsea Lin <124939984+chelsea-lin@users.noreply.github.com> Date: Mon, 12 Aug 2024 22:24:21 -0700 Subject: [PATCH 14/15] feat: Series.str.__getitem__ (#897) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - [X] Make sure to open an issue as internal issue: 358459166 - [X] Ensure the tests and linter pass - [X] Code coverage does not decrease (if any source code was changed) - [X] Appropriate docs were updated (if necessary) Fixes internal issue: 358459166 🦕 --- bigframes/core/compile/scalar_op_compiler.py | 20 ++- bigframes/operations/__init__.py | 34 ++++ bigframes/operations/strings.py | 27 +++ tests/system/small/operations/test_strings.py | 157 +++++++++++++++--- .../pandas/core/strings/accessor.py | 31 ++++ 5 files changed, 241 insertions(+), 28 deletions(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 32749b32a6..e70c49e337 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -902,6 +902,24 @@ def array_to_string_op_impl(x: ibis_types.Value, op: ops.ArrayToStringOp): return typing.cast(ibis_types.ArrayValue, x).join(op.delimiter) +@scalar_op_compiler.register_unary_op(ops.ArrayIndexOp, pass_op=True) +def array_index_op_impl(x: ibis_types.Value, op: ops.ArrayIndexOp): + res = typing.cast(ibis_types.ArrayValue, x)[op.index] + if x.type().is_string(): + return _null_or_value(res, res != ibis.literal("")) + else: + return res + + +@scalar_op_compiler.register_unary_op(ops.ArraySliceOp, pass_op=True) +def array_slice_op_impl(x: ibis_types.Value, op: ops.ArraySliceOp): + res = typing.cast(ibis_types.ArrayValue, x)[op.start : op.stop : op.step] + if x.type().is_string(): + return _null_or_value(res, res != ibis.literal("")) + else: + return res + + # JSON Ops @scalar_op_compiler.register_binary_op(ops.JSONSet, pass_op=True) def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet): @@ -984,7 +1002,7 @@ def ne_op( def _null_or_value(value: ibis_types.Value, where_value: ibis_types.BooleanValue): - return ibis.where( + return ibis.ifelse( where_value, value, ibis.null(), diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 4d4e40643d..fb333d7a53 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -602,6 +602,40 @@ def output_type(self, *input_types): return dtypes.STRING_DTYPE +@dataclasses.dataclass(frozen=True) +class ArrayIndexOp(UnaryOp): 
+ name: typing.ClassVar[str] = "array_index" + index: int + + def output_type(self, *input_types): + input_type = input_types[0] + if dtypes.is_string_like(input_type): + return dtypes.STRING_DTYPE + elif dtypes.is_array_like(input_type): + return dtypes.arrow_dtype_to_bigframes_dtype( + input_type.pyarrow_dtype.value_type + ) + else: + raise TypeError("Input type must be an array or string-like type.") + + +@dataclasses.dataclass(frozen=True) +class ArraySliceOp(UnaryOp): + name: typing.ClassVar[str] = "array_slice" + start: int + stop: typing.Optional[int] = None + step: typing.Optional[int] = None + + def output_type(self, *input_types): + input_type = input_types[0] + if dtypes.is_string_like(input_type): + return dtypes.STRING_DTYPE + elif dtypes.is_array_like(input_type): + return input_type + else: + raise TypeError("Input type must be an array or string-like type.") + + ## JSON Ops @dataclasses.dataclass(frozen=True) class JSONExtract(UnaryOp): diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index 22c325d7e0..d3e9c7edc6 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -38,6 +38,33 @@ class StringMethods(bigframes.operations.base.SeriesMethods, vendorstr.StringMethods): __doc__ = vendorstr.StringMethods.__doc__ + def __getitem__(self, key: Union[int, slice]) -> series.Series: + if isinstance(key, int): + if key < 0: + raise NotImplementedError("Negative indexing is not supported.") + return self._apply_unary_op(ops.ArrayIndexOp(index=key)) + elif isinstance(key, slice): + if key.step is not None and key.step != 1: + raise NotImplementedError( + f"Only a step of 1 is allowed, got {key.step}" + ) + if (key.start is not None and key.start < 0) or ( + key.stop is not None and key.stop < 0 + ): + raise NotImplementedError( + "Slicing with negative numbers is not allowed." 
+ ) + + return self._apply_unary_op( + ops.ArraySliceOp( + start=key.start if key.start is not None else 0, + stop=key.stop, + step=key.step, + ) + ) + else: + raise ValueError(f"key must be an int or slice, got {type(key).__name__}") + def find( self, sub: str, diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index b8a8ad2d1e..3191adf920 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -14,10 +14,13 @@ import re +import packaging.version import pandas as pd +import pyarrow as pa import pytest -import bigframes.series +import bigframes.dtypes as dtypes +import bigframes.pandas as bpd from ...utils import assert_series_equal @@ -25,7 +28,7 @@ def test_find(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.find("W").to_pandas() pd_result = scalars_pandas_df[col_name].str.find("W") @@ -50,7 +53,7 @@ def test_find(scalars_dfs): def test_str_contains(scalars_dfs, pat, case, flags, regex): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.contains( pat, case=case, flags=flags, regex=regex @@ -72,7 +75,7 @@ def test_str_contains(scalars_dfs, pat, case, flags, regex): def test_str_extract(scalars_dfs, pat): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.extract(pat).to_pandas() pd_result = scalars_pandas_df[col_name].str.extract(pat) @@ -101,7 +104,7 @@ def test_str_extract(scalars_dfs, pat): def test_str_replace(scalars_dfs, pat, repl, case, flags, regex): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.replace( pat, repl=repl, case=case, flags=flags, regex=regex @@ -132,7 +135,7 @@ def test_str_replace(scalars_dfs, pat, repl, case, flags, regex): def test_str_startswith(scalars_dfs, pat): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] pd_series = scalars_pandas_df[col_name].astype("object") bf_result = bf_series.str.startswith(pat).to_pandas() @@ -157,7 +160,7 @@ def test_str_startswith(scalars_dfs, pat): def test_str_endswith(scalars_dfs, pat): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] pd_series = scalars_pandas_df[col_name].astype("object") bf_result = bf_series.str.endswith(pat).to_pandas() @@ -169,7 +172,7 @@ def test_str_endswith(scalars_dfs, pat): def test_len(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.len().to_pandas() pd_result = scalars_pandas_df[col_name].str.len() @@ -188,7 +191,7 @@ def test_len_with_array_column(nested_df, nested_pandas_df): See: https://stackoverflow.com/a/41340543/101923 """ col_name = "event_sequence" - 
bf_series: bigframes.series.Series = nested_df[col_name] + bf_series: bpd.Series = nested_df[col_name] bf_result = bf_series.str.len().to_pandas() pd_result = nested_pandas_df[col_name].str.len() @@ -204,7 +207,7 @@ def test_len_with_array_column(nested_df, nested_pandas_df): def test_lower(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.lower().to_pandas() pd_result = scalars_pandas_df[col_name].str.lower() @@ -217,7 +220,7 @@ def test_lower(scalars_dfs): def test_reverse(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.reverse().to_pandas() pd_result = scalars_pandas_df[col_name].copy() for i in pd_result.index: @@ -239,7 +242,7 @@ def test_reverse(scalars_dfs): def test_slice(scalars_dfs, start, stop): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.slice(start, stop).to_pandas() pd_series = scalars_pandas_df[col_name] pd_result = pd_series.str.slice(start, stop) @@ -253,7 +256,7 @@ def test_slice(scalars_dfs, start, stop): def test_strip(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.strip().to_pandas() pd_result = scalars_pandas_df[col_name].str.strip() @@ -266,7 +269,7 @@ def test_strip(scalars_dfs): def test_upper(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.upper().to_pandas() pd_result = scalars_pandas_df[col_name].str.upper() @@ -375,7 +378,7 @@ def test_isupper(weird_strings, weird_strings_pd): def test_rstrip(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.rstrip().to_pandas() pd_result = scalars_pandas_df[col_name].str.rstrip() @@ -388,7 +391,7 @@ def test_rstrip(scalars_dfs): def test_lstrip(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.lstrip().to_pandas() pd_result = scalars_pandas_df[col_name].str.lstrip() @@ -402,7 +405,7 @@ def test_lstrip(scalars_dfs): def test_repeat(scalars_dfs, repeats): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.repeat(repeats).to_pandas() pd_result = scalars_pandas_df[col_name].str.repeat(repeats) @@ -415,7 +418,7 @@ def test_repeat(scalars_dfs, repeats): def test_capitalize(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.capitalize().to_pandas() pd_result = 
scalars_pandas_df[col_name].str.capitalize() @@ -428,9 +431,9 @@ def test_capitalize(scalars_dfs): def test_cat_with_series(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_filter: bigframes.series.Series = scalars_df["bool_col"] - bf_left: bigframes.series.Series = scalars_df[col_name][bf_filter] - bf_right: bigframes.series.Series = scalars_df[col_name] + bf_filter: bpd.Series = scalars_df["bool_col"] + bf_left: bpd.Series = scalars_df[col_name][bf_filter] + bf_right: bpd.Series = scalars_df[col_name] bf_result = bf_left.str.cat(others=bf_right).to_pandas() pd_filter = scalars_pandas_df["bool_col"] pd_left = scalars_pandas_df[col_name][pd_filter] @@ -447,7 +450,7 @@ def test_str_match(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" pattern = "[A-Z].*" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.match(pattern).to_pandas() pd_result = scalars_pandas_df[col_name].str.match(pattern) @@ -461,7 +464,7 @@ def test_str_fullmatch(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" pattern = "[A-Z].*!" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.fullmatch(pattern).to_pandas() pd_result = scalars_pandas_df[col_name].str.fullmatch(pattern) @@ -474,7 +477,7 @@ def test_str_fullmatch(scalars_dfs): def test_str_get(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.get(8).to_pandas() pd_result = scalars_pandas_df[col_name].str.get(8) @@ -487,7 +490,7 @@ def test_str_get(scalars_dfs): def test_str_pad(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.pad(8, side="both", fillchar="%").to_pandas() pd_result = scalars_pandas_df[col_name].str.pad(8, side="both", fillchar="%") @@ -510,7 +513,7 @@ def test_str_zfill(weird_strings, weird_strings_pd): def test_str_ljust(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.ljust(7, fillchar="%").to_pandas() pd_result = scalars_pandas_df[col_name].str.ljust(7, fillchar="%") @@ -523,7 +526,7 @@ def test_str_ljust(scalars_dfs): def test_str_rjust(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.rjust(9, fillchar="%").to_pandas() pd_result = scalars_pandas_df[col_name].str.rjust(9, fillchar="%") @@ -562,3 +565,103 @@ def test_str_split_raise_errors(scalars_dfs, pat, regex): pd_result = pd_result.apply(lambda x: [] if pd.isnull(x) is True else x) assert_series_equal(pd_result, bf_result, check_dtype=False) + + +@pytest.mark.parametrize( + ("index"), + [ + pytest.param( + "first", id="invalid_type", marks=pytest.mark.xfail(raises=ValueError) + ), + pytest.param( + -1, id="neg_index", marks=pytest.mark.xfail(raises=NotImplementedError) + ), + pytest.param( + slice(0, 2, 2), + id="only_allow_one_step", + 
marks=pytest.mark.xfail(raises=NotImplementedError), + ), + pytest.param( + slice(-1, None, None), + id="neg_slicing", + marks=pytest.mark.xfail(raises=NotImplementedError), + ), + ], +) +def test_getitem_raise_errors(scalars_dfs, index): + scalars_df, _ = scalars_dfs + col_name = "string_col" + scalars_df[col_name].str[index] + + +@pytest.mark.parametrize( + ("index"), + [ + pytest.param(2, id="int"), + pytest.param(slice(None, None, None), id="default_start_slice"), + pytest.param(slice(0, None, 1), id="default_stop_slice"), + pytest.param(slice(0, 2, None), id="default_step_slice"), + ], +) +def test_getitem_w_string(scalars_dfs, index): + scalars_df, scalars_pandas_df = scalars_dfs + col_name = "string_col" + bf_result = scalars_df[col_name].str[index].to_pandas() + pd_result = scalars_pandas_df[col_name].str[index] + + assert_series_equal(pd_result, bf_result) + + +@pytest.mark.parametrize( + ("index"), + [ + pytest.param(2, id="int"), + pytest.param(slice(None, None, None), id="default_start_slice"), + pytest.param(slice(0, None, 1), id="default_stop_slice"), + pytest.param(slice(0, 2, None), id="default_step_slice"), + pytest.param(slice(0, 0, None), id="single_one_slice"), + ], +) +def test_getitem_w_array(index): + data = [[1], [2, 3], [], [4, 5, 6]] + s = bpd.Series(data) + pd_s = pd.Series(data) + + bf_result = s.str[index].to_pandas() + pd_result = pd_s.str[index] + # Skip dtype checks here because pandas returns `int64` while BF returns `Int64`. + assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) + + +def test_getitem_w_struct_array(): + if packaging.version.Version(pd.__version__) <= packaging.version.Version("1.5.0"): + pytest.skip("https://github.com/googleapis/python-bigquery/issues/1992") + + pa_struct = pa.struct( + [ + ("name", pa.string()), + ("age", pa.int64()), + ] + ) + data: list[list[dict]] = [ + [ + {"name": "Alice", "age": 30}, + {"name": "Bob", "age": 25}, + ], + [ + {"name": "Charlie", "age": 35}, + {"name": "David", "age": 40}, + {"name": "Eva", "age": 28}, + ], + [], + [{"name": "Frank", "age": 50}], + ] + s = bpd.Series(data, dtype=bpd.ArrowDtype(pa.list_(pa_struct))) + + result = s.str[1] + assert dtypes.is_struct_like(result.dtype) + + expected_data = [item[1] if len(item) > 1 else None for item in data] + expected = bpd.Series(expected_data, dtype=bpd.ArrowDtype((pa_struct))) + + assert_series_equal(result.to_pandas(), expected.to_pandas()) diff --git a/third_party/bigframes_vendored/pandas/core/strings/accessor.py b/third_party/bigframes_vendored/pandas/core/strings/accessor.py index b02c23f945..bd5e78f415 100644 --- a/third_party/bigframes_vendored/pandas/core/strings/accessor.py +++ b/third_party/bigframes_vendored/pandas/core/strings/accessor.py @@ -13,6 +13,37 @@ class StringMethods: R's stringr package. """ + def __getitem__(self, key: typing.Union[int, slice]): + """ + Index or slice string or list in the Series. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['Alice', 'Bob', 'Charlie']) + >>> s.str[0] + 0 A + 1 B + 2 C + dtype: string + + >>> s.str[0:3] + 0 Ali + 1 Bob + 2 Cha + dtype: string + + Args: + key (int | slice): + Index or slice of indices to access from each string or list. + + Returns: + bigframes.series.Series: The list at requested index. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def extract(self, pat: str, flags: int = 0): """ Extract capture groups in the regex `pat` as columns in a DataFrame. From ae07274ea3b49f0350da77c3f8fdb44e4cda6778 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:21:06 -0700 Subject: [PATCH 15/15] chore(main): release 1.14.0 (#882) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ bigframes/version.py | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3209391f44..754658c5e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,32 @@ [1]: https://pypi.org/project/bigframes/#history +## [1.14.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.13.0...v1.14.0) (2024-08-14) + + +### Features + +* Implement `bigframes.bigquery.json_extract` ([#868](https://github.com/googleapis/python-bigquery-dataframes/issues/868)) ([3dbf84b](https://github.com/googleapis/python-bigquery-dataframes/commit/3dbf84bd1531c1f8d41ba57c2c38b3ba6abfb812)) +* Implement `Series.str.__getitem__` ([#897](https://github.com/googleapis/python-bigquery-dataframes/issues/897)) ([e027b7e](https://github.com/googleapis/python-bigquery-dataframes/commit/e027b7e9d29f628d058611106014a1790459958c)) + + +### Bug Fixes + +* Fix caching from generating row numbers in partial ordering mode ([#872](https://github.com/googleapis/python-bigquery-dataframes/issues/872)) ([52b7786](https://github.com/googleapis/python-bigquery-dataframes/commit/52b7786c3a28da6c29e3ddf12629802215194ad9)) + + +### Performance Improvements + +* Generate SQL with fewer CTEs ([#877](https://github.com/googleapis/python-bigquery-dataframes/issues/877)) ([eb60804](https://github.com/googleapis/python-bigquery-dataframes/commit/eb6080460344aff2fabb7864536ea4fe24c5fbef)) +* Speed up compilation by reducing redundant type normalization ([#896](https://github.com/googleapis/python-bigquery-dataframes/issues/896)) ([e0b11bc](https://github.com/googleapis/python-bigquery-dataframes/commit/e0b11bc8c038db7b950b1653ed4cd44a6246c713)) + + +### Documentation + +* Add streaming html docs ([#884](https://github.com/googleapis/python-bigquery-dataframes/issues/884)) ([171da6c](https://github.com/googleapis/python-bigquery-dataframes/commit/171da6cb33165b49d46ea6528038342abd89e9fa)) +* Fix the `DisplayOptions` doc rendering ([#893](https://github.com/googleapis/python-bigquery-dataframes/issues/893)) ([3eb6a17](https://github.com/googleapis/python-bigquery-dataframes/commit/3eb6a17a5823faf5ecba92cb9a554df74477871d)) +* Update streaming notebook ([#887](https://github.com/googleapis/python-bigquery-dataframes/issues/887)) ([6e6f9df](https://github.com/googleapis/python-bigquery-dataframes/commit/6e6f9df55d435afe0b3ade728ca06826e92a6ee6)) + ## [1.13.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.12.0...v1.13.0) (2024-08-05) diff --git a/bigframes/version.py b/bigframes/version.py index b474f021d4..2e135689ed 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.13.0" +__version__ = "1.14.0"
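
A minimal usage sketch of the `Series.str.__getitem__` support released in 1.14.0, mirroring the accessor docstring and the new system tests above. It is illustrative only and assumes an authenticated BigQuery DataFrames session with a default billing project configured; the variable names are not part of the patch.

```python
import bigframes.pandas as bpd

bpd.options.display.progress_bar = None

# String column: integer indexing and slicing are compiled through the new
# ArrayIndexOp / ArraySliceOp scalar operators.
names = bpd.Series(["Alice", "Bob", "Charlie"])
first_chars = names.str[0]   # "A", "B", "C"
prefixes = names.str[0:3]    # "Ali", "Bob", "Cha"

# List (ARRAY) column: the same accessor indexes into each array, yielding
# <NA> where the requested element does not exist (e.g. the empty list).
events = bpd.Series([[1], [2, 3], [], [4, 5, 6]])
second_items = events.str[1]  # <NA>, 3, <NA>, 5

print(first_chars.to_pandas())
print(prefixes.to_pandas())
print(second_items.to_pandas())

# Negative indices, negative slice bounds, and steps other than 1 raise
# NotImplementedError, as covered by test_getitem_raise_errors.
```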