From 7d6909df904558e32ebbafbc4981161fcf9126fa Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 30 Apr 2025 01:02:24 +0000 Subject: [PATCH 1/4] feat: improve error message in `Series.apply` for direct udfs --- bigframes/series.py | 12 +++++++++--- venv-py39/pyvenv.cfg | 3 +++ 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 venv-py39/pyvenv.cfg diff --git a/bigframes/series.py b/bigframes/series.py index 87f1f1d141..be241e11b6 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1712,9 +1712,15 @@ def apply( # as a whole. if by_row: raise ValueError( - "A vectorized non-BigFrames BigQuery function can be " - "provided only with by_row=False. For element-wise operation " - "it must be a BigFrames BigQuery function." + "You have passed a function as-is. If your intention is to " + "apply this function to the entire Series as a whole, and " + "you are sure that it performs only the operations that " + "are implemented for a Series (e.g. a chain of " + "arithmatic/logical operations), please also specify " + "by_row=False. If your function contains arbitrary code, " + "it can only be applied to every element in the Series, in " + "which case you must convert it to a BigFrames BigQuery " + "function using `udf` or `remote_function` before passing." ) try: diff --git a/venv-py39/pyvenv.cfg b/venv-py39/pyvenv.cfg new file mode 100644 index 0000000000..f1d4bab9a4 --- /dev/null +++ b/venv-py39/pyvenv.cfg @@ -0,0 +1,3 @@ +home = /usr/local/google/home/shobs/.pyenv/versions/3.9.17/bin +include-system-site-packages = false +version = 3.9.17 From 3d7f697ea887668c5a7b95fb1d508f3413a694eb Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 30 Apr 2025 01:16:19 +0000 Subject: [PATCH 2/4] remove stray file, improve message --- bigframes/series.py | 18 ++++++++++-------- venv-py39/pyvenv.cfg | 3 --- 2 files changed, 10 insertions(+), 11 deletions(-) delete mode 100644 venv-py39/pyvenv.cfg diff --git a/bigframes/series.py b/bigframes/series.py index be241e11b6..8c5db807b8 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1713,14 +1713,16 @@ def apply( if by_row: raise ValueError( "You have passed a function as-is. If your intention is to " - "apply this function to the entire Series as a whole, and " - "you are sure that it performs only the operations that " - "are implemented for a Series (e.g. a chain of " - "arithmatic/logical operations), please also specify " - "by_row=False. If your function contains arbitrary code, " - "it can only be applied to every element in the Series, in " - "which case you must convert it to a BigFrames BigQuery " - "function using `udf` or `remote_function` before passing." + "apply this function in a vectorized way (i.e. to the " + "entire Series as a whole, and you are sure that it " + "performs only the operations that are implemented for a " + "Series (e.g. a chain of arithmatic/logical operations, " + "such as `def foo(s): return s % 2 == 1`), please also " + "specify `by_row=False`. If your function contains " + "arbitrary code, it can only be applied to every element " + "in the Series individually, in which case you must " + "convert it to a BigFrames BigQuery function using `udf`, " + "or `remote_function` before passing." ) try: diff --git a/venv-py39/pyvenv.cfg b/venv-py39/pyvenv.cfg deleted file mode 100644 index f1d4bab9a4..0000000000 --- a/venv-py39/pyvenv.cfg +++ /dev/null @@ -1,3 +0,0 @@ -home = /usr/local/google/home/shobs/.pyenv/versions/3.9.17/bin -include-system-site-packages = false -version = 3.9.17 From 374900e5403f4021565c50761266dcfee2edbe41 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 1 May 2025 17:54:21 +0000 Subject: [PATCH 3/4] specify namespace for udf and remote_function in the error message --- bigframes/series.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bigframes/series.py b/bigframes/series.py index 8c5db807b8..2075c65575 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1721,8 +1721,9 @@ def apply( "specify `by_row=False`. If your function contains " "arbitrary code, it can only be applied to every element " "in the Series individually, in which case you must " - "convert it to a BigFrames BigQuery function using `udf`, " - "or `remote_function` before passing." + "convert it to a BigFrames BigQuery function using " + "`bigframes.pandas.udf`, " + "or `bigframes.pandas.remote_function` before passing." ) try: From b808ec677e0ccfcb58e678ac3168b51ce8eaa2b5 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 7 May 2025 19:30:52 +0000 Subject: [PATCH 4/4] fix typo --- bigframes/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/series.py b/bigframes/series.py index 2075c65575..37a3723a0a 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1716,7 +1716,7 @@ def apply( "apply this function in a vectorized way (i.e. to the " "entire Series as a whole, and you are sure that it " "performs only the operations that are implemented for a " - "Series (e.g. a chain of arithmatic/logical operations, " + "Series (e.g. a chain of arithmetic/logical operations, " "such as `def foo(s): return s % 2 == 1`), please also " "specify `by_row=False`. If your function contains " "arbitrary code, it can only be applied to every element "