Skip to content

fix: Support str.replace re.compile with flags #1736

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 1 commit on May 15, 2025.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions bigframes/operations/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from __future__ import annotations

import re
from typing import cast, Literal, Optional, Union
from typing import Literal, Optional, Union

import bigframes_vendored.constants as constants
import bigframes_vendored.pandas.core.strings.accessor as vendorstr
Expand Down Expand Up @@ -230,21 +230,26 @@ def replace(
flags: int = 0,
regex: bool = False,
) -> series.Series:
is_compiled = isinstance(pat, re.Pattern)
patstr = cast(str, pat.pattern if is_compiled else pat) # type: ignore
if isinstance(pat, re.Pattern):
assert isinstance(pat.pattern, str)
pat_str = pat.pattern
flags = pat.flags | flags
else:
pat_str = pat

if case is False:
return self.replace(pat, repl, flags=flags | re.IGNORECASE, regex=True)
return self.replace(pat_str, repl, flags=flags | re.IGNORECASE, regex=True)
if regex:
re2flags = _parse_flags(flags)
if re2flags:
patstr = re2flags + patstr
return self._apply_unary_op(ops.RegexReplaceStrOp(pat=patstr, repl=repl))
pat_str = re2flags + pat_str
return self._apply_unary_op(ops.RegexReplaceStrOp(pat=pat_str, repl=repl))
else:
if is_compiled:
if isinstance(pat, re.Pattern):
raise ValueError(
"Must set 'regex'=True if using compiled regex pattern."
)
return self._apply_unary_op(ops.ReplaceStrOp(pat=patstr, repl=repl))
return self._apply_unary_op(ops.ReplaceStrOp(pat=pat_str, repl=repl))

def startswith(
self,
Expand Down Expand Up @@ -318,10 +323,15 @@ def to_blob(self, connection: Optional[str] = None) -> series.Series:
def _parse_flags(flags: int) -> Optional[str]:
re2flags = []
for reflag, re2flag in REGEXP_FLAGS.items():
if flags & flags:
if flags & reflag:
re2flags.append(re2flag)
flags = flags ^ reflag

# re2 handles unicode fine by default
# most compiled re in python will have unicode set
if re.U and flags:
flags = flags ^ re.U

# Remaining flags couldn't be mapped to re2 engine
if flags:
raise NotImplementedError(
Expand Down
1 change: 1 addition & 0 deletions tests/system/small/operations/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def test_str_extract(scalars_dfs, pat):
(re.compile("(?i).e.."), "blah", None, 0, True),
("H", "h", True, 0, False),
(", ", "__", True, 0, False),
(re.compile(r"hEllo", flags=re.I), "blah", None, 0, True),
],
)
def test_str_replace(scalars_dfs, pat, repl, case, flags, regex):
Expand Down