Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions bigframes/core/compile/sqlglot/expressions/unary_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import bigframes.core.compile.sqlglot.expressions.constants as constants
from bigframes.core.compile.sqlglot.expressions.op_registration import OpRegistration
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
import bigframes.dtypes as dtypes

UNARY_OP_REGISTRATION = OpRegistration()

Expand Down Expand Up @@ -420,7 +421,28 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:

@UNARY_OP_REGISTRATION.register(ops.IsInOp)
def _(op: ops.IsInOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``IsInOp`` to a SQL ``IN`` expression.

    Candidate values are filtered for type compatibility: a value is kept
    only if its dtype equals the expression's dtype, or both are numeric
    (so e.g. an INT64 column can match float literals). Incompatible
    values can never compare equal and are dropped. Null candidates are
    handled separately because SQL ``IN`` never matches NULL.
    """
    values = []
    is_numeric_expr = dtypes.is_numeric(expr.dtype)
    for value in op.values:
        if value is None:
            # NULL literals are handled by the match_nulls branch below.
            continue
        dtype = dtypes.bigframes_type(type(value))
        # Parenthesized for clarity: exact dtype match, or both numeric.
        if expr.dtype == dtype or (is_numeric_expr and dtypes.is_numeric(dtype)):
            values.append(sge.convert(value))

    if op.match_nulls and any(_is_null(value) for value in op.values):
        if not values:
            # All candidates were null/incompatible. Emitting `IN ()` would be
            # invalid SQL; an empty IN-list is semantically FALSE, so the
            # result reduces to the null check alone.
            return sge.Is(this=expr.expr, expression=sge.Null())
        return sge.Is(this=expr.expr, expression=sge.Null()) | sge.In(
            this=expr.expr, expressions=values
        )

    if len(values) == 0:
        # Nothing can match; IN over an empty set is constant FALSE.
        return sge.convert(False)

    # COALESCE maps the NULL result of `NULL IN (...)` to FALSE so a null
    # expression is reported as "not in" rather than unknown.
    return sge.func(
        "COALESCE", sge.In(this=expr.expr, expressions=values), sge.convert(False)
    )


@UNARY_OP_REGISTRATION.register(ops.isalnum_op)
Expand Down Expand Up @@ -767,7 +789,7 @@ def _(op: ops.ToTimedeltaOp, expr: TypedExpr) -> sge.Expression:
factor = UNIT_TO_US_CONVERSION_FACTORS[op.unit]
if factor != 1:
value = sge.Mul(this=value, expression=sge.convert(factor))
return sge.Interval(this=value, unit=sge.Identifier(this="MICROSECOND"))
return value


@UNARY_OP_REGISTRATION.register(ops.UnixMicros)
Expand Down Expand Up @@ -866,3 +888,9 @@ def _(op: ops.ZfillOp, expr: TypedExpr) -> sge.Expression:
],
default=sge.func("LPAD", expr.expr, sge.convert(op.width), sge.convert("0")),
)


# Helpers
def _is_null(value) -> bool:
# float NaN/inf should be treated as distinct from 'true' null values
return typing.cast(bool, pd.isna(value)) and not isinstance(value, float)
2 changes: 1 addition & 1 deletion tests/system/small/engines/test_generic_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def test_engines_invert_op(scalars_array_value: array_value.ArrayValue, engine):
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)


@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
def test_engines_isin_op(scalars_array_value: array_value.ArrayValue, engine):
arr, col_ids = scalars_array_value.compute_values(
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ WITH `bfcte_0` AS (
`bfcol_1` AS `bfcol_8`,
`bfcol_2` AS `bfcol_9`,
`bfcol_0` AS `bfcol_10`,
INTERVAL `bfcol_3` MICROSECOND AS `bfcol_11`
`bfcol_3` AS `bfcol_11`
FROM `bfcte_0`
), `bfcte_2` AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ WITH `bfcte_0` AS (
`bfcol_1` AS `bfcol_8`,
`bfcol_2` AS `bfcol_9`,
`bfcol_0` AS `bfcol_10`,
INTERVAL `bfcol_3` MICROSECOND AS `bfcol_11`
`bfcol_3` AS `bfcol_11`
FROM `bfcte_0`
), `bfcte_2` AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
), `bfcte_1` AS (
SELECT
*,
`bfcol_0` IN (1, 2, 3) AS `bfcol_1`
COALESCE(`bfcol_0` IN (1, 2, 3), FALSE) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
WITH `bfcte_0` AS (
SELECT
`bool_col` AS `bfcol_0`,
`bytes_col` AS `bfcol_1`,
`date_col` AS `bfcol_2`,
`datetime_col` AS `bfcol_3`,
`geography_col` AS `bfcol_4`,
`int64_col` AS `bfcol_5`,
`int64_too` AS `bfcol_6`,
`numeric_col` AS `bfcol_7`,
`float64_col` AS `bfcol_8`,
`rowindex` AS `bfcol_9`,
`rowindex_2` AS `bfcol_10`,
`string_col` AS `bfcol_11`,
`time_col` AS `bfcol_12`,
`timestamp_col` AS `bfcol_13`,
`duration_col` AS `bfcol_14`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
COALESCE(`bfcol_5` IN (1, 2, 3), FALSE) AS `bfcol_31`,
(
`bfcol_5` IS NULL
) OR `bfcol_5` IN (123456) AS `bfcol_32`,
COALESCE(`bfcol_5` IN (123456), FALSE) AS `bfcol_33`,
COALESCE(`bfcol_5` IN (1.0, 2.0, 3.0), FALSE) AS `bfcol_34`,
FALSE AS `bfcol_35`,
COALESCE(`bfcol_5` IN (2.5, 3), FALSE) AS `bfcol_36`,
FALSE AS `bfcol_37`,
(
`bfcol_8` IS NULL
) OR `bfcol_8` IN (1, 2, 3) AS `bfcol_38`
FROM `bfcte_0`
)
SELECT
`bfcol_9` AS `bfuid_col_1`,
`bfcol_0` AS `bool_col`,
`bfcol_1` AS `bytes_col`,
`bfcol_2` AS `date_col`,
`bfcol_3` AS `datetime_col`,
`bfcol_4` AS `geography_col`,
`bfcol_5` AS `int64_col`,
`bfcol_6` AS `int64_too`,
`bfcol_7` AS `numeric_col`,
`bfcol_8` AS `float64_col`,
`bfcol_9` AS `rowindex`,
`bfcol_10` AS `rowindex_2`,
`bfcol_11` AS `string_col`,
`bfcol_12` AS `time_col`,
`bfcol_13` AS `timestamp_col`,
`bfcol_14` AS `duration_col`,
`bfcol_31` AS `int in ints`,
`bfcol_32` AS `int in ints w null`,
`bfcol_33` AS `int in ints w null wo match nulls`,
`bfcol_34` AS `int in floats`,
`bfcol_35` AS `int in strings`,
`bfcol_36` AS `int in mixed`,
`bfcol_37` AS `int in empty`,
`bfcol_38` AS `float in ints`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ WITH `bfcte_0` AS (
*,
`bfcol_1` AS `bfcol_4`,
`bfcol_0` AS `bfcol_5`,
INTERVAL `bfcol_0` MICROSECOND AS `bfcol_6`
`bfcol_0` AS `bfcol_6`
FROM `bfcte_0`
), `bfcte_2` AS (
SELECT
*,
`bfcol_4` AS `bfcol_10`,
`bfcol_5` AS `bfcol_11`,
`bfcol_6` AS `bfcol_12`,
INTERVAL (`bfcol_5` * 1000000) MICROSECOND AS `bfcol_13`
`bfcol_5` * 1000000 AS `bfcol_13`
FROM `bfcte_1`
), `bfcte_3` AS (
SELECT
Expand All @@ -25,7 +25,7 @@ WITH `bfcte_0` AS (
`bfcol_11` AS `bfcol_19`,
`bfcol_12` AS `bfcol_20`,
`bfcol_13` AS `bfcol_21`,
INTERVAL (`bfcol_11` * 604800000000) MICROSECOND AS `bfcol_22`
`bfcol_11` * 604800000000 AS `bfcol_22`
FROM `bfcte_2`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import pytest

from bigframes import operations as ops
from bigframes.core import expression
from bigframes.operations._op_converters import convert_index, convert_slice
import bigframes.pandas as bpd

Expand Down Expand Up @@ -312,6 +313,40 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot):
snapshot.assert_match(sql, "out.sql")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: empty lines before assertions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Done.



def test_is_in_for_all_cases(scalar_types_df: bpd.DataFrame, snapshot):
    """Snapshot the compiled SQL for IsInOp across value/dtype combinations."""
    int_col = expression.deref("int64_col")
    float_col = expression.deref("float64_col")

    # Pair each op with the output column label it should receive.
    cases = [
        (ops.IsInOp((1, 2, 3)).as_expr(int_col), "int in ints"),
        (ops.IsInOp((None, 123456)).as_expr(int_col), "int in ints w null"),
        (
            ops.IsInOp((None, 123456), match_nulls=False).as_expr(int_col),
            "int in ints w null wo match nulls",
        ),
        (ops.IsInOp((1.0, 2.0, 3.0)).as_expr(int_col), "int in floats"),
        (ops.IsInOp(("1.0", "2.0")).as_expr(int_col), "int in strings"),
        (ops.IsInOp(("1.0", 2.5, 3)).as_expr(int_col), "int in mixed"),
        (ops.IsInOp(()).as_expr(int_col), "int in empty"),
        (ops.IsInOp((1, 2, 3, None)).as_expr(float_col), "float in ints"),
    ]

    scalars_array_value = scalar_types_df._block.expr
    arr, col_ids = scalars_array_value.compute_values([expr for expr, _ in cases])
    arr = arr.rename_columns(dict(zip(col_ids, (name for _, name in cases))))
    sql = arr.session._executor.to_sql(arr, enable_cache=False)

    snapshot.assert_match(sql, "out.sql")


def test_isalnum(scalar_types_df: bpd.DataFrame, snapshot):
bf_df = scalar_types_df[["string_col"]]
sql = _apply_unary_op(bf_df, ops.isalnum_op, "string_col")
Expand Down
Loading