Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions bigframes/core/compile/sqlglot/expressions/unary_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import bigframes.core.compile.sqlglot.expressions.constants as constants
from bigframes.core.compile.sqlglot.expressions.op_registration import OpRegistration
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
import bigframes.dtypes as dtypes

UNARY_OP_REGISTRATION = OpRegistration()

Expand Down Expand Up @@ -420,7 +421,28 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:

@UNARY_OP_REGISTRATION.register(ops.IsInOp)
def _(op: ops.IsInOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``IsInOp`` to a SQL ``IN`` expression.

    Candidate values are filtered for type compatibility: a value is kept
    only if its dtype equals the expression's dtype, or both are numeric
    (so e.g. an INT64 column can match float literals). Incompatible
    values can never compare equal and are dropped. Null candidates are
    handled separately because SQL ``IN`` never matches NULL.
    """
    values = []
    is_numeric_expr = dtypes.is_numeric(expr.dtype)
    for value in op.values:
        if value is None:
            # NULL literals are handled by the match_nulls branch below.
            continue
        dtype = dtypes.bigframes_type(type(value))
        # Parenthesized for clarity: exact dtype match, or both numeric.
        if expr.dtype == dtype or (is_numeric_expr and dtypes.is_numeric(dtype)):
            values.append(sge.convert(value))

    if op.match_nulls and any(_is_null(value) for value in op.values):
        if not values:
            # All candidates were null/incompatible. Emitting `IN ()` would be
            # invalid SQL; an empty IN-list is semantically FALSE, so the
            # result reduces to the null check alone.
            return sge.Is(this=expr.expr, expression=sge.Null())
        return sge.Is(this=expr.expr, expression=sge.Null()) | sge.In(
            this=expr.expr, expressions=values
        )

    if len(values) == 0:
        # Nothing can match; IN over an empty set is constant FALSE.
        return sge.convert(False)

    # COALESCE maps the NULL result of `NULL IN (...)` to FALSE so a null
    # expression is reported as "not in" rather than unknown.
    return sge.func(
        "COALESCE", sge.In(this=expr.expr, expressions=values), sge.convert(False)
    )


@UNARY_OP_REGISTRATION.register(ops.isalnum_op)
Expand Down Expand Up @@ -767,7 +789,7 @@ def _(op: ops.ToTimedeltaOp, expr: TypedExpr) -> sge.Expression:
factor = UNIT_TO_US_CONVERSION_FACTORS[op.unit]
if factor != 1:
value = sge.Mul(this=value, expression=sge.convert(factor))
return sge.Interval(this=value, unit=sge.Identifier(this="MICROSECOND"))
return value


@UNARY_OP_REGISTRATION.register(ops.UnixMicros)
Expand Down Expand Up @@ -866,3 +888,9 @@ def _(op: ops.ZfillOp, expr: TypedExpr) -> sge.Expression:
],
default=sge.func("LPAD", expr.expr, sge.convert(op.width), sge.convert("0")),
)


# Helpers
def _is_null(value) -> bool:
# float NaN/inf should be treated as distinct from 'true' null values
return typing.cast(bool, pd.isna(value)) and not isinstance(value, float)
2 changes: 1 addition & 1 deletion tests/system/small/engines/test_generic_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def test_engines_invert_op(scalars_array_value: array_value.ArrayValue, engine):
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)


@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
def test_engines_isin_op(scalars_array_value: array_value.ArrayValue, engine):
arr, col_ids = scalars_array_value.compute_values(
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ WITH `bfcte_0` AS (
`bfcol_1` AS `bfcol_8`,
`bfcol_2` AS `bfcol_9`,
`bfcol_0` AS `bfcol_10`,
INTERVAL `bfcol_3` MICROSECOND AS `bfcol_11`
`bfcol_3` AS `bfcol_11`
FROM `bfcte_0`
), `bfcte_2` AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ WITH `bfcte_0` AS (
`bfcol_1` AS `bfcol_8`,
`bfcol_2` AS `bfcol_9`,
`bfcol_0` AS `bfcol_10`,
INTERVAL `bfcol_3` MICROSECOND AS `bfcol_11`
`bfcol_3` AS `bfcol_11`
FROM `bfcte_0`
), `bfcte_2` AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
), `bfcte_1` AS (
SELECT
*,
`bfcol_0` IN (1, 2, 3) AS `bfcol_1`
COALESCE(`bfcol_0` IN (1, 2, 3), FALSE) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
WITH `bfcte_0` AS (
SELECT
`bool_col` AS `bfcol_0`,
`bytes_col` AS `bfcol_1`,
`date_col` AS `bfcol_2`,
`datetime_col` AS `bfcol_3`,
`geography_col` AS `bfcol_4`,
`int64_col` AS `bfcol_5`,
`int64_too` AS `bfcol_6`,
`numeric_col` AS `bfcol_7`,
`float64_col` AS `bfcol_8`,
`rowindex` AS `bfcol_9`,
`rowindex_2` AS `bfcol_10`,
`string_col` AS `bfcol_11`,
`time_col` AS `bfcol_12`,
`timestamp_col` AS `bfcol_13`,
`duration_col` AS `bfcol_14`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
COALESCE(`bfcol_5` IN (1, 2, 3), FALSE) AS `bfcol_31`,
(
`bfcol_5` IS NULL
) OR `bfcol_5` IN (123456) AS `bfcol_32`,
COALESCE(`bfcol_5` IN (123456), FALSE) AS `bfcol_33`,
COALESCE(`bfcol_5` IN (1.0, 2.0, 3.0), FALSE) AS `bfcol_34`,
FALSE AS `bfcol_35`,
COALESCE(`bfcol_5` IN (2.5, 3), FALSE) AS `bfcol_36`,
FALSE AS `bfcol_37`,
(
`bfcol_8` IS NULL
) OR `bfcol_8` IN (1, 2, 3) AS `bfcol_38`
FROM `bfcte_0`
)
SELECT
`bfcol_9` AS `bfuid_col_1`,
`bfcol_0` AS `bool_col`,
`bfcol_1` AS `bytes_col`,
`bfcol_2` AS `date_col`,
`bfcol_3` AS `datetime_col`,
`bfcol_4` AS `geography_col`,
`bfcol_5` AS `int64_col`,
`bfcol_6` AS `int64_too`,
`bfcol_7` AS `numeric_col`,
`bfcol_8` AS `float64_col`,
`bfcol_9` AS `rowindex`,
`bfcol_10` AS `rowindex_2`,
`bfcol_11` AS `string_col`,
`bfcol_12` AS `time_col`,
`bfcol_13` AS `timestamp_col`,
`bfcol_14` AS `duration_col`,
`bfcol_31` AS `int in ints`,
`bfcol_32` AS `int in ints w null`,
`bfcol_33` AS `int in ints w null wo match nulls`,
`bfcol_34` AS `int in floats`,
`bfcol_35` AS `int in strings`,
`bfcol_36` AS `int in mixed`,
`bfcol_37` AS `int in empty`,
`bfcol_38` AS `float in ints`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ WITH `bfcte_0` AS (
*,
`bfcol_1` AS `bfcol_4`,
`bfcol_0` AS `bfcol_5`,
INTERVAL `bfcol_0` MICROSECOND AS `bfcol_6`
`bfcol_0` AS `bfcol_6`
FROM `bfcte_0`
), `bfcte_2` AS (
SELECT
*,
`bfcol_4` AS `bfcol_10`,
`bfcol_5` AS `bfcol_11`,
`bfcol_6` AS `bfcol_12`,
INTERVAL (`bfcol_5` * 1000000) MICROSECOND AS `bfcol_13`
`bfcol_5` * 1000000 AS `bfcol_13`
FROM `bfcte_1`
), `bfcte_3` AS (
SELECT
Expand All @@ -25,7 +25,7 @@ WITH `bfcte_0` AS (
`bfcol_11` AS `bfcol_19`,
`bfcol_12` AS `bfcol_20`,
`bfcol_13` AS `bfcol_21`,
INTERVAL (`bfcol_11` * 604800000000) MICROSECOND AS `bfcol_22`
`bfcol_11` * 604800000000 AS `bfcol_22`
FROM `bfcte_2`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import pytest

from bigframes import operations as ops
from bigframes.core import expression
from bigframes.operations._op_converters import convert_index, convert_slice
import bigframes.pandas as bpd

Expand Down Expand Up @@ -312,6 +313,40 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot):
snapshot.assert_match(sql, "out.sql")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: empty lines before assertions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Done.



def test_is_in_for_all_cases(scalar_types_df: bpd.DataFrame, snapshot):
    """Snapshot the compiled SQL for IsInOp across value/dtype combinations."""
    int_col = expression.deref("int64_col")
    float_col = expression.deref("float64_col")

    # Pair each op with the output column label it should receive.
    cases = [
        (ops.IsInOp((1, 2, 3)).as_expr(int_col), "int in ints"),
        (ops.IsInOp((None, 123456)).as_expr(int_col), "int in ints w null"),
        (
            ops.IsInOp((None, 123456), match_nulls=False).as_expr(int_col),
            "int in ints w null wo match nulls",
        ),
        (ops.IsInOp((1.0, 2.0, 3.0)).as_expr(int_col), "int in floats"),
        (ops.IsInOp(("1.0", "2.0")).as_expr(int_col), "int in strings"),
        (ops.IsInOp(("1.0", 2.5, 3)).as_expr(int_col), "int in mixed"),
        (ops.IsInOp(()).as_expr(int_col), "int in empty"),
        (ops.IsInOp((1, 2, 3, None)).as_expr(float_col), "float in ints"),
    ]

    scalars_array_value = scalar_types_df._block.expr
    arr, col_ids = scalars_array_value.compute_values([expr for expr, _ in cases])
    arr = arr.rename_columns(dict(zip(col_ids, (name for _, name in cases))))
    sql = arr.session._executor.to_sql(arr, enable_cache=False)

    snapshot.assert_match(sql, "out.sql")


def test_isalnum(scalar_types_df: bpd.DataFrame, snapshot):
bf_df = scalar_types_df[["string_col"]]
sql = _apply_unary_op(bf_df, ops.isalnum_op, "string_col")
Expand Down
Loading