Skip to content

fix: Avoid implicit join when using join with unnest #924

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions sqlalchemy_bigquery/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,14 @@ def _known_tables(self):
if table is not None:
known_tables.add(table.name)

# If we have the table in the `from` of our parent, do not add the alias
# as this will add the table twice and cause an implicit JOIN for that
# table on itself
asfrom_froms = self.stack[-1].get("asfrom_froms", [])
for from_ in asfrom_froms:
if isinstance(from_, Table):
known_tables.add(from_.name)

return known_tables

def visit_column(
Expand Down
166 changes: 165 additions & 1 deletion tests/unit/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import sqlalchemy.exc

from .conftest import setup_table
from .conftest import sqlalchemy_1_4_or_higher
from .conftest import sqlalchemy_1_4_or_higher, sqlalchemy_before_1_4


def test_constraints_are_ignored(faux_conn, metadata):
Expand Down Expand Up @@ -114,3 +114,167 @@ def test_no_alias_for_known_tables_cte(faux_conn, metadata):
)
found_cte_sql = q.compile(faux_conn).string
assert found_cte_sql == expected_cte_sql


def prepare_implicit_join_base_query(
faux_conn, metadata, select_from_table2, old_syntax
):
table1 = setup_table(
faux_conn, "table1", metadata, sqlalchemy.Column("foo", sqlalchemy.Integer)
)
table2 = setup_table(
faux_conn,
"table2",
metadata,
sqlalchemy.Column("foos", sqlalchemy.ARRAY(sqlalchemy.Integer)),
sqlalchemy.Column("bar", sqlalchemy.Integer),
)
F = sqlalchemy.func

unnested_col_name = "unnested_foos"
unnested_foos = F.unnest(table2.c.foos).alias(unnested_col_name)
unnested_foo_col = sqlalchemy.Column(unnested_col_name)

# Set up initial query
cols = [table1.c.foo, table2.c.bar] if select_from_table2 else [table1.c.foo]
q = sqlalchemy.select(cols) if old_syntax else sqlalchemy.select(*cols)
q = q.select_from(unnested_foos.join(table1, table1.c.foo == unnested_foo_col))
return q


@sqlalchemy_before_1_4
def test_no_implicit_join_asterix_for_inner_unnest_before_1_4(faux_conn, metadata):
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
q = prepare_implicit_join_base_query(faux_conn, metadata, True, True)
expected_initial_sql = (
"SELECT `table1`.`foo`, `table2`.`bar` \n"
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
)
found_initial_sql = q.compile(faux_conn).string
assert found_initial_sql == expected_initial_sql

q = sqlalchemy.select(["*"]).select_from(q)

expected_outer_sql = (
"SELECT * \n"
"FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n"
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`)"
)
found_outer_sql = q.compile(faux_conn).string
assert found_outer_sql == expected_outer_sql


@sqlalchemy_1_4_or_higher
def test_no_implicit_join_asterix_for_inner_unnest(faux_conn, metadata):
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
q = prepare_implicit_join_base_query(faux_conn, metadata, True, False)
expected_initial_sql = (
"SELECT `table1`.`foo`, `table2`.`bar` \n"
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
)
found_initial_sql = q.compile(faux_conn).string
assert found_initial_sql == expected_initial_sql

q = q.subquery()
q = sqlalchemy.select("*").select_from(q)

expected_outer_sql = (
"SELECT * \n"
"FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n"
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`"
)
found_outer_sql = q.compile(faux_conn).string
assert found_outer_sql == expected_outer_sql


@sqlalchemy_before_1_4
def test_no_implicit_join_for_inner_unnest_before_1_4(faux_conn, metadata):
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
q = prepare_implicit_join_base_query(faux_conn, metadata, True, True)
expected_initial_sql = (
"SELECT `table1`.`foo`, `table2`.`bar` \n"
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
)
found_initial_sql = q.compile(faux_conn).string
assert found_initial_sql == expected_initial_sql

q = sqlalchemy.select([q.c.foo]).select_from(q)

expected_outer_sql = (
"SELECT `foo` \n"
"FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n"
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`)"
)
found_outer_sql = q.compile(faux_conn).string
assert found_outer_sql == expected_outer_sql


@sqlalchemy_1_4_or_higher
def test_no_implicit_join_for_inner_unnest(faux_conn, metadata):
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
q = prepare_implicit_join_base_query(faux_conn, metadata, True, False)
expected_initial_sql = (
"SELECT `table1`.`foo`, `table2`.`bar` \n"
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
)
found_initial_sql = q.compile(faux_conn).string
assert found_initial_sql == expected_initial_sql

q = q.subquery()
q = sqlalchemy.select(q.c.foo).select_from(q)

expected_outer_sql = (
"SELECT `anon_1`.`foo` \n"
"FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n"
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`"
)
found_outer_sql = q.compile(faux_conn).string
assert found_outer_sql == expected_outer_sql


@sqlalchemy_1_4_or_higher
def test_no_implicit_join_asterix_for_inner_unnest_no_table2_column(
faux_conn, metadata
):
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
q = prepare_implicit_join_base_query(faux_conn, metadata, False, False)
expected_initial_sql = (
"SELECT `table1`.`foo` \n"
"FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
)
found_initial_sql = q.compile(faux_conn).string
assert found_initial_sql == expected_initial_sql

q = q.subquery()
q = sqlalchemy.select("*").select_from(q)

expected_outer_sql = (
"SELECT * \n"
"FROM (SELECT `table1`.`foo` AS `foo` \n"
"FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`"
)
found_outer_sql = q.compile(faux_conn).string
assert found_outer_sql == expected_outer_sql


@sqlalchemy_1_4_or_higher
def test_no_implicit_join_for_inner_unnest_no_table2_column(faux_conn, metadata):
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
q = prepare_implicit_join_base_query(faux_conn, metadata, False, False)
expected_initial_sql = (
"SELECT `table1`.`foo` \n"
"FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
)
found_initial_sql = q.compile(faux_conn).string
assert found_initial_sql == expected_initial_sql

q = q.subquery()
q = sqlalchemy.select(q.c.foo).select_from(q)

expected_outer_sql = (
"SELECT `anon_1`.`foo` \n"
"FROM (SELECT `table1`.`foo` AS `foo` \n"
"FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`"
)
found_outer_sql = q.compile(faux_conn).string
assert found_outer_sql == expected_outer_sql