diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index 5297f223..03488250 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -269,6 +269,14 @@ def _known_tables(self): if table is not None: known_tables.add(table.name) + # If we have the table in the `from` of our parent, do not add the alias + # as this will add the table twice and cause an implicit JOIN for that + # table on itself + asfrom_froms = self.stack[-1].get("asfrom_froms", []) + for from_ in asfrom_froms: + if isinstance(from_, Table): + known_tables.add(from_.name) + return known_tables def visit_column( diff --git a/tests/unit/test_compiler.py b/tests/unit/test_compiler.py index db02e593..139b6cbc 100644 --- a/tests/unit/test_compiler.py +++ b/tests/unit/test_compiler.py @@ -21,7 +21,7 @@ import sqlalchemy.exc from .conftest import setup_table -from .conftest import sqlalchemy_1_4_or_higher +from .conftest import sqlalchemy_1_4_or_higher, sqlalchemy_before_1_4 def test_constraints_are_ignored(faux_conn, metadata): @@ -114,3 +114,167 @@ def test_no_alias_for_known_tables_cte(faux_conn, metadata): ) found_cte_sql = q.compile(faux_conn).string assert found_cte_sql == expected_cte_sql + + +def prepare_implicit_join_base_query( + faux_conn, metadata, select_from_table2, old_syntax +): + table1 = setup_table( + faux_conn, "table1", metadata, sqlalchemy.Column("foo", sqlalchemy.Integer) + ) + table2 = setup_table( + faux_conn, + "table2", + metadata, + sqlalchemy.Column("foos", sqlalchemy.ARRAY(sqlalchemy.Integer)), + sqlalchemy.Column("bar", sqlalchemy.Integer), + ) + F = sqlalchemy.func + + unnested_col_name = "unnested_foos" + unnested_foos = F.unnest(table2.c.foos).alias(unnested_col_name) + unnested_foo_col = sqlalchemy.Column(unnested_col_name) + + # Set up initial query + cols = [table1.c.foo, table2.c.bar] if select_from_table2 else [table1.c.foo] + q = sqlalchemy.select(cols) if old_syntax else sqlalchemy.select(*cols) + q = q.select_from(unnested_foos.join(table1, table1.c.foo == unnested_foo_col)) + return q + + +@sqlalchemy_before_1_4 +def test_no_implicit_join_asterix_for_inner_unnest_before_1_4(faux_conn, metadata): + # See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368 + q = prepare_implicit_join_base_query(faux_conn, metadata, True, True) + expected_initial_sql = ( + "SELECT `table1`.`foo`, `table2`.`bar` \n" + "FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`" + ) + found_initial_sql = q.compile(faux_conn).string + assert found_initial_sql == expected_initial_sql + + q = sqlalchemy.select(["*"]).select_from(q) + + expected_outer_sql = ( + "SELECT * \n" + "FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n" + "FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`)" + ) + found_outer_sql = q.compile(faux_conn).string + assert found_outer_sql == expected_outer_sql + + +@sqlalchemy_1_4_or_higher +def test_no_implicit_join_asterix_for_inner_unnest(faux_conn, metadata): + # See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368 + q = prepare_implicit_join_base_query(faux_conn, metadata, True, False) + expected_initial_sql = ( + "SELECT `table1`.`foo`, `table2`.`bar` \n" + "FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`" + ) + found_initial_sql = q.compile(faux_conn).string + assert found_initial_sql == expected_initial_sql + + q = q.subquery() + q = sqlalchemy.select("*").select_from(q) + + expected_outer_sql = ( + "SELECT * \n" + "FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n" + "FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`" + ) + found_outer_sql = q.compile(faux_conn).string + assert found_outer_sql == expected_outer_sql + + +@sqlalchemy_before_1_4 +def test_no_implicit_join_for_inner_unnest_before_1_4(faux_conn, metadata): + # See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368 + q = prepare_implicit_join_base_query(faux_conn, metadata, True, True) + expected_initial_sql = ( + "SELECT `table1`.`foo`, `table2`.`bar` \n" + "FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`" + ) + found_initial_sql = q.compile(faux_conn).string + assert found_initial_sql == expected_initial_sql + + q = sqlalchemy.select([q.c.foo]).select_from(q) + + expected_outer_sql = ( + "SELECT `foo` \n" + "FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n" + "FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`)" + ) + found_outer_sql = q.compile(faux_conn).string + assert found_outer_sql == expected_outer_sql + + +@sqlalchemy_1_4_or_higher +def test_no_implicit_join_for_inner_unnest(faux_conn, metadata): + # See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368 + q = prepare_implicit_join_base_query(faux_conn, metadata, True, False) + expected_initial_sql = ( + "SELECT `table1`.`foo`, `table2`.`bar` \n" + "FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`" + ) + found_initial_sql = q.compile(faux_conn).string + assert found_initial_sql == expected_initial_sql + + q = q.subquery() + q = sqlalchemy.select(q.c.foo).select_from(q) + + expected_outer_sql = ( + "SELECT `anon_1`.`foo` \n" + "FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n" + "FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`" + ) + found_outer_sql = q.compile(faux_conn).string + assert found_outer_sql == expected_outer_sql + + +@sqlalchemy_1_4_or_higher +def test_no_implicit_join_asterix_for_inner_unnest_no_table2_column( + faux_conn, metadata +): + # See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368 + q = prepare_implicit_join_base_query(faux_conn, metadata, False, False) + expected_initial_sql = ( + "SELECT `table1`.`foo` \n" + "FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`" + ) + found_initial_sql = q.compile(faux_conn).string + assert found_initial_sql == expected_initial_sql + + q = q.subquery() + q = sqlalchemy.select("*").select_from(q) + + expected_outer_sql = ( + "SELECT * \n" + "FROM (SELECT `table1`.`foo` AS `foo` \n" + "FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`" + ) + found_outer_sql = q.compile(faux_conn).string + assert found_outer_sql == expected_outer_sql + + +@sqlalchemy_1_4_or_higher +def test_no_implicit_join_for_inner_unnest_no_table2_column(faux_conn, metadata): + # See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368 + q = prepare_implicit_join_base_query(faux_conn, metadata, False, False) + expected_initial_sql = ( + "SELECT `table1`.`foo` \n" + "FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`" + ) + found_initial_sql = q.compile(faux_conn).string + assert found_initial_sql == expected_initial_sql + + q = q.subquery() + q = sqlalchemy.select(q.c.foo).select_from(q) + + expected_outer_sql = ( + "SELECT `anon_1`.`foo` \n" + "FROM (SELECT `table1`.`foo` AS `foo` \n" + "FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`" + ) + found_outer_sql = q.compile(faux_conn).string + assert found_outer_sql == expected_outer_sql