-
-
Notifications
You must be signed in to change notification settings - Fork 32.8k
Fixed #20024 -- Made exclude(field__in=[None, ...]) generate IS NULL clause. #19691
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Left some comments for improvements but you'll likely run into issues that relate to ticket-24267 (see #16817).
The problem you'll face is that the ORM believes that only the isnull
lookup is dealing with null values adequately.
django/django/db/models/sql/query.py
Lines 1618 to 1645 in 748ca0a
if ( | |
current_negated | |
and (lookup_type != "isnull" or condition.rhs is False) | |
and condition.rhs is not None | |
): | |
require_outer = True | |
if lookup_type != "isnull": | |
# The condition added here will be SQL like this: | |
# NOT (col IS NOT NULL), where the first NOT is added in | |
# upper layers of code. The reason for addition is that if col | |
# is null, then col != someval will result in SQL "unknown" | |
# which isn't the same as in Python. The Python None handling | |
# is wanted, and it can be gotten by | |
# (col IS NULL OR col != someval) | |
# <=> | |
# NOT (col IS NOT NULL AND col = someval). | |
if ( | |
self.is_nullable(targets[0]) | |
or self.alias_map[join_list[-1]].join_type == LOUTER | |
): | |
lookup_class = targets[0].get_lookup("isnull") | |
col = self._get_col(targets[0], join_info.targets[0], alias) | |
clause.add(lookup_class(col, False), AND) | |
# If someval is a nullable column, someval IS NOT NULL is | |
# added. | |
if isinstance(value, Col) and self.is_nullable(value.target): | |
lookup_class = value.target.get_lookup("isnull") | |
clause.add(lookup_class(value, False), AND) |
and thus tries to be helpful and throws a AND "queries_tag"."parent_id" IS NOT NULL
in the mix.
django/db/models/lookups.py
Outdated
|
||
if self.rhs_is_direct_value() and isinstance(self.rhs, (list, tuple)): | ||
values = list(self.rhs) | ||
has_none = None in values | ||
filtered_values = [v for v in values if v is not None] | ||
if not filtered_values and not has_none: | ||
# Avoid circular import when importing NothingNode | ||
from django.db.models.sql.where import NothingNode | ||
|
||
where = NothingNode() | ||
return compiler.compile(where) | ||
|
||
lhs, lhs_params = self.process_lhs(compiler, connection) | ||
sql_parts = [] | ||
params = [] | ||
|
||
if filtered_values: | ||
placeholders = ", ".join(["%s"] * len(filtered_values)) | ||
sql_parts.append(f"{lhs} IN ({placeholders})") | ||
params.extend(lhs_params + filtered_values) | ||
|
||
if has_none: | ||
sql_parts.append(f"{lhs} IS NULL") | ||
|
||
return " OR ".join(sql_parts), params | ||
|
||
return super().as_sql(compiler, connection) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think all of this can be simplified to
if self.rhs_is_direct_value() and isinstance(self.rhs, (list, tuple)): | |
values = list(self.rhs) | |
has_none = None in values | |
filtered_values = [v for v in values if v is not None] | |
if not filtered_values and not has_none: | |
# Avoid circular import when importing NothingNode | |
from django.db.models.sql.where import NothingNode | |
where = NothingNode() | |
return compiler.compile(where) | |
lhs, lhs_params = self.process_lhs(compiler, connection) | |
sql_parts = [] | |
params = [] | |
if filtered_values: | |
placeholders = ", ".join(["%s"] * len(filtered_values)) | |
sql_parts.append(f"{lhs} IN ({placeholders})") | |
params.extend(lhs_params + filtered_values) | |
if has_none: | |
sql_parts.append(f"{lhs} IS NULL") | |
return " OR ".join(sql_parts), params | |
return super().as_sql(compiler, connection) | |
try: | |
sql, params = super().as_sql(compiler, connection) | |
except EmptyResultSet: | |
sql, params = "", () | |
if ( | |
compiler.query.is_nullable(self.lhs.output_field) | |
and self.rhs_is_direct_value() | |
and None in self.rhs | |
): | |
lhs_sql, lhs_params = self.process_lhs(compiler, connection) | |
if sql: | |
sql = f"({sql} OR {lhs_sql} IS NULL)" | |
params = (*params, *lhs_params) | |
else: | |
sql = f"{lhs_sql} IS NULL" | |
params = (*params, *lhs_params) | |
elif not sql: | |
raise EmptyResultSet | |
return sql, params |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nice. I forgot to run the full test suite before i pushed. My long patch had 14 other tests failing. I'll rerun all the tests with this and then start looking at each test that failed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
as mentioned in the review message you'll likely need something like the following additionnaly
diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
index dc019df166..0fd8bcb2a4 100644
--- a/django/db/models/sql/query.py
+++ b/django/db/models/sql/query.py
@@ -1621,7 +1621,11 @@ def build_filter(
and condition.rhs is not None
):
require_outer = True
- if lookup_type != "isnull":
+ if lookup_type != "null" and not (
+ lookup_type == "in"
+ and isinstance(value, (list, tuple))
+ and None in value
+ ):
# The condition added here will be SQL like this:
# NOT (col IS NOT NULL), where the first NOT is added in
# upper layers of code. The reason for addition is that if col
It will also to happen to fix a long standing expected failure
diff --git a/tests/queries/tests.py b/tests/queries/tests.py
index 4158a9a596..e30a556ebb 100644
--- a/tests/queries/tests.py
+++ b/tests/queries/tests.py
@@ -3528,7 +3528,6 @@ def test_null_in_exclude_qs(self):
# into subquery above
self.assertIs(inner_qs._result_cache, None)
- @unittest.expectedFailure
def test_col_not_in_list_containing_null(self):
"""
The following case is not handled properly because
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated. I didn't even think to look here. I am going to start 'reading' through the code base (the ORM part) more. I don't usually jump down rabbit holes this big. It's crazy how much code there is here to handle SQL joins. Very cool though.
tests/queries/tests.py
Outdated
class ExcludedNoneInTest(TestCase): | ||
def setUp(self): | ||
Tag.objects.create(name="null", parent_id=None) | ||
Tag.objects.create(name="one", parent_id=1) | ||
Tag.objects.create(name="two", parent_id=2) | ||
|
||
def test_exclude_in_with_none(self): | ||
qs = Tag.objects.exclude(parent_id__in=[None, 1]) | ||
values = list(qs.values_list("parent_id", flat=True)) | ||
self.assertEqual(values, [2]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move these tests close to the one added in #13030.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
test moved, also switched test model.
clause. this fixes a long-standing issue where exclude(field__in=[None, ...]) would incorrectly return no results due to SQL NULL handling in NOT IN clauses. Now, if the list includes None, the ORM splits the condition into: IS NULL OR IN (...) This aligns exclude(...__in=...) behavior with user expectations and avoids leaking SQL's tri-valued logic into the ORM. Includes tests and a release note entry under 5.2.5.
3a8aae3
to
c8d48e5
Compare
Just a quick note. I had all tests passing and I found this one, test_col_not_in_list_containing_null, with an expectedFailure decorator. I removed the decorator, but it still failed. The exclude(field__in=[None]) case was behaving weirdly because of how None interacts with NOT IN (...) in SQL. I tried rewriting __in=[None} to use IS NULL / IS NOT NULL, which made test_col_not_in_list_containing_null pass, but that broke test_in_ignore_solo_none. This one expects Django to short-circuit id__in=[None] (PK lookup) into .none() so that no query is executed. Ended up handling this by checking if it's a PK and short circuiting in that case, other wise it falls back to the IS NULL logic. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should also test the composite primary key case being
filter(pk__in=[(tenant_id, None])
filter(pk__in=[(None, None])
exclude(pk__in=[(tenant_id, None])
exclude(pk__in=[(None, None])
django/db/models/lookups.py
Outdated
and max_in_list_size | ||
and len(self.rhs) > max_in_list_size | ||
): | ||
and isinstance(self.rhs, (list, tuple, set)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can't do that, any iterable should be accepted here.
django/db/models/lookups.py
Outdated
self.rhs = [v for v in self.rhs if v is not None] | ||
if not self.rhs: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can't alter self.rhs
during compilation as the expression can be reused later on and it would result in a different query.
django/db/models/lookups.py
Outdated
if getattr(self.lhs.output_field, "primary_key", False): | ||
return super().as_sql(compiler, connection) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This won't work with composite primary keys.
django/db/models/lookups.py
Outdated
# Short-circuit PK lookups like id__in=[None] to none(). | ||
# This avoids unnecessary "IS NULL" queries when the QuerySet | ||
# will return no results (e.g. assertNumQueries(0) tests). | ||
if getattr(self.lhs.output_field, "primary_key", False): | ||
return super().as_sql(compiler, connection) | ||
|
||
lhs, params = self.process_lhs(compiler, connection) | ||
op = "IS NOT NULL" if compiler.query.where.negated else "IS NULL" | ||
return f"{lhs} {op}", params |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should not cheat like that, getting the suite to pass is a mean to ensure proper behavior and not an end in itself.
I suggest the following approach instead
diff --git a/django/db/models/lookups.py b/django/db/models/lookups.py
index 51817710e9..9fc892a6cb 100644
--- a/django/db/models/lookups.py
+++ b/django/db/models/lookups.py
@@ -553,7 +553,25 @@ def as_sql(self, compiler, connection):
and len(self.rhs) > max_in_list_size
):
return self.split_parameter_list_as_sql(compiler, connection)
- return super().as_sql(compiler, connection)
+ try:
+ sql, params = super().as_sql(compiler, connection)
+ except EmptyResultSet:
+ sql, params = "", ()
+ if (
+ compiler.query.is_nullable(self.lhs.output_field)
+ and self.rhs_is_direct_value()
+ and None in self.rhs
+ ):
+ lhs_sql, lhs_params = self.process_lhs(compiler, connection)
+ if sql:
+ sql = f"({sql} OR {lhs_sql} IS NULL)"
+ params = (*params, *lhs_params)
+ else:
+ sql = f"{lhs_sql} IS NULL"
+ params = (*params, *lhs_params)
+ elif not sql:
+ raise EmptyResultSet
+ return sql, params
- It delegates empty result set to
super()
- It doesn't mutate the expression at compilation
- It properly handles nullable left hand sides (not only excluding primary keys which cannot contain nulls)
1cef0ae
to
30ffc72
Compare
Trac ticket number
ticket-20024
Branch description
This fixes a long-standing issue where exclude(field__in=[None, ...]) would incorrectly return no results due to SQL NULL handling in NOT IN clauses.
Now, if the list includes None, the ORM splits the condition into:
IS NULL OR IN (...)
This aligns exclude(...__in=...) behavior with user expectations and avoids leaking SQL's tri-valued logic into the ORM.
Includes tests and a release note entry under 5.2.5.
Checklist
main
branch.