Fix incorrect handling of NULL index entries in indexed ROW() comparisons.

tglsfdc · tglsfdc · commit da936200dbd9 · 2016-03-09T14:51:01.000-05:00
An index search using a row comparison such as ROW(a, b) &gt; ROW('x', 'y')
would stop upon reaching a NULL entry in the "b" column, ignoring the
fact that there might be non-NULL "b" values associated with later values
of "a".  This happens because _bt_mark_scankey_required() marks the
subsidiary scankey for "b" as required, which is just wrong: it's for
a column after the one with the first inequality key (namely "a"), and
thus can't be considered a required match.

This bit of brain fade dates back to the very beginnings of our support
for indexed ROW() comparisons, in 2006.  Kind of astonishing that no one
came across it before Glen Takahashi, in bug #14010.

Back-patch to all supported versions.

Note: the given test case doesn't actually fail in unpatched 9.1, evidently
because the fix for bug #6278 (i.e., stopping at nulls in either scan
direction) is required to make it fail.  I'm sure I could devise a case
that fails in 9.1 as well, perhaps with something involving making a cursor
back up; but it doesn't seem worth the trouble.
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
@@ -1285,12 +1285,9 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
  *
  * Depending on the operator type, the key may be required for both scan
  * directions or just one.  Also, if the key is a row comparison header,
- * we have to mark the appropriate subsidiary ScanKeys as required.  In
- * such cases, the first subsidiary key is required, but subsequent ones
- * are required only as long as they correspond to successive index columns
- * and match the leading column as to sort direction.
- * Otherwise the row comparison ordering is different from the index ordering
- * and so we can't stop the scan on the basis of those lower-order columns.
+ * we have to mark its first subsidiary ScanKey as required.  (Subsequent
+ * subsidiary ScanKeys are normally for lower-order columns, and thus
+ * cannot be required, since they're after the first non-equality scankey.)
  *
  * Note: when we set required-key flag bits in a subsidiary scankey, we are
  * scribbling on a data structure belonging to the index AM's caller, not on
@@ -1328,24 +1325,12 @@ _bt_mark_scankey_required(ScanKey skey)
 	if (skey->sk_flags & SK_ROW_HEADER)
 	{
 		ScanKey		subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
-		AttrNumber	attno = skey->sk_attno;
 
-		/* First subkey should be same as the header says */
-		Assert(subkey->sk_attno == attno);
-
-		for (;;)
-		{
-			Assert(subkey->sk_flags & SK_ROW_MEMBER);
-			if (subkey->sk_attno != attno)
-				break;			/* non-adjacent key, so not required */
-			if (subkey->sk_strategy != skey->sk_strategy)
-				break;			/* wrong direction, so not required */
-			subkey->sk_flags |= addflags;
-			if (subkey->sk_flags & SK_ROW_END)
-				break;
-			subkey++;
-			attno++;
-		}
+		/* First subkey should be same column/operator as the header */
+		Assert(subkey->sk_flags & SK_ROW_MEMBER);
+		Assert(subkey->sk_attno == skey->sk_attno);
+		Assert(subkey->sk_strategy == skey->sk_strategy);
+		subkey->sk_flags |= addflags;
 	}
 }
 
diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out
@@ -278,6 +278,29 @@ order by thousand, tenthous;
       999 |     9999
 (25 rows)
 
+-- Test case for bug #14010: indexed row comparisons fail with nulls
+create temp table test_table (a text, b text);
+insert into test_table values ('a', 'b');
+insert into test_table select 'a', null from generate_series(1,1000);
+insert into test_table values ('b', 'a');
+create index on test_table (a,b);
+set enable_sort = off;
+explain (costs off)
+select a,b from test_table where (a,b) > ('a','a') order by a,b;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Index Only Scan using test_table_a_b_idx on test_table
+   Index Cond: (ROW(a, b) > ROW('a'::text, 'a'::text))
+(2 rows)
+
+select a,b from test_table where (a,b) > ('a','a') order by a,b;
+ a | b 
+---+---
+ a | b
+ b | a
+(2 rows)
+
+reset enable_sort;
 -- Check row comparisons with IN
 select * from int8_tbl i8 where i8 in (row(123,456));  -- fail, type mismatch
 ERROR:  cannot compare dissimilar column types bigint and integer at record column 1
diff --git a/src/test/regress/sql/rowtypes.sql b/src/test/regress/sql/rowtypes.sql
@@ -116,6 +116,21 @@ select thousand, tenthous from tenk1
 where (thousand, tenthous) >= (997, 5000)
 order by thousand, tenthous;
 
+-- Test case for bug #14010: indexed row comparisons fail with nulls
+create temp table test_table (a text, b text);
+insert into test_table values ('a', 'b');
+insert into test_table select 'a', null from generate_series(1,1000);
+insert into test_table values ('b', 'a');
+create index on test_table (a,b);
+set enable_sort = off;
+
+explain (costs off)
+select a,b from test_table where (a,b) > ('a','a') order by a,b;
+
+select a,b from test_table where (a,b) > ('a','a') order by a,b;
+
+reset enable_sort;
+
 -- Check row comparisons with IN
 select * from int8_tbl i8 where i8 in (row(123,456));  -- fail, type mismatch