Skip to content

Commit 788e35a

Browse files
committed
Fix erroneous hash calculations in gin_extract_jsonb_path().
The jsonb_path_ops code calculated hash values inconsistently in some cases involving nested arrays and objects. This would result in queries possibly not finding entries that they should find, when using a jsonb_path_ops GIN index for the search. The problem cases involve JSONB values that contain both scalars and sub-objects at the same nesting level, for example an array containing both scalars and sub-arrays. To fix, reset the current stack->hash after processing each value or sub-object, not before; and don't try to be cute about the outermost level's initial hash. Correcting this means that existing jsonb_path_ops indexes may now be inconsistent with the new hash calculation code. The symptom is the same --- searches not finding entries they should find --- but the specific rows affected are likely to be different. Users will need to REINDEX jsonb_path_ops indexes to make sure that all searches work as expected. Per bug #13756 from Daniel Cheng. Back-patch to 9.4 where the faulty logic was introduced.
1 parent 038aa89 commit 788e35a

File tree

3 files changed

+88
-33
lines changed

3 files changed

+88
-33
lines changed

src/backend/utils/adt/jsonb_gin.c

+18-33
Original file line numberDiff line numberDiff line change
@@ -375,58 +375,43 @@ gin_extract_jsonb_path(PG_FUNCTION_ARGS)
375375
parent = stack;
376376
stack = (PathHashStack *) palloc(sizeof(PathHashStack));
377377

378-
if (parent->parent)
379-
{
380-
/*
381-
* We pass forward hashes from previous container nesting
382-
* levels so that nested arrays with an outermost nested
383-
* object will have element hashes mixed with the
384-
* outermost key. It's also somewhat useful to have
385-
* nested objects' innermost values have hashes that are a
386-
* function of not just their own key, but outer keys too.
387-
*
388-
* Nesting an array within another array will not alter
389-
* innermost scalar element hash values, but that seems
390-
* inconsequential.
391-
*/
392-
stack->hash = parent->hash;
393-
}
394-
else
395-
{
396-
/*
397-
* At the outermost level, initialize hash with container
398-
* type proxy value. Note that this makes JB_FARRAY and
399-
* JB_FOBJECT part of the on-disk representation, but they
400-
* are that in the base jsonb object storage already.
401-
*/
402-
stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT;
403-
}
378+
/*
379+
* We pass forward hashes from outer nesting levels so that
380+
* the hashes for nested values will include outer keys as
381+
* well as their own keys.
382+
*
383+
* Nesting an array within another array will not alter
384+
* innermost scalar element hash values, but that seems
385+
* inconsequential.
386+
*/
387+
stack->hash = parent->hash;
404388
stack->parent = parent;
405389
break;
406390
case WJB_KEY:
407-
/* initialize hash from parent */
408-
stack->hash = stack->parent->hash;
409-
/* and mix in this key */
391+
/* mix this key into the current outer hash */
410392
JsonbHashScalarValue(&v, &stack->hash);
411393
/* hash is now ready to incorporate the value */
412394
break;
413395
case WJB_ELEM:
414-
/* array elements use parent hash mixed with element's hash */
415-
stack->hash = stack->parent->hash;
416-
/* FALL THRU */
417396
case WJB_VALUE:
418397
/* mix the element or value's hash into the prepared hash */
419398
JsonbHashScalarValue(&v, &stack->hash);
420399
/* and emit an index entry */
421400
entries[i++] = UInt32GetDatum(stack->hash);
422-
/* Note: we assume we'll see KEY before another VALUE */
401+
/* reset hash for next key, value, or sub-object */
402+
stack->hash = stack->parent->hash;
423403
break;
424404
case WJB_END_ARRAY:
425405
case WJB_END_OBJECT:
426406
/* Pop the stack */
427407
parent = stack->parent;
428408
pfree(stack);
429409
stack = parent;
410+
/* reset hash for next key, value, or sub-object */
411+
if (stack->parent)
412+
stack->hash = stack->parent->hash;
413+
else
414+
stack->hash = 0;
430415
break;
431416
default:
432417
elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);

src/test/regress/expected/jsonb.out

+50
Original file line numberDiff line numberDiff line change
@@ -2193,6 +2193,56 @@ SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},1]}';
21932193
t
21942194
(1 row)
21952195

2196+
-- check some corner cases for indexed nested containment (bug #13756)
2197+
create temp table nestjsonb (j jsonb);
2198+
insert into nestjsonb (j) values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
2199+
insert into nestjsonb (j) values ('[[14,2,3]]');
2200+
insert into nestjsonb (j) values ('[1,[14,2,3]]');
2201+
create index on nestjsonb using gin(j jsonb_path_ops);
2202+
set enable_seqscan = on;
2203+
set enable_bitmapscan = off;
2204+
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
2205+
j
2206+
---------------------------------------------------
2207+
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
2208+
(1 row)
2209+
2210+
select * from nestjsonb where j @> '{"c":3}';
2211+
j
2212+
---------------------------------------------------
2213+
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
2214+
(1 row)
2215+
2216+
select * from nestjsonb where j @> '[[14]]';
2217+
j
2218+
-----------------
2219+
[[14, 2, 3]]
2220+
[1, [14, 2, 3]]
2221+
(2 rows)
2222+
2223+
set enable_seqscan = off;
2224+
set enable_bitmapscan = on;
2225+
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
2226+
j
2227+
---------------------------------------------------
2228+
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
2229+
(1 row)
2230+
2231+
select * from nestjsonb where j @> '{"c":3}';
2232+
j
2233+
---------------------------------------------------
2234+
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
2235+
(1 row)
2236+
2237+
select * from nestjsonb where j @> '[[14]]';
2238+
j
2239+
-----------------
2240+
[[14, 2, 3]]
2241+
[1, [14, 2, 3]]
2242+
(2 rows)
2243+
2244+
reset enable_seqscan;
2245+
reset enable_bitmapscan;
21962246
-- nested object field / array index lookup
21972247
SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'n';
21982248
?column?

src/test/regress/sql/jsonb.sql

+20
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,26 @@ SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4}]}';
488488
SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},3]}';
489489
SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},1]}';
490490

491+
-- check some corner cases for indexed nested containment (bug #13756)
492+
create temp table nestjsonb (j jsonb);
493+
insert into nestjsonb (j) values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
494+
insert into nestjsonb (j) values ('[[14,2,3]]');
495+
insert into nestjsonb (j) values ('[1,[14,2,3]]');
496+
create index on nestjsonb using gin(j jsonb_path_ops);
497+
498+
set enable_seqscan = on;
499+
set enable_bitmapscan = off;
500+
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
501+
select * from nestjsonb where j @> '{"c":3}';
502+
select * from nestjsonb where j @> '[[14]]';
503+
set enable_seqscan = off;
504+
set enable_bitmapscan = on;
505+
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
506+
select * from nestjsonb where j @> '{"c":3}';
507+
select * from nestjsonb where j @> '[[14]]';
508+
reset enable_seqscan;
509+
reset enable_bitmapscan;
510+
491511
-- nested object field / array index lookup
492512
SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'n';
493513
SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'a';

0 commit comments

Comments
 (0)