From fe68f11b61f53d98eab544fa29baf80d43ffeff9 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Thu, 20 Oct 2016 01:45:51 +0300 Subject: [PATCH 1/4] Fix unused variable warnings --- jsquery_op.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jsquery_op.c b/jsquery_op.c index 498884a..15eead5 100644 --- a/jsquery_op.c +++ b/jsquery_op.c @@ -449,7 +449,7 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) if (JsonbType(jb) == jbvScalar) { JsonbIterator *it; - int32 r; + int32 r PG_USED_FOR_ASSERTS_ONLY; JsonbValue v; it = JsonbIteratorInit(jb->val.binary.data); @@ -572,7 +572,7 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) if (JsonbType(jb) == jbvScalar) { JsonbIterator *it; - int32 r; + int32 r PG_USED_FOR_ASSERTS_ONLY; JsonbValue v; it = JsonbIteratorInit(jb->val.binary.data); From c624498385bb0708da4844d7fb1106df215b2b32 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Thu, 20 Oct 2016 01:45:23 +0300 Subject: [PATCH 2/4] Port to jsonbc --- jsquery.h | 1 + jsquery_op.c | 56 ++++++++++++++++++++++++++++++++-------------------- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/jsquery.h b/jsquery.h index b2b4d0f..69bbedf 100644 --- a/jsquery.h +++ b/jsquery.h @@ -19,6 +19,7 @@ #include "fmgr.h" #include "utils/numeric.h" #include "utils/jsonb.h" +#include "utils/json_generic.h" typedef struct { diff --git a/jsquery_op.c b/jsquery_op.c index 15eead5..ee92dc2 100644 --- a/jsquery_op.c +++ b/jsquery_op.c @@ -43,7 +43,6 @@ compareNumeric(Numeric a, Numeric b) ); } -#define jbvScalar jbvBinary static int JsonbType(JsonbValue *jb) { @@ -52,15 +51,9 @@ JsonbType(JsonbValue *jb) if (jb->type == jbvBinary) { JsonbContainer *jbc = jb->val.binary.data; - - if (jbc->header & JB_FSCALAR) + type = jbc->type; + if (type == (jbvArray | jbvScalar)) type = jbvScalar; - else if (jbc->header & JB_FOBJECT) - type = jbvObject; - else if (jbc->header & JB_FARRAY) - type = jbvArray; - else - elog(ERROR, "Unknown container type: 0x%08x", jbc->header); } return type; @@ -174,18 +167,22 @@ checkArrayEquality(JsQueryItem *jsq, JsonbValue *jb) JsonbIterator *it; JsonbValue v; JsQueryItem elem; + int nelems; if (!(jsq->type == jqiArray && JsonbType(jb) == jbvArray)) return false; + nelems = JsonContainerSize(jb->val.binary.data); + if (nelems < 0) + nelems = JsonGetArraySize(jb->val.binary.data); + + if (nelems != jsq->array.nelems) + return false; it = JsonbIteratorInit(jb->val.binary.data); r = JsonbIteratorNext(&it, &v, true); Assert(r == WJB_BEGIN_ARRAY); - if (v.val.array.nElems != jsq->array.nelems) - return false; - while((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE) { if (r != WJB_ELEM) @@ -338,7 +335,28 @@ executeExpr(JsQueryItem *jsq, int32 op, JsonbValue *jb, JsQueryItem *jsqLeftArg) r = JsonbIteratorNext(&it, &v, true); Assert(r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT); - length = (r == WJB_BEGIN_ARRAY) ? v.val.array.nElems : v.val.object.nPairs; + if (r == WJB_BEGIN_ARRAY) + { + length = v.val.array.nElems; + + if (length < 0) + length = JsonGetArraySize(jb->val.binary.data); + } + else + { + length = v.val.object.nPairs; + + if (length < 0) + { + length = 0; + + while ((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE) + { + if (r == WJB_KEY) + length++; + } + } + } v.type = jbvNumeric; v.val.numeric = DatumGetNumeric(DirectFunctionCall1(int4_numeric, Int32GetDatum(length))); @@ -609,16 +627,14 @@ jsquery_json_exec(PG_FUNCTION_ARGS) JsonbValue jbv; JsQueryItem jsq; - jbv.type = jbvBinary; - jbv.val.binary.data = &jb->root; - jbv.val.binary.len = VARSIZE_ANY_EXHDR(jb); + JsonValueInitBinary(&jbv, &jb->root); jsqInit(&jsq, jq); res = recursiveExecute(&jsq, &jbv, NULL); PG_FREE_IF_COPY(jq, 0); - PG_FREE_IF_COPY(jb, 1); + PG_FREE_IF_COPY_JSONB(jb, 1); PG_RETURN_BOOL(res); } @@ -633,15 +649,13 @@ json_jsquery_exec(PG_FUNCTION_ARGS) JsonbValue jbv; JsQueryItem jsq; - jbv.type = jbvBinary; - jbv.val.binary.data = &jb->root; - jbv.val.binary.len = VARSIZE_ANY_EXHDR(jb); + JsonValueInitBinary(&jbv, &jb->root); jsqInit(&jsq, jq); res = recursiveExecute(&jsq, &jbv, NULL); - PG_FREE_IF_COPY(jb, 0); + PG_FREE_IF_COPY_JSONB(jb, 0); PG_FREE_IF_COPY(jq, 1); PG_RETURN_BOOL(res); From d57758a602de7e6da4aa0838304396aa532390d9 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Tue, 1 Nov 2016 23:00:38 +0300 Subject: [PATCH 3/4] fixup! add array/object length as @# --- expected/jsquery.out | 24 ++++++ jsquery_op.c | 193 +++++++++++++++++++------------------------ sql/jsquery.sql | 4 + 3 files changed, 115 insertions(+), 106 deletions(-) diff --git a/expected/jsquery.out b/expected/jsquery.out index a550b9a..d86b0b4 100644 --- a/expected/jsquery.out +++ b/expected/jsquery.out @@ -1391,6 +1391,30 @@ select '{"a":[1,2]}' @@ '@# = 1'::jsquery; t (1 row) +select '[]' @@ '@# is numeric'::jsquery; + ?column? +---------- + t +(1 row) + +select '{}' @@ '@# is numeric'::jsquery; + ?column? +---------- + t +(1 row) + +select '0' @@ '@# is numeric'::jsquery; + ?column? +---------- + f +(1 row) + +select '0' @@ '@# = 1'::jsquery; + ?column? +---------- + f +(1 row) + --ALL select 'a.*: = 4'::jsquery; jsquery diff --git a/jsquery_op.c b/jsquery_op.c index ee92dc2..432e93e 100644 --- a/jsquery_op.c +++ b/jsquery_op.c @@ -29,7 +29,7 @@ #include "jsquery.h" -static bool recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg); +static bool recursiveExecute(JsQueryItem *jsq, JsonbValue *jb); static int compareNumeric(Numeric a, Numeric b) @@ -59,6 +59,43 @@ JsonbType(JsonbValue *jb) return type; } +static JsonbValue * +JsonbSize(JsonbValue *jb, JsonbValue *size) +{ + JsonbValue v; + JsonbIterator *it; + JsonbIteratorToken r; + int32 length; + int type = JsonbType(jb); + + if (type != jbvArray && type != jbvObject) + return NULL; + + Assert(jb->type == jbvBinary); + + it = JsonbIteratorInit(jb->val.binary.data); + r = JsonbIteratorNext(&it, &v, true); + Assert(r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT); + + if (r == WJB_BEGIN_ARRAY) + { + length = v.val.array.nElems; + if (length < 0) + length = JsonGetArraySize(jb->val.binary.data); + } + else + { + length = v.val.object.nPairs; + if (length < 0) + length = JsonGetObjectSize(jb->val.binary.data); + } + + size->type = jbvNumeric; + size->val.numeric = DatumGetNumeric(DirectFunctionCall1( + int4_numeric, Int32GetDatum(length))); + return size; +} + static bool recursiveAny(JsQueryItem *jsq, JsonbValue *jb) { @@ -81,7 +118,7 @@ recursiveAny(JsQueryItem *jsq, JsonbValue *jb) if (r == WJB_VALUE || r == WJB_ELEM) { - res = recursiveExecute(jsq, &v, NULL); + res = recursiveExecute(jsq, &v); if (res == false && v.type == jbvBinary) res = recursiveAny(jsq, &v); @@ -113,7 +150,7 @@ recursiveAll(JsQueryItem *jsq, JsonbValue *jb) if (r == WJB_VALUE || r == WJB_ELEM) { - if ((res = recursiveExecute(jsq, &v, NULL)) == true) + if ((res = recursiveExecute(jsq, &v)) == true) { if (v.type == jbvBinary) res = recursiveAll(jsq, &v); @@ -312,7 +349,7 @@ makeCompare(JsQueryItem *jsq, int32 op, JsonbValue *jb) } static bool -executeExpr(JsQueryItem *jsq, int32 op, JsonbValue *jb, JsQueryItem *jsqLeftArg) +executeExpr(JsQueryItem *jsq, int32 op, JsonbValue *jb) { bool res = false; /* @@ -322,100 +359,37 @@ executeExpr(JsQueryItem *jsq, int32 op, JsonbValue *jb, JsQueryItem *jsqLeftArg) Assert(jsq->type == jqiAny || jsq->type == jqiString || jsq->type == jqiNumeric || jsq->type == jqiNull || jsq->type == jqiBool || jsq->type == jqiArray); - if (jsqLeftArg && jsqLeftArg->type == jqiLength) + switch(op) { - if (JsonbType(jb) == jbvArray || JsonbType(jb) == jbvObject) - { - int32 length; - JsonbIterator *it; - JsonbValue v; - int r; - - it = JsonbIteratorInit(jb->val.binary.data); - r = JsonbIteratorNext(&it, &v, true); - Assert(r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT); - - if (r == WJB_BEGIN_ARRAY) - { - length = v.val.array.nElems; - - if (length < 0) - length = JsonGetArraySize(jb->val.binary.data); - } + case jqiEqual: + if (JsonbType(jb) == jbvArray && jsq->type == jqiArray) + res = checkArrayEquality(jsq, jb); else - { - length = v.val.object.nPairs; - - if (length < 0) - { - length = 0; - - while ((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE) - { - if (r == WJB_KEY) - length++; - } - } - } - - v.type = jbvNumeric; - v.val.numeric = DatumGetNumeric(DirectFunctionCall1(int4_numeric, Int32GetDatum(length))); - - switch(op) - { - case jqiEqual: - case jqiLess: - case jqiGreater: - case jqiLessOrEqual: - case jqiGreaterOrEqual: - res = makeCompare(jsq, op, &v); - break; - case jqiIn: - res = checkScalarIn(jsq, &v); - break; - case jqiOverlap: - case jqiContains: - case jqiContained: - break; - default: - elog(ERROR, "Unknown operation"); - } - } - } - else - { - switch(op) - { - case jqiEqual: - if (JsonbType(jb) == jbvArray && jsq->type == jqiArray) - res = checkArrayEquality(jsq, jb); - else - res = checkScalarEquality(jsq, jb); - break; - case jqiIn: - res = checkScalarIn(jsq, jb); - break; - case jqiOverlap: - case jqiContains: - case jqiContained: - res = executeArrayOp(jsq, op, jb); - break; - case jqiLess: - case jqiGreater: - case jqiLessOrEqual: - case jqiGreaterOrEqual: - res = makeCompare(jsq, op, jb); - break; - default: - elog(ERROR, "Unknown operation"); - } + res = checkScalarEquality(jsq, jb); + break; + case jqiIn: + res = checkScalarIn(jsq, jb); + break; + case jqiOverlap: + case jqiContains: + case jqiContained: + res = executeArrayOp(jsq, op, jb); + break; + case jqiLess: + case jqiGreater: + case jqiLessOrEqual: + case jqiGreaterOrEqual: + res = makeCompare(jsq, op, jb); + break; + default: + elog(ERROR, "Unknown operation"); } return res; } static bool -recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) +recursiveExecute(JsQueryItem *jsq, JsonbValue *jb) { JsQueryItem elem; bool res = false; @@ -425,25 +399,25 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) switch(jsq->type) { case jqiAnd: jsqGetLeftArg(jsq, &elem); - res = recursiveExecute(&elem, jb, jsqLeftArg); + res = recursiveExecute(&elem, jb); if (res == true) { jsqGetRightArg(jsq, &elem); - res = recursiveExecute(&elem, jb, jsqLeftArg); + res = recursiveExecute(&elem, jb); } break; case jqiOr: jsqGetLeftArg(jsq, &elem); - res = recursiveExecute(&elem, jb, jsqLeftArg); + res = recursiveExecute(&elem, jb); if (res == false) { jsqGetRightArg(jsq, &elem); - res = recursiveExecute(&elem, jb, jsqLeftArg); + res = recursiveExecute(&elem, jb); } break; case jqiNot: jsqGetArg(jsq, &elem); - res = !recursiveExecute(&elem, jb, jsqLeftArg); + res = !recursiveExecute(&elem, jb); break; case jqiKey: if (JsonbType(jb) == jbvObject) { @@ -457,7 +431,7 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) if (v != NULL) { jsqGetNext(jsq, &elem); - res = recursiveExecute(&elem, v, NULL); + res = recursiveExecute(&elem, v); pfree(v); } } @@ -480,23 +454,23 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) r = JsonbIteratorNext(&it, &v, true); Assert(r == WJB_ELEM); - res = recursiveExecute(&elem, &v, jsqLeftArg); + res = recursiveExecute(&elem, &v); } else { - res = recursiveExecute(&elem, jb, jsqLeftArg); + res = recursiveExecute(&elem, jb); } break; case jqiAny: jsqGetNext(jsq, &elem); - if (recursiveExecute(&elem, jb, NULL)) + if (recursiveExecute(&elem, jb)) res = true; else if (jb->type == jbvBinary) res = recursiveAny(&elem, jb); break; case jqiAll: jsqGetNext(jsq, &elem); - if ((res = recursiveExecute(&elem, jb, NULL)) == true) + if ((res = recursiveExecute(&elem, jb)) == true) { if (jb->type == jbvBinary) res = recursiveAll(&elem, jb); @@ -520,7 +494,7 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) { if (r == WJB_ELEM) { - res = recursiveExecute(&elem, &v, NULL); + res = recursiveExecute(&elem, &v); if (jsq->type == jqiAnyArray) { @@ -554,7 +528,7 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) { if (r == WJB_VALUE) { - res = recursiveExecute(&elem, &v, NULL); + res = recursiveExecute(&elem, &v); if (jsq->type == jqiAnyKey) { @@ -580,12 +554,19 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) case jqiContained: case jqiOverlap: jsqGetArg(jsq, &elem); - res = executeExpr(&elem, jsq->type, jb, jsqLeftArg); + res = executeExpr(&elem, jsq->type, jb); break; case jqiLength: + { + JsonbValue size; jsqGetNext(jsq, &elem); - res = recursiveExecute(&elem, jb, jsq); + if (JsonbSize(jb, &size)) + { + res = recursiveExecute(&elem, &size); + pfree(size.val.numeric); + } break; + } case jqiIs: if (JsonbType(jb) == jbvScalar) { @@ -631,7 +612,7 @@ jsquery_json_exec(PG_FUNCTION_ARGS) jsqInit(&jsq, jq); - res = recursiveExecute(&jsq, &jbv, NULL); + res = recursiveExecute(&jsq, &jbv); PG_FREE_IF_COPY(jq, 0); PG_FREE_IF_COPY_JSONB(jb, 1); @@ -653,7 +634,7 @@ json_jsquery_exec(PG_FUNCTION_ARGS) jsqInit(&jsq, jq); - res = recursiveExecute(&jsq, &jbv, NULL); + res = recursiveExecute(&jsq, &jbv); PG_FREE_IF_COPY_JSONB(jb, 0); PG_FREE_IF_COPY(jq, 1); diff --git a/sql/jsquery.sql b/sql/jsquery.sql index 309b790..15889d3 100644 --- a/sql/jsquery.sql +++ b/sql/jsquery.sql @@ -283,6 +283,10 @@ select '{"a":[1,2]}' @@ '%.@# in (2, 4)'::jsquery; select '{"a":[1,2]}' @@ '*.@# in (2, 4)'::jsquery; select '{"a":[1,2]}' @@ '*.@# ($ = 4 or $ = 2)'::jsquery; select '{"a":[1,2]}' @@ '@# = 1'::jsquery; +select '[]' @@ '@# is numeric'::jsquery; +select '{}' @@ '@# is numeric'::jsquery; +select '0' @@ '@# is numeric'::jsquery; +select '0' @@ '@# = 1'::jsquery; --ALL select 'a.*: = 4'::jsquery; From c0da8a546d163d42775291b3523cfd39a2634580 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Sat, 12 Nov 2016 22:30:02 +0300 Subject: [PATCH 4/4] Add selectivity estimation --- Makefile | 5 +- expected/jsquery_stats.out | 425 +++++++++++++++++++++++++++++++ jsquery--1.0.sql | 9 +- jsquery_selfuncs.c | 495 +++++++++++++++++++++++++++++++++++++ sql/jsquery_stats.sql | 175 +++++++++++++ 5 files changed, 1105 insertions(+), 4 deletions(-) create mode 100644 expected/jsquery_stats.out create mode 100644 jsquery_selfuncs.c create mode 100644 sql/jsquery_stats.sql diff --git a/Makefile b/Makefile index 2559ad9..9961607 100644 --- a/Makefile +++ b/Makefile @@ -2,12 +2,13 @@ MODULE_big = jsquery OBJS = jsonb_gin_ops.o jsquery_constr.o jsquery_extract.o \ - jsquery_gram.o jsquery_io.o jsquery_op.o jsquery_support.o + jsquery_gram.o jsquery_io.o jsquery_op.o jsquery_selfuncs.o \ + jsquery_support.o EXTENSION = jsquery DATA = jsquery--1.0.sql -REGRESS = jsquery +REGRESS = jsquery jsquery_stats # We need a UTF8 database ENCODING = UTF8 diff --git a/expected/jsquery_stats.out b/expected/jsquery_stats.out new file mode 100644 index 0000000..be1f0ff --- /dev/null +++ b/expected/jsquery_stats.out @@ -0,0 +1,425 @@ +DROP FUNCTION IF EXISTS explain_jsonb(text); + +CREATE OR REPLACE FUNCTION explain_jsonb(sql_query text) +RETURNS TABLE(explain_line json) AS +$$ +BEGIN + RETURN QUERY EXECUTE 'EXPLAIN (ANALYZE, FORMAT json) ' || sql_query; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION get_plan_and_actual_rows(sql_query text) +RETURNS TABLE(plan integer, actual integer) AS +$$ + SELECT + (plan->>'Plan Rows')::integer plan, + (plan->>'Actual Rows')::integer actual + FROM ( + SELECT explain_jsonb(sql_query) #> '{0,Plan,Plans,0}' + ) p(plan) +$$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION check_estimate(sql_query text, accuracy real) +RETURNS boolean AS +$$ + SELECT plan BETWEEN actual / (1 + accuracy) AND (actual + 1) * (1 + accuracy) + FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x +$$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION check_estimate2(sql_query text, accuracy real) +RETURNS TABLE(min integer, max integer) AS +$$ + SELECT (actual * (1 - accuracy))::integer, ((actual + 1) * (1 + accuracy))::integer + FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x +$$ LANGUAGE sql; + +DROP TABLE IF EXISTS jsonb_stats_test; +CREATE TABLE jsonb_stats_test(js jsonb); + +INSERT INTO jsonb_stats_test SELECT NULL FROM generate_series(1, 10000); + +INSERT INTO jsonb_stats_test SELECT 'null' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT 'true' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT 'false' FROM generate_series(1, 5000); + +INSERT INTO jsonb_stats_test SELECT '12345' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT (1000 * (i % 10))::text::jsonb FROM generate_series(1, 4000) i; +INSERT INTO jsonb_stats_test SELECT i::text::jsonb FROM generate_series(1, 5000) i; + +INSERT INTO jsonb_stats_test SELECT '"foo"' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT format('"bar%s"', i % 10)::jsonb FROM generate_series(1, 4000) i; +INSERT INTO jsonb_stats_test SELECT format('"baz%s"', i)::jsonb FROM generate_series(1, 5000) i; + +INSERT INTO jsonb_stats_test SELECT '{}' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'bar') FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'baz' || (i % 10)) FROM generate_series(1, 3000) i; +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', i % 10) FROM generate_series(1, 2000) i; +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('"foo \"bar"', i % 10) FROM generate_series(1, 2000) i; + +INSERT INTO jsonb_stats_test SELECT '[]' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT '["foo"]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[12345]' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT '[["foo"]]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[{"key": "foo"}]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, "foo"]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, 12345]' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT '[null, ["foo"]]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, {"key": "foo"}]' FROM generate_series(1, 2000); + +-- Build random variable-length integer arrays +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array()) +FROM generate_series(1, 1000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int)) +FROM generate_series(1, 4000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int, + floor(random() * 10)::int)) +FROM generate_series(1, 3000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int, + floor(random() * 10)::int, + floor(random() * 10)::int)) +FROM generate_series(1, 2000); + + +ANALYZE jsonb_stats_test; + +CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate(sql_condition text, accuracy real) +RETURNS boolean AS +$$ + SELECT check_estimate('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition, accuracy) +$$ LANGUAGE sql; + +DROP FUNCTION IF EXISTS check_jsonb_stats_test_estimate2(text, real); + +CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate2(sql_condition text, accuracy real) +RETURNS TABLE(plan integer, actual integer) AS +$$ + SELECT get_plan_and_actual_rows('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition) +$$ LANGUAGE sql; + + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '* = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate2($$js @@ '% = *'$$, 0.01); + check_jsonb_stats_test_estimate2 +---------------------------------- + (16993,18000) +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = null'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = true'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = false'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS BOOLEAN'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS NUMERIC'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS STRING'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS OBJECT'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS ARRAY'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key = null'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '*.bad_key = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '%.bad_key = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.* = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.# = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.% = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key1.bad_key2 = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ 'foo = *'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS OBJECT'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS OBJECT'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS ARRAY'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS ARRAY'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS STRING'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS STRING'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS NUMERIC'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS NUMERIC'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = *'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = *'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = 3'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = 3'$$, 0.2); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = 100'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = 100'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 0'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 1'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 2'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = "foo"'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#.# = "foo"'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#.% = "foo"'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#.* = "foo"'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array = *'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 0'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 1'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 2'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 3'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 4'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# < 3'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array @> [3]'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate2($$js @@ 'array @> [3,8]'$$, 1); + check_jsonb_stats_test_estimate2 +---------------------------------- + (31,169) +(1 row) + + diff --git a/jsquery--1.0.sql b/jsquery--1.0.sql index 3bf1d9b..f009cd8 100644 --- a/jsquery--1.0.sql +++ b/jsquery--1.0.sql @@ -30,12 +30,17 @@ CREATE FUNCTION json_jsquery_exec(jsonb, jsquery) AS 'MODULE_PATHNAME' LANGUAGE C STRICT IMMUTABLE; +CREATE FUNCTION jsquery_sel(internal, oid, internal, integer) + RETURNS float8 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT; + CREATE OPERATOR @@ ( LEFTARG = jsquery, RIGHTARG = jsonb, PROCEDURE = jsquery_json_exec, COMMUTATOR = '@@', - RESTRICT = contsel, + RESTRICT = jsquery_sel, JOIN = contjoinsel ); @@ -44,7 +49,7 @@ CREATE OPERATOR @@ ( RIGHTARG = jsquery, PROCEDURE = json_jsquery_exec, COMMUTATOR = '@@', - RESTRICT = contsel, + RESTRICT = jsquery_sel, JOIN = contjoinsel ); diff --git a/jsquery_selfuncs.c b/jsquery_selfuncs.c new file mode 100644 index 0000000..3eecb57 --- /dev/null +++ b/jsquery_selfuncs.c @@ -0,0 +1,495 @@ +/*------------------------------------------------------------------------- + * + * jsquery_selfuncs.c + * Functions for selectivity estimation of jsquery operators + * + * Copyright (c) 2016, PostgreSQL Global Development Group + * Author: Nikita Glukhov + * + * IDENTIFICATION + * contrib/jsquery/jsquery_selfuncs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "miscadmin.h" +#include "catalog/pg_operator.h" +#include "utils/json_selfuncs.h" +#include "utils/selfuncs.h" + +#include "jsquery.h" + +#include + +/* Default selectivity constant for "@@" operator */ +#define DEFAULT_JSQUERY_SEL 0.005 + +static Selectivity jsqSelectivity(JsonPathStats stats, + JsQueryItem *jsq, + JsQueryItem *jsqLeftArg); + +static JsonbValue * +JsQueryItemToJsonbValue(JsQueryItem *jsq, JsonbValue *jbv) +{ + switch (jsq->type) + { + case jqiNull: + jbv->type = jbvNull; + break; + + case jqiString: + jbv->type = jbvString; + jbv->val.string.val = jsqGetString(jsq, &jbv->val.string.len); + break; + + case jqiBool: + jbv->type = jbvBool; + jbv->val.boolean = jsqGetBool(jsq); + break; + + case jqiNumeric: + jbv->type = jbvNumeric; + jbv->val.numeric = jsqGetNumeric(jsq); + break; + + default: + elog(ERROR, "Invalid jsquery scalar type %d", jsq->type); + break; + } + + return jbv; +} + +static Selectivity +jsqSelectivityScalarCmp(JsonPathStats stats, JsQueryItem *jsq, int32 op) +{ + JsonbValue jbv; + Oid opr; + + if (jsq->type != jqiNumeric) /* TODO other types support */ + return 0.0; + + if (jsq->type == jqiAny) + return jsonPathStatsGetFreq(stats, DEFAULT_JSQUERY_SEL); + + JsQueryItemToJsonbValue(jsq, &jbv); + + switch (op) + { + case jqiEqual: + opr = JsonbEqOperator; + break; + case jqiLess: + opr = JsonbLtOperator; + break; + case jqiGreater: + opr = JsonbGtOperator; + break; + case jqiLessOrEqual: + opr = JsonbLeOperator; + break; + case jqiGreaterOrEqual: + opr = JsonbGeOperator; + break; + default: + elog(ERROR, "Unknown jsquery operation %d", op); + } + + return jsonSelectivity(stats, JsonbGetDatum(JsonbValueToJsonb(&jbv)), opr); +} + +static Selectivity +jsqSelectivityScalarEq(JsonPathStats stats, JsQueryItem *jsq) +{ + JsonbValue jbv; + Datum jsonb; + + if (jsq->type == jqiAny) + return jsonPathStatsGetFreq(stats, DEFAULT_JSQUERY_SEL); + + jsonb = JsonbGetDatum(JsonbValueToJsonb( + JsQueryItemToJsonbValue(jsq, &jbv))); + + return jsonSelectivity(stats, jsonb, JsonbEqOperator); +} + +static Selectivity +jsqSelectivityArrayEq(JsonPathStats stats, JsQueryItem *jsq) +{ + JsonbParseState *ps = NULL; + JsQueryItem elem; + JsonbValue *jbv; + + if (jsq->type != jqiArray) + return 0.0; + + pushJsonbValue(&ps, WJB_BEGIN_ARRAY, NULL); + + for (jsqIterateInit(jsq); jsqIterateArray(jsq, &elem);) + { + JsonbValue v; + + if (elem.type == jqiAny) + continue; + + pushJsonbValue(&ps, WJB_ELEM, JsQueryItemToJsonbValue(&elem, &v)); + } + + jbv = pushJsonbValue(&ps, WJB_END_ARRAY, NULL); + + return jsonSelectivity(stats, JsonbGetDatum(JsonbValueToJsonb(jbv)), + JsonbEqOperator); +} + +static Selectivity +jsqSelectivityScalarIn(JsonPathStats stats, JsQueryItem *jsq) +{ + JsQueryItem elem; + Selectivity res = 1.0; + + if (jsq->type != jqiArray) + return 0.0; + + for (jsqIterateInit(jsq); res > 0.0 && jsqIterateArray(jsq, &elem);) + res *= 1.0 - jsqSelectivityScalarEq(stats, &elem); + + return 1.0 - res; +} + +static Selectivity +jsqSelectivityArrayAnyAll(JsonPathStats stats, JsQueryItem *jsq, bool any) +{ + JsonPathStats arrstats = jsonPathStatsGetSubpath(stats, NULL, 0); + float4 arrfreq = jsonPathStatsGetTypeFreq(stats, jbvArray, 0.001); + float4 arrsize; + float4 arrsize0freq; + Selectivity res; + + if (!arrstats || arrfreq <= 0.0) + return 0.0; + + arrsize = jsonPathStatsGetAvgArraySize(arrstats); + + if (arrsize == 0.0) + return any ? 0.0 : arrfreq; + + res = jsqSelectivity(arrstats, jsq, NULL); + + res /= arrfreq; + CLAMP_PROBABILITY(res); + + if (any) + res = 1 - res; + + /* TODO use other array size stats */ + arrsize0freq = jsonPathStatsGetArrayIndexSelectivity(arrstats, 0) / arrfreq; + + res = pow(res, arrsize / arrsize0freq); + + if (any) + res = 1 - res; + + res *= arrsize0freq; + + if (!any) + res += 1.0 - arrsize0freq; + + return arrfreq * res; +} + + +static Selectivity +jsqSelectivityKeyAnyAll(JsonPathStats stats, JsQueryItem *jsq, bool any) +{ +/* float4 objfreq = jsonPathStatsGetTypeFreq(stats, jbvObject, 0.0); */ + JsonPathStats keystats = NULL; + Selectivity res = 1.0; + + while (res > 0.0 && jsonPathStatsGetNextKeyStats(stats, &keystats, true)) + { + Selectivity sel = jsqSelectivity(keystats, jsq, NULL); + res *= any ? (1.0 - sel) : sel; + } + + if (any) + res = 1.0 - res; + + return res; +} + +static Selectivity +jsqSelectivityAnyAll(JsonPathStats stats, JsQueryItem *jsq, bool any) +{ + JsQueryItem elem; + Selectivity sel; + Selectivity res; + Selectivity freq = jsonPathStatsGetFreq(stats, 0.0); + + if (freq <= 0.0) + return 0.0; + + jsqGetNext(jsq, &elem); + + sel = jsqSelectivity(stats, &elem, NULL) / freq; + CLAMP_PROBABILITY(sel); + //res = any ? (1.0 - sel) : sel; + res = sel; + + if (res > 0.0 && jsonPathStatsGetTypeFreq(stats, jbvArray, 0.0) > 0.0) + { + sel = jsqSelectivityArrayAnyAll(stats, jsq, any) / freq; + CLAMP_PROBABILITY(sel); + //res *= any ? (1.0 - sel) : sel; + res = any ? res + sel : res * sel; + } + + if (res > 0.0 && jsonPathStatsGetTypeFreq(stats, jbvObject, 0.0) > 0.0) + { + sel = jsqSelectivityKeyAnyAll(stats, jsq, any) / freq; + CLAMP_PROBABILITY(sel); + //res *= any ? (1.0 - sel) : sel; + res = any ? res + sel : res * sel; + } + + //if (any) + // res = 1.0 - res; + CLAMP_PROBABILITY(res); + + return res * freq; +} + +static Selectivity +jsqSelectivityArrayOp(JsonPathStats stats, JsQueryItem *op, JsQueryItem *arg) +{ + JsQueryItem elem; + + if (arg->type != jqiArray) + return 0.0; + + if (op->type == jqiContains || op->type == jqiOverlap) + { + JsQueryItem eq; + Selectivity res = 1.0; + bool contains = op->type == jqiContains; + + eq.type = jqiEqual; + eq.base = arg->base; + + for (jsqIterateInit(arg); res > 0.0 && jsqIterateArray(arg, &elem);) + { + Selectivity sel; + eq.arg = arg->array.arrayPtr[arg->array.current - 1]; /* FIXME */ + sel = jsqSelectivityArrayAnyAll(stats, &eq, true); + res *= contains ? sel : (1.0 - sel); + } + + return contains ? res : (1.0 - res); + } + else + { + JsQueryItem in; + + in.type = jqiIn; + in.base = op->base; + in.arg = op->arg; + + return jsqSelectivityArrayAnyAll(stats, &in, false); + } +} + +static Selectivity +jsqSelectivityExpr(JsonPathStats stats, JsQueryItem *expr, + JsQueryItem *jsqLeftArg) +{ + JsQueryItem arg; + + jsqGetArg(expr, &arg); + + Assert(!jsqGetNext(&arg, NULL)); + Assert(arg.type == jqiAny || arg.type == jqiString || + arg.type == jqiNumeric || arg.type == jqiNull || + arg.type == jqiBool || arg.type == jqiArray); + + switch (expr->type) + { + case jqiEqual: + return arg.type == jqiArray ? jsqSelectivityArrayEq(stats, &arg) + : jsqSelectivityScalarEq(stats, &arg); + + case jqiLess: + case jqiGreater: + case jqiLessOrEqual: + case jqiGreaterOrEqual: + return jsqSelectivityScalarCmp(stats, &arg, expr->type); + + case jqiIn: + return jsqSelectivityScalarIn(stats, &arg); + + case jqiOverlap: + case jqiContains: + case jqiContained: + return jsqSelectivityArrayOp(stats, expr, &arg); + + default: + elog(ERROR, "Unknown operation"); + return DEFAULT_JSQUERY_SEL; + } +} + +static Selectivity +jsqSelectivity(JsonPathStats stats, JsQueryItem *jsq, JsQueryItem *jsqLeftArg) +{ + JsQueryItem elem; + Selectivity res = DEFAULT_JSQUERY_SEL; + + check_stack_depth(); + + switch (jsq->type) + { + case jqiAnd: + jsqGetLeftArg(jsq, &elem); + res = jsqSelectivity(stats, &elem, jsqLeftArg); + if (res > 0.0) + { + jsqGetRightArg(jsq, &elem); + res *= jsqSelectivity(stats, &elem, jsqLeftArg); + } + break; + + case jqiOr: + jsqGetLeftArg(jsq, &elem); + res = jsqSelectivity(stats, &elem, jsqLeftArg); + if (res < 1.0) + { + jsqGetRightArg(jsq, &elem); + res = 1.0 - (1.0 - res) * + (1.0 - jsqSelectivity(stats, &elem, jsqLeftArg)); + } + break; + + case jqiNot: + jsqGetArg(jsq, &elem); + res = 1.0 - jsqSelectivity(stats, &elem, jsqLeftArg); + break; + + case jqiKey: + { + int keylen; + char *key = jsqGetString(jsq, &keylen); + JsonPathStats keystats = jsonPathStatsGetSubpath(stats, + key, keylen); + + if (keystats) + { + jsqGetNext(jsq, &elem); + res = jsqSelectivity(keystats, &elem, NULL); + } + else + res = 0.0; + + break; + } + + case jqiCurrent: + jsqGetNext(jsq, &elem); + res = jsqSelectivity(stats, &elem, jsqLeftArg); + break; + + case jqiAnyKey: + case jqiAllKey: + jsqGetNext(jsq, &elem); + res = jsqSelectivityKeyAnyAll(stats, &elem, jsq->type == jqiAnyKey); + break; + + case jqiAnyArray: + case jqiAllArray: + jsqGetNext(jsq, &elem); + res = jsqSelectivityArrayAnyAll(stats, &elem, + jsq->type == jqiAnyArray); + break; + + case jqiAny: + case jqiAll: + res = jsqSelectivityAnyAll(stats, jsq, jsq->type == jqiAny); + break; + + case jqiEqual: + case jqiIn: + case jqiLess: + case jqiGreater: + case jqiLessOrEqual: + case jqiGreaterOrEqual: + case jqiContains: + case jqiContained: + case jqiOverlap: + res = jsqSelectivityExpr(stats, jsq, jsqLeftArg); + break; + + case jqiLength: + jsqGetNext(jsq, &elem); + stats = jsonPathStatsGetLengthStats(stats); + res = stats ? jsqSelectivity(stats, &elem, jsq) : 0.0; + break; + + case jqiIs: + res = jsonPathStatsGetTypeFreq(stats, jsqGetIsType(jsq), 0.0); + break; + + default: + elog(ERROR,"Wrong state: %d", jsq->type); + } + + CLAMP_PROBABILITY(res); + + return res; +} + +PG_FUNCTION_INFO_V1(jsquery_sel); + +Datum +jsquery_sel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); +/* Oid operator = PG_GETARG_OID(1); */ + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + Selectivity sel = DEFAULT_JSQUERY_SEL; + VariableStatData vardata; + JsonStatData jsdata; + JsonPathStats pstats; + Node *other; + bool varonleft; + JsQuery *jq; + JsQueryItem jsq; + + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8((float8) sel); + + if (!IsA(other, Const) || !varonleft /* FIXME l/r operator */) + goto out; + + if (((Const *) other)->constisnull) + { + sel = 0.0; + goto out; + } + + jq = DatumGetJsQueryP(((Const *) other)->constvalue); + jsqInit(&jsq, jq); + + if (!jsonStatsInit(&jsdata, &vardata)) + goto out; + + pstats = jsonStatsGetPathStatsStr(&jsdata, "$", 1); + + sel = pstats ? jsqSelectivity(pstats, &jsq, NULL) : 0.0; + +out: + ReleaseVariableStats(vardata); + + CLAMP_PROBABILITY(sel); + + PG_RETURN_FLOAT8((float8) sel); +} diff --git a/sql/jsquery_stats.sql b/sql/jsquery_stats.sql new file mode 100644 index 0000000..9140d2b --- /dev/null +++ b/sql/jsquery_stats.sql @@ -0,0 +1,175 @@ +DROP FUNCTION IF EXISTS explain_jsonb(text); + +CREATE OR REPLACE FUNCTION explain_jsonb(sql_query text) +RETURNS TABLE(explain_line json) AS +$$ +BEGIN + RETURN QUERY EXECUTE 'EXPLAIN (ANALYZE, FORMAT json) ' || sql_query; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION get_plan_and_actual_rows(sql_query text) +RETURNS TABLE(plan integer, actual integer) AS +$$ + SELECT + (plan->>'Plan Rows')::integer plan, + (plan->>'Actual Rows')::integer actual + FROM ( + SELECT explain_jsonb(sql_query) #> '{0,Plan,Plans,0}' + ) p(plan) +$$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION check_estimate(sql_query text, accuracy real) +RETURNS boolean AS +$$ + SELECT plan BETWEEN actual / (1 + accuracy) AND (actual + 1) * (1 + accuracy) + FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x +$$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION check_estimate2(sql_query text, accuracy real) +RETURNS TABLE(min integer, max integer) AS +$$ + SELECT (actual * (1 - accuracy))::integer, ((actual + 1) * (1 + accuracy))::integer + FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x +$$ LANGUAGE sql; + +DROP TABLE IF EXISTS jsonb_stats_test; +CREATE TABLE jsonb_stats_test(js jsonb); + +INSERT INTO jsonb_stats_test SELECT NULL FROM generate_series(1, 10000); + +INSERT INTO jsonb_stats_test SELECT 'null' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT 'true' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT 'false' FROM generate_series(1, 5000); + +INSERT INTO jsonb_stats_test SELECT '12345' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT (1000 * (i % 10))::text::jsonb FROM generate_series(1, 4000) i; +INSERT INTO jsonb_stats_test SELECT i::text::jsonb FROM generate_series(1, 5000) i; + +INSERT INTO jsonb_stats_test SELECT '"foo"' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT format('"bar%s"', i % 10)::jsonb FROM generate_series(1, 4000) i; +INSERT INTO jsonb_stats_test SELECT format('"baz%s"', i)::jsonb FROM generate_series(1, 5000) i; + +INSERT INTO jsonb_stats_test SELECT '{}' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'bar') FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'baz' || (i % 10)) FROM generate_series(1, 3000) i; +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', i % 10) FROM generate_series(1, 2000) i; +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('"foo \"bar"', i % 10) FROM generate_series(1, 2000) i; + +INSERT INTO jsonb_stats_test SELECT '[]' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT '["foo"]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[12345]' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT '[["foo"]]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[{"key": "foo"}]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, "foo"]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, 12345]' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT '[null, ["foo"]]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, {"key": "foo"}]' FROM generate_series(1, 2000); + +-- Build random variable-length integer arrays +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array()) +FROM generate_series(1, 1000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int)) +FROM generate_series(1, 4000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int, + floor(random() * 10)::int)) +FROM generate_series(1, 3000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int, + floor(random() * 10)::int, + floor(random() * 10)::int)) +FROM generate_series(1, 2000); + + +ANALYZE jsonb_stats_test; + +CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate(sql_condition text, accuracy real) +RETURNS boolean AS +$$ + SELECT check_estimate('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition, accuracy) +$$ LANGUAGE sql; + +DROP FUNCTION IF EXISTS check_jsonb_stats_test_estimate2(text, real); + +CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate2(sql_condition text, accuracy real) +RETURNS TABLE(plan integer, actual integer) AS +$$ + SELECT get_plan_and_actual_rows('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition) +$$ LANGUAGE sql; + + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ '* = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate2($$js @@ '% = *'$$, 0.01); + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = null'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = true'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = false'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS BOOLEAN'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS NUMERIC'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS STRING'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS OBJECT'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS ARRAY'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key = null'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ '*.bad_key = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ '%.bad_key = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.* = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.# = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.% = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key1.bad_key2 = *'$$, 0.01); + +SELECT check_jsonb_stats_test_estimate($$js @@ 'foo = *'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS OBJECT'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS OBJECT'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS ARRAY'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS ARRAY'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS STRING'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS STRING'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS NUMERIC'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS NUMERIC'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = *'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = *'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '# = 3'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = 3'$$, 0.2); +SELECT check_jsonb_stats_test_estimate($$js @@ '# = 100'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = 100'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 0'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 1'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 2'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = "foo"'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#.# = "foo"'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#.% = "foo"'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#.* = "foo"'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array = *'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 0'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 1'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 2'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 3'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 4'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# < 3'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array @> [3]'$$, 0.1); +SELECT check_jsonb_stats_test_estimate2($$js @@ 'array @> [3,8]'$$, 1); +