From e0b080fe05997d11d579660bc38972b484b66c3d Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 23 May 2016 18:40:43 +0300 Subject: [PATCH 001/184] RuntimeInsert node --- src/runtime_insert.c | 1 + src/runtime_insert.h | 47 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 src/runtime_insert.c create mode 100644 src/runtime_insert.h diff --git a/src/runtime_insert.c b/src/runtime_insert.c new file mode 100644 index 0000000000..1f7c513414 --- /dev/null +++ b/src/runtime_insert.c @@ -0,0 +1 @@ +#include "runtime_insert.h" diff --git a/src/runtime_insert.h b/src/runtime_insert.h new file mode 100644 index 0000000000..db6a2cbe70 --- /dev/null +++ b/src/runtime_insert.h @@ -0,0 +1,47 @@ +#ifndef RUNTIME_INSERT_H +#define RUNTIME_INSERT_H + +#include "postgres.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" + +#include "pathman.h" +#include "nodes_common.h" + + +typedef struct +{ + CustomPath cpath; +} RuntimeInsertPath; + +typedef struct +{ + CustomScanState css; +} RuntimeInsertState; + +extern bool pg_pathman_enable_runtime_insert; + +extern CustomScanMethods runtime_insert_plan_methods; +extern CustomExecMethods runtime_insert_exec_methods; + +Path * create_runtimeinsert_path(PlannerInfo *root, AppendPath *inner_append, + ParamPathInfo *param_info, + double sel); + +Plan * create_runtimeinsert_plan(PlannerInfo *root, RelOptInfo *rel, + CustomPath *best_path, List *tlist, + List *clauses, List *custom_plans); + +Node * runtimeinsert_create_scan_state(CustomScan *node); + +void runtimeinsert_begin(CustomScanState *node, EState *estate, int eflags); + +TupleTableSlot * runtimeappend_exec(CustomScanState *node); + +void runtimeinsert_end(CustomScanState *node); + +void runtimeinsert_rescan(CustomScanState *node); + +void runtimeinsert_explain(CustomScanState *node, List *ancestors, ExplainState *es); + +#endif From 719f2f5d1265bc355d0df89b2ce45994caf5b84b Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov 
Date: Wed, 25 May 2016 18:03:42 +0300 Subject: [PATCH 002/184] add blank PartitionFilter node (replaces RuntimeInsert), move nodes' static initialization code to init_XXX_static_data() --- Makefile | 2 +- src/hooks.c | 7 +- src/partition_filter.c | 155 +++++++++++++++++++++++++++++++++++++ src/partition_filter.h | 45 +++++++++++ src/pg_pathman.c | 62 +++------------ src/runtime_insert.c | 1 - src/runtime_insert.h | 47 ----------- src/runtime_merge_append.c | 31 ++++++++ src/runtime_merge_append.h | 2 + src/runtimeappend.c | 31 ++++++++ src/runtimeappend.h | 4 + 11 files changed, 283 insertions(+), 104 deletions(-) create mode 100644 src/partition_filter.c create mode 100644 src/partition_filter.h delete mode 100644 src/runtime_insert.c delete mode 100644 src/runtime_insert.h diff --git a/Makefile b/Makefile index edd745c0cb..d403f63321 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # contrib/pg_pathman/Makefile MODULE_big = pg_pathman -OBJS = src/init.o src/utils.o src/runtimeappend.o src/runtime_merge_append.o src/pg_pathman.o src/dsm_array.o \ +OBJS = src/init.o src/utils.o src/partition_filter.o src/runtimeappend.o src/runtime_merge_append.o src/pg_pathman.o src/dsm_array.o \ src/rangeset.o src/pl_funcs.o src/worker.o src/hooks.o src/nodes_common.o $(WIN32RES) EXTENSION = pg_pathman diff --git a/src/hooks.c b/src/hooks.c index ef92ebf06b..fc46afa25e 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -15,6 +15,7 @@ #include "pathman.h" #include "runtimeappend.h" #include "runtime_merge_append.h" +#include "partition_filter.h" #include "utils.h" @@ -346,14 +347,16 @@ void pg_pathman_enable_assign_hook(bool newval, void *extra) /* Return quickly if nothing has changed */ if (newval == (pg_pathman_enable && pg_pathman_enable_runtimeappend && - pg_pathman_enable_runtime_merge_append)) + pg_pathman_enable_runtime_merge_append && + pg_pathman_enable_partition_filter)) return; pg_pathman_enable_runtime_merge_append = newval; pg_pathman_enable_runtimeappend = newval; + 
pg_pathman_enable_partition_filter = newval; elog(NOTICE, - "RuntimeAppend and RuntimeMergeAppend nodes have been %s", + "RuntimeAppend, RuntimeMergeAppend and PartitionFilter nodes have been %s", newval ? "enabled" : "disabled"); } diff --git a/src/partition_filter.c b/src/partition_filter.c new file mode 100644 index 0000000000..d699896c31 --- /dev/null +++ b/src/partition_filter.c @@ -0,0 +1,155 @@ +#include "partition_filter.h" +#include "utils/guc.h" + + +bool pg_pathman_enable_partition_filter = true; + +CustomScanMethods partition_filter_plan_methods; +CustomExecMethods partition_filter_exec_methods; + + + +void +init_partition_filter_static_data(void) +{ + partition_filter_plan_methods.CustomName = "PartitionFilter"; + partition_filter_plan_methods.CreateCustomScanState = partition_filter_create_scan_state; + + partition_filter_exec_methods.CustomName = "PartitionFilter"; + partition_filter_exec_methods.BeginCustomScan = partition_filter_begin; + partition_filter_exec_methods.ExecCustomScan = partition_filter_exec; + partition_filter_exec_methods.EndCustomScan = partition_filter_end; + partition_filter_exec_methods.ReScanCustomScan = partition_filter_rescan; + partition_filter_exec_methods.MarkPosCustomScan = NULL; + partition_filter_exec_methods.RestrPosCustomScan = NULL; + partition_filter_exec_methods.ExplainCustomScan = partition_filter_explain; + + DefineCustomBoolVariable("pg_pathman.enable_partitionfilter", + "Enables the planner's use of PartitionFilter custom node.", + NULL, + &pg_pathman_enable_partition_filter, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); +} + +Plan * +create_partition_filter_plan(Plan *subplan, PartRelationInfo *prel) +{ + CustomScan *cscan = makeNode(CustomScan); + + cscan->scan.plan.startup_cost = subplan->startup_cost; + cscan->scan.plan.total_cost = subplan->total_cost; + cscan->scan.plan.plan_rows = subplan->plan_rows; + cscan->scan.plan.plan_width = subplan->plan_width; + + cscan->scan.plan.qual = NIL; + + 
cscan->custom_plans = list_make1(subplan); + + cscan->scan.plan.targetlist = subplan->targetlist; + + /* No relation will be scanned */ + cscan->scan.scanrelid = 0; + cscan->custom_scan_tlist = subplan->targetlist; + + cscan->methods = &partition_filter_plan_methods; + + return &cscan->scan.plan; +} + +Node * +partition_filter_create_scan_state(CustomScan *node) +{ + PartitionFilterState *state = palloc0(sizeof(PartitionFilterState)); + + NodeSetTag(state, T_CustomScanState); + + state->css.flags = node->flags; + state->css.methods = &partition_filter_exec_methods; + + state->subplan = (Plan *) linitial(node->custom_plans); + + return (Node *) state; +} + +void +partition_filter_begin(CustomScanState *node, EState *estate, int eflags) +{ + PartitionFilterState *state = (PartitionFilterState *) node; + + node->custom_ps = list_make1(ExecInitNode(state->subplan, estate, eflags)); + + state->firstStart = true; +} + +TupleTableSlot * +partition_filter_exec(CustomScanState *node) +{ + PartitionFilterState *state = (PartitionFilterState *) node; + + EState *estate = node->ss.ps.state; + PlanState *child_ps = (PlanState *) linitial(node->custom_ps); + TupleTableSlot *slot; + + if (state->firstStart) + state->savedRelInfo = estate->es_result_relation_info; + + slot = ExecProcNode(child_ps); + + if (!TupIsNull(slot)) + { + /* estate->es_result_relation_info = NULL; */ + + return slot; + } + + return NULL; +} + +void +partition_filter_end(CustomScanState *node) +{ + Assert(list_length(node->custom_ps) == 1); + + ExecEndNode((PlanState *) linitial(node->custom_ps)); +} + +void +partition_filter_rescan(CustomScanState *node) +{ + Assert(list_length(node->custom_ps) == 1); + + ExecReScan((PlanState *) linitial(node->custom_ps)); +} + +void +partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *es) +{ + /* Nothing to do here now */ +} + +/* Add proxy PartitionFilter nodes to subplans of ModifyTable node */ +void +add_partition_filters(List *rtable, 
ModifyTable *modify_table) +{ + ListCell *lc1, + *lc2; + + Assert(IsA(modify_table, ModifyTable)); + + if (!pg_pathman_enable_partition_filter) + return; + + forboth (lc1, modify_table->plans, lc2, modify_table->resultRelations) + { + Index rindex = lfirst_int(lc2); + PartRelationInfo *prel = get_pathman_relation_info(getrelid(rindex, rtable), + NULL); + if (prel) + lfirst(lc1) = create_partition_filter_plan((Plan *) lfirst(lc1), prel); + } +} diff --git a/src/partition_filter.h b/src/partition_filter.h new file mode 100644 index 0000000000..0db5ea316c --- /dev/null +++ b/src/partition_filter.h @@ -0,0 +1,45 @@ +#ifndef RUNTIME_INSERT_H +#define RUNTIME_INSERT_H + +#include "postgres.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" + +#include "pathman.h" +#include "nodes_common.h" + + +typedef struct +{ + CustomScanState css; + bool firstStart; + ResultRelInfo *savedRelInfo; + Plan *subplan; +} PartitionFilterState; + + +extern bool pg_pathman_enable_partition_filter; + +extern CustomScanMethods partition_filter_plan_methods; +extern CustomExecMethods partition_filter_exec_methods; + + +void add_partition_filters(List *rtable, ModifyTable *modify_table); + +void init_partition_filter_static_data(void); + +Plan * create_partition_filter_plan(Plan *subplan, PartRelationInfo *prel); + +Node * partition_filter_create_scan_state(CustomScan *node); + +void partition_filter_begin(CustomScanState *node, EState *estate, int eflags); + +TupleTableSlot * partition_filter_exec(CustomScanState *node); + +void partition_filter_end(CustomScanState *node); + +void partition_filter_rescan(CustomScanState *node); + +void partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *es); + +#endif diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 6cab004069..8ad95bf89f 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -42,6 +42,7 @@ #include "hooks.h" #include "runtimeappend.h" #include "runtime_merge_append.h" +#include 
"partition_filter.h" PG_MODULE_MAGIC; @@ -159,37 +160,9 @@ _PG_init(void) planner_hook_original = planner_hook; planner_hook = pathman_planner_hook; - /* RuntimeAppend */ - runtimeappend_path_methods.CustomName = "RuntimeAppend"; - runtimeappend_path_methods.PlanCustomPath = create_runtimeappend_plan; - - runtimeappend_plan_methods.CustomName = "RuntimeAppend"; - runtimeappend_plan_methods.CreateCustomScanState = runtimeappend_create_scan_state; - - runtimeappend_exec_methods.CustomName = "RuntimeAppend"; - runtimeappend_exec_methods.BeginCustomScan = runtimeappend_begin; - runtimeappend_exec_methods.ExecCustomScan = runtimeappend_exec; - runtimeappend_exec_methods.EndCustomScan = runtimeappend_end; - runtimeappend_exec_methods.ReScanCustomScan = runtimeappend_rescan; - runtimeappend_exec_methods.MarkPosCustomScan = NULL; - runtimeappend_exec_methods.RestrPosCustomScan = NULL; - runtimeappend_exec_methods.ExplainCustomScan = runtimeappend_explain; - - /* RuntimeMergeAppend */ - runtime_merge_append_path_methods.CustomName = "RuntimeMergeAppend"; - runtime_merge_append_path_methods.PlanCustomPath = create_runtimemergeappend_plan; - - runtime_merge_append_plan_methods.CustomName = "RuntimeMergeAppend"; - runtime_merge_append_plan_methods.CreateCustomScanState = runtimemergeappend_create_scan_state; - - runtime_merge_append_exec_methods.CustomName = "RuntimeMergeAppend"; - runtime_merge_append_exec_methods.BeginCustomScan = runtimemergeappend_begin; - runtime_merge_append_exec_methods.ExecCustomScan = runtimemergeappend_exec; - runtime_merge_append_exec_methods.EndCustomScan = runtimemergeappend_end; - runtime_merge_append_exec_methods.ReScanCustomScan = runtimemergeappend_rescan; - runtime_merge_append_exec_methods.MarkPosCustomScan = NULL; - runtime_merge_append_exec_methods.RestrPosCustomScan = NULL; - runtime_merge_append_exec_methods.ExplainCustomScan = runtimemergeappend_explain; + init_runtimeappend_static_data(); + init_runtime_merge_append_static_data(); + 
init_partition_filter_static_data(); DefineCustomBoolVariable("pg_pathman.enable", "Enables pg_pathman's optimizations during the planner stage", @@ -201,28 +174,6 @@ _PG_init(void) NULL, pg_pathman_enable_assign_hook, NULL); - - DefineCustomBoolVariable("pg_pathman.enable_runtimeappend", - "Enables the planner's use of RuntimeAppend custom node.", - NULL, - &pg_pathman_enable_runtimeappend, - true, - PGC_USERSET, - 0, - NULL, - NULL, - NULL); - - DefineCustomBoolVariable("pg_pathman.enable_runtimemergeappend", - "Enables the planner's use of RuntimeMergeAppend custom node.", - NULL, - &pg_pathman_enable_runtime_merge_append, - true, - PGC_USERSET, - 0, - NULL, - NULL, - NULL); } PartRelationInfo * @@ -301,6 +252,11 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) disable_inheritance_subselect(parse); handle_modification_query(parse); break; + case CMD_INSERT: + result = standard_planner(parse, cursorOptions, boundParams); + add_partition_filters(result->rtable, + (ModifyTable *) result->planTree); + return result; default: break; } diff --git a/src/runtime_insert.c b/src/runtime_insert.c deleted file mode 100644 index 1f7c513414..0000000000 --- a/src/runtime_insert.c +++ /dev/null @@ -1 +0,0 @@ -#include "runtime_insert.h" diff --git a/src/runtime_insert.h b/src/runtime_insert.h deleted file mode 100644 index db6a2cbe70..0000000000 --- a/src/runtime_insert.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef RUNTIME_INSERT_H -#define RUNTIME_INSERT_H - -#include "postgres.h" -#include "optimizer/paths.h" -#include "optimizer/pathnode.h" - -#include "pathman.h" -#include "nodes_common.h" - - -typedef struct -{ - CustomPath cpath; -} RuntimeInsertPath; - -typedef struct -{ - CustomScanState css; -} RuntimeInsertState; - -extern bool pg_pathman_enable_runtime_insert; - -extern CustomScanMethods runtime_insert_plan_methods; -extern CustomExecMethods runtime_insert_exec_methods; - -Path * create_runtimeinsert_path(PlannerInfo *root, AppendPath 
*inner_append, - ParamPathInfo *param_info, - double sel); - -Plan * create_runtimeinsert_plan(PlannerInfo *root, RelOptInfo *rel, - CustomPath *best_path, List *tlist, - List *clauses, List *custom_plans); - -Node * runtimeinsert_create_scan_state(CustomScan *node); - -void runtimeinsert_begin(CustomScanState *node, EState *estate, int eflags); - -TupleTableSlot * runtimeappend_exec(CustomScanState *node); - -void runtimeinsert_end(CustomScanState *node); - -void runtimeinsert_rescan(CustomScanState *node); - -void runtimeinsert_explain(CustomScanState *node, List *ancestors, ExplainState *es); - -#endif diff --git a/src/runtime_merge_append.c b/src/runtime_merge_append.c index 239f428aa5..9ed120a09b 100644 --- a/src/runtime_merge_append.c +++ b/src/runtime_merge_append.c @@ -21,6 +21,7 @@ #include "miscadmin.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/guc.h" #include "lib/binaryheap.h" @@ -169,6 +170,36 @@ unpack_runtimemergeappend_private(RuntimeMergeAppendState *scan_state, FillStateField(nullsFirst, bool, lfirst_int); } +void +init_runtime_merge_append_static_data(void) +{ + runtime_merge_append_path_methods.CustomName = "RuntimeMergeAppend"; + runtime_merge_append_path_methods.PlanCustomPath = create_runtimemergeappend_plan; + + runtime_merge_append_plan_methods.CustomName = "RuntimeMergeAppend"; + runtime_merge_append_plan_methods.CreateCustomScanState = runtimemergeappend_create_scan_state; + + runtime_merge_append_exec_methods.CustomName = "RuntimeMergeAppend"; + runtime_merge_append_exec_methods.BeginCustomScan = runtimemergeappend_begin; + runtime_merge_append_exec_methods.ExecCustomScan = runtimemergeappend_exec; + runtime_merge_append_exec_methods.EndCustomScan = runtimemergeappend_end; + runtime_merge_append_exec_methods.ReScanCustomScan = runtimemergeappend_rescan; + runtime_merge_append_exec_methods.MarkPosCustomScan = NULL; + runtime_merge_append_exec_methods.RestrPosCustomScan = NULL; + 
runtime_merge_append_exec_methods.ExplainCustomScan = runtimemergeappend_explain; + + DefineCustomBoolVariable("pg_pathman.enable_runtimemergeappend", + "Enables the planner's use of RuntimeMergeAppend custom node.", + NULL, + &pg_pathman_enable_runtime_merge_append, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); +} + Path * create_runtimemergeappend_path(PlannerInfo *root, AppendPath *inner_append, diff --git a/src/runtime_merge_append.h b/src/runtime_merge_append.h index 2a657c0643..3fc7f4c867 100644 --- a/src/runtime_merge_append.h +++ b/src/runtime_merge_append.h @@ -49,6 +49,8 @@ extern CustomScanMethods runtime_merge_append_plan_methods; extern CustomExecMethods runtime_merge_append_exec_methods; +void init_runtime_merge_append_static_data(void); + Path * create_runtimemergeappend_path(PlannerInfo *root, AppendPath *inner_append, ParamPathInfo *param_info, double sel); diff --git a/src/runtimeappend.c b/src/runtimeappend.c index 838d887ae1..900b8240ad 100644 --- a/src/runtimeappend.c +++ b/src/runtimeappend.c @@ -9,6 +9,7 @@ */ #include "postgres.h" #include "utils/memutils.h" +#include "utils/guc.h" #include "runtimeappend.h" #include "pathman.h" @@ -20,6 +21,36 @@ CustomScanMethods runtimeappend_plan_methods; CustomExecMethods runtimeappend_exec_methods; +void +init_runtimeappend_static_data(void) +{ + runtimeappend_path_methods.CustomName = "RuntimeAppend"; + runtimeappend_path_methods.PlanCustomPath = create_runtimeappend_plan; + + runtimeappend_plan_methods.CustomName = "RuntimeAppend"; + runtimeappend_plan_methods.CreateCustomScanState = runtimeappend_create_scan_state; + + runtimeappend_exec_methods.CustomName = "RuntimeAppend"; + runtimeappend_exec_methods.BeginCustomScan = runtimeappend_begin; + runtimeappend_exec_methods.ExecCustomScan = runtimeappend_exec; + runtimeappend_exec_methods.EndCustomScan = runtimeappend_end; + runtimeappend_exec_methods.ReScanCustomScan = runtimeappend_rescan; + runtimeappend_exec_methods.MarkPosCustomScan = NULL; 
+ runtimeappend_exec_methods.RestrPosCustomScan = NULL; + runtimeappend_exec_methods.ExplainCustomScan = runtimeappend_explain; + + DefineCustomBoolVariable("pg_pathman.enable_runtimeappend", + "Enables the planner's use of RuntimeAppend custom node.", + NULL, + &pg_pathman_enable_runtimeappend, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); +} + Path * create_runtimeappend_path(PlannerInfo *root, AppendPath *inner_append, diff --git a/src/runtimeappend.h b/src/runtimeappend.h index 9e37c4ba6e..d2ca2b6f55 100644 --- a/src/runtimeappend.h +++ b/src/runtimeappend.h @@ -53,12 +53,16 @@ typedef struct TupleTableSlot *slot; } RuntimeAppendState; + extern bool pg_pathman_enable_runtimeappend; extern CustomPathMethods runtimeappend_path_methods; extern CustomScanMethods runtimeappend_plan_methods; extern CustomExecMethods runtimeappend_exec_methods; + +void init_runtimeappend_static_data(void); + Path * create_runtimeappend_path(PlannerInfo *root, AppendPath *inner_append, ParamPathInfo *param_info, double sel); From 4eb7f3d3ddbddac6396b988f34c2bcec65f6811b Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 26 May 2016 17:27:27 +0300 Subject: [PATCH 003/184] PartitionFilter can be executed --- src/partition_filter.c | 91 ++++++++++++++++++++++++++++++++++++------ src/partition_filter.h | 8 +++- 2 files changed, 85 insertions(+), 14 deletions(-) diff --git a/src/partition_filter.c b/src/partition_filter.c index d699896c31..5cc82463ea 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -1,5 +1,6 @@ #include "partition_filter.h" #include "utils/guc.h" +#include "nodes/nodeFuncs.h" bool pg_pathman_enable_partition_filter = true; @@ -8,6 +9,7 @@ CustomScanMethods partition_filter_plan_methods; CustomExecMethods partition_filter_exec_methods; +static List * pfilter_build_tlist(List *tlist); void init_partition_filter_static_data(void) @@ -37,7 +39,7 @@ init_partition_filter_static_data(void) } Plan * -create_partition_filter_plan(Plan *subplan, 
PartRelationInfo *prel) +make_partition_filter_plan(Plan *subplan, PartRelationInfo *prel) { CustomScan *cscan = makeNode(CustomScan); @@ -46,17 +48,17 @@ create_partition_filter_plan(Plan *subplan, PartRelationInfo *prel) cscan->scan.plan.plan_rows = subplan->plan_rows; cscan->scan.plan.plan_width = subplan->plan_width; - cscan->scan.plan.qual = NIL; - + cscan->methods = &partition_filter_plan_methods; cscan->custom_plans = list_make1(subplan); - cscan->scan.plan.targetlist = subplan->targetlist; + cscan->scan.plan.targetlist = pfilter_build_tlist(subplan->targetlist); /* No relation will be scanned */ cscan->scan.scanrelid = 0; cscan->custom_scan_tlist = subplan->targetlist; - cscan->methods = &partition_filter_plan_methods; + /* Save partitioned table's Oid */ + cscan->custom_private = list_make1_int(prel->key.relid); return &cscan->scan.plan; } @@ -64,14 +66,20 @@ create_partition_filter_plan(Plan *subplan, PartRelationInfo *prel) Node * partition_filter_create_scan_state(CustomScan *node) { - PartitionFilterState *state = palloc0(sizeof(PartitionFilterState)); + PartitionFilterState *state = palloc0(sizeof(PartitionFilterState)); NodeSetTag(state, T_CustomScanState); state->css.flags = node->flags; state->css.methods = &partition_filter_exec_methods; + /* Extract necessary variables */ state->subplan = (Plan *) linitial(node->custom_plans); + state->partitioned_table = linitial_int(node->custom_private); + + /* Prepare dummy Const node */ + NodeSetTag(&state->temp_const, T_Const); + state->temp_const.location = -1; return (Node *) state; } @@ -79,21 +87,25 @@ partition_filter_create_scan_state(CustomScan *node) void partition_filter_begin(CustomScanState *node, EState *estate, int eflags) { - PartitionFilterState *state = (PartitionFilterState *) node; + PartitionFilterState *state = (PartitionFilterState *) node; node->custom_ps = list_make1(ExecInitNode(state->subplan, estate, eflags)); - + state->prel = get_pathman_relation_info(state->partitioned_table, 
NULL); state->firstStart = true; } TupleTableSlot * partition_filter_exec(CustomScanState *node) { - PartitionFilterState *state = (PartitionFilterState *) node; +#define CopyToTempConst(const_field, attr_field) \ + ( state->temp_const.const_field = \ + slot->tts_tupleDescriptor->attrs[attnum - 1]->attr_field ) - EState *estate = node->ss.ps.state; - PlanState *child_ps = (PlanState *) linitial(node->custom_ps); - TupleTableSlot *slot; + PartitionFilterState *state = (PartitionFilterState *) node; + + EState *estate = node->ss.ps.state; + PlanState *child_ps = (PlanState *) linitial(node->custom_ps); + TupleTableSlot *slot; if (state->firstStart) state->savedRelInfo = estate->es_result_relation_info; @@ -102,6 +114,27 @@ partition_filter_exec(CustomScanState *node) if (!TupIsNull(slot)) { + WalkerContext wcxt; + bool isnull; + AttrNumber attnum = state->prel->attnum; + Datum value = slot_getattr(slot, attnum, &isnull); + + state->temp_const.constvalue = value; + state->temp_const.constisnull = isnull; + + CopyToTempConst(consttype, atttypid); + CopyToTempConst(consttypmod, atttypmod); + CopyToTempConst(constcollid, attcollation); + CopyToTempConst(constlen, attlen); + CopyToTempConst(constbyval, attbyval); + + wcxt.prel = state->prel; + wcxt.econtext = NULL; + wcxt.hasLeast = false; + wcxt.hasGreatest = false; + + walk_expr_tree((Expr *) &state->temp_const, &wcxt); + /* estate->es_result_relation_info = NULL; */ return slot; @@ -132,6 +165,38 @@ partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *e /* Nothing to do here now */ } +/* + * Build partition filter's target list pointing to subplan tuple's elements + */ +static List * +pfilter_build_tlist(List *tlist) +{ + List *result_tlist = NIL; + ListCell *lc; + int i = 1; + + foreach (lc, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + Var *var = makeVar(INDEX_VAR, /* point to subplan's elements */ + i, /* direct attribute mapping */ + exprType((Node *) tle->expr), + 
exprTypmod((Node *) tle->expr), + exprCollation((Node *) tle->expr), + 0); + + result_tlist = lappend(result_tlist, + makeTargetEntry((Expr *) var, + i, + NULL, + tle->resjunk)); + i++; /* next resno */ + } + + return result_tlist; +} + /* Add proxy PartitionFilter nodes to subplans of ModifyTable node */ void add_partition_filters(List *rtable, ModifyTable *modify_table) @@ -150,6 +215,6 @@ add_partition_filters(List *rtable, ModifyTable *modify_table) PartRelationInfo *prel = get_pathman_relation_info(getrelid(rindex, rtable), NULL); if (prel) - lfirst(lc1) = create_partition_filter_plan((Plan *) lfirst(lc1), prel); + lfirst(lc1) = make_partition_filter_plan((Plan *) lfirst(lc1), prel); } } diff --git a/src/partition_filter.h b/src/partition_filter.h index 0db5ea316c..3bf042cd92 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -14,7 +14,13 @@ typedef struct CustomScanState css; bool firstStart; ResultRelInfo *savedRelInfo; + + Oid partitioned_table; + PartRelationInfo *prel; + Plan *subplan; + Const temp_const; /* temporary const for expr walker */ + } PartitionFilterState; @@ -28,7 +34,7 @@ void add_partition_filters(List *rtable, ModifyTable *modify_table); void init_partition_filter_static_data(void); -Plan * create_partition_filter_plan(Plan *subplan, PartRelationInfo *prel); +Plan * make_partition_filter_plan(Plan *subplan, PartRelationInfo *prel); Node * partition_filter_create_scan_state(CustomScan *node); From 1b2bb7c25fc523ebe86c08153b13d176354de6f9 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 26 May 2016 19:01:42 +0300 Subject: [PATCH 004/184] initial release of PartitionFilter node --- src/nodes_common.c | 2 +- src/nodes_common.h | 2 + src/partition_filter.c | 94 +++++++++-- src/partition_filter.h | 14 +- src/pathman.h | 4 + src/pg_pathman.c | 363 ++++++++++++++++++++++++----------------- 6 files changed, 314 insertions(+), 165 deletions(-) diff --git a/src/nodes_common.c b/src/nodes_common.c index a66587f831..c81d1b8e8f 
100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -105,7 +105,7 @@ select_required_plans(HTAB *children_table, Oid *parts, int nparts, int *nres) } /* Transform partition ranges into plain array of partition Oids */ -static Oid * +Oid * get_partition_oids(List *ranges, int *n, PartRelationInfo *prel) { ListCell *range_cell; diff --git a/src/nodes_common.h b/src/nodes_common.h index 82d4bb9d88..fb268b1b29 100644 --- a/src/nodes_common.h +++ b/src/nodes_common.h @@ -51,6 +51,8 @@ clear_plan_states(CustomScanState *scan_state) } } +Oid * get_partition_oids(List *ranges, int *n, PartRelationInfo *prel); + Path * create_append_path_common(PlannerInfo *root, AppendPath *inner_append, ParamPathInfo *param_info, diff --git a/src/partition_filter.c b/src/partition_filter.c index 5cc82463ea..481136d648 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -10,6 +10,7 @@ CustomExecMethods partition_filter_exec_methods; static List * pfilter_build_tlist(List *tlist); +static ResultRelInfo * getResultRelInfo(Oid partid, PartitionFilterState *state); void init_partition_filter_static_data(void) @@ -39,7 +40,8 @@ init_partition_filter_static_data(void) } Plan * -make_partition_filter_plan(Plan *subplan, PartRelationInfo *prel) +make_partition_filter_plan(Plan *subplan, Oid partitioned_table, + OnConflictAction conflict_action) { CustomScan *cscan = makeNode(CustomScan); @@ -57,8 +59,9 @@ make_partition_filter_plan(Plan *subplan, PartRelationInfo *prel) cscan->scan.scanrelid = 0; cscan->custom_scan_tlist = subplan->targetlist; - /* Save partitioned table's Oid */ - cscan->custom_private = list_make1_int(prel->key.relid); + /* Pack partitioned table's Oid and conflict_action */ + cscan->custom_private = list_make2_int(partitioned_table, + conflict_action); return &cscan->scan.plan; } @@ -76,6 +79,11 @@ partition_filter_create_scan_state(CustomScan *node) /* Extract necessary variables */ state->subplan = (Plan *) linitial(node->custom_plans); 
state->partitioned_table = linitial_int(node->custom_private); + state->onConflictAction = lsecond_int(node->custom_private); + + /* Check boundaries */ + Assert(state->onConflictAction >= ONCONFLICT_NONE || + state->onConflictAction <= ONCONFLICT_UPDATE); /* Prepare dummy Const node */ NodeSetTag(&state->temp_const, T_Const); @@ -89,9 +97,21 @@ partition_filter_begin(CustomScanState *node, EState *estate, int eflags) { PartitionFilterState *state = (PartitionFilterState *) node; + HTAB *result_rels_table; + HASHCTL *result_rels_table_config = &state->result_rels_table_config; + node->custom_ps = list_make1(ExecInitNode(state->subplan, estate, eflags)); state->prel = get_pathman_relation_info(state->partitioned_table, NULL); - state->firstStart = true; + + memset(result_rels_table_config, 0, sizeof(HASHCTL)); + result_rels_table_config->keysize = sizeof(Oid); + result_rels_table_config->entrysize = sizeof(ResultRelInfoHandle); + + result_rels_table = hash_create("ResultRelInfo storage", 10, + result_rels_table_config, + HASH_ELEM | HASH_BLOBS); + + state->result_rels_table = result_rels_table; } TupleTableSlot * @@ -107,14 +127,15 @@ partition_filter_exec(CustomScanState *node) PlanState *child_ps = (PlanState *) linitial(node->custom_ps); TupleTableSlot *slot; - if (state->firstStart) - state->savedRelInfo = estate->es_result_relation_info; - slot = ExecProcNode(child_ps); if (!TupIsNull(slot)) { WalkerContext wcxt; + List *ranges; + int nparts; + Oid *parts; + bool isnull; AttrNumber attnum = state->prel->attnum; Datum value = slot_getattr(slot, attnum, &isnull); @@ -133,9 +154,11 @@ partition_filter_exec(CustomScanState *node) wcxt.hasLeast = false; wcxt.hasGreatest = false; - walk_expr_tree((Expr *) &state->temp_const, &wcxt); + ranges = walk_expr_tree((Expr *) &state->temp_const, &wcxt)->rangeset; + parts = get_partition_oids(ranges, &nparts, state->prel); + Assert(nparts == 1); /* there has to be only 1 partition */ - /* estate->es_result_relation_info = 
NULL; */ + estate->es_result_relation_info = getResultRelInfo(parts[0], state); return slot; } @@ -146,8 +169,22 @@ partition_filter_exec(CustomScanState *node) void partition_filter_end(CustomScanState *node) { - Assert(list_length(node->custom_ps) == 1); + PartitionFilterState *state = (PartitionFilterState *) node; + + HASH_SEQ_STATUS stat; + ResultRelInfoHandle *rri_handle; + + hash_seq_init(&stat, state->result_rels_table); + while ((rri_handle = (ResultRelInfoHandle *) hash_seq_search(&stat)) != NULL) + { + ExecCloseIndices(rri_handle->resultRelInfo); + heap_close(rri_handle->resultRelInfo->ri_RelationDesc, + RowExclusiveLock); + } + + hash_destroy(state->result_rels_table); + Assert(list_length(node->custom_ps) == 1); ExecEndNode((PlanState *) linitial(node->custom_ps)); } @@ -165,6 +202,34 @@ partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *e /* Nothing to do here now */ } + +static ResultRelInfo * +getResultRelInfo(Oid partid, PartitionFilterState *state) +{ + ResultRelInfoHandle *resultRelInfoHandle; + bool found; + + resultRelInfoHandle = hash_search(state->result_rels_table, + (const void *) &partid, + HASH_ENTER, &found); + + if (!found) + { + ResultRelInfo *resultRelInfo = (ResultRelInfo *) palloc(sizeof(ResultRelInfo)); + InitResultRelInfo(resultRelInfo, + heap_open(partid, RowExclusiveLock), + 0, + state->css.ss.ps.state->es_instrument); + + ExecOpenIndices(resultRelInfo, state->onConflictAction != ONCONFLICT_NONE); + + resultRelInfoHandle->partid = partid; + resultRelInfoHandle->resultRelInfo = resultRelInfo; + } + + return resultRelInfoHandle->resultRelInfo; +} + /* * Build partition filter's target list pointing to subplan tuple's elements */ @@ -212,9 +277,12 @@ add_partition_filters(List *rtable, ModifyTable *modify_table) forboth (lc1, modify_table->plans, lc2, modify_table->resultRelations) { Index rindex = lfirst_int(lc2); - PartRelationInfo *prel = get_pathman_relation_info(getrelid(rindex, rtable), - NULL); + 
Oid relid = getrelid(rindex, rtable); + PartRelationInfo *prel = get_pathman_relation_info(relid, NULL); + if (prel) - lfirst(lc1) = make_partition_filter_plan((Plan *) lfirst(lc1), prel); + lfirst(lc1) = make_partition_filter_plan((Plan *) lfirst(lc1), + relid, + modify_table->onConflictAction); } } diff --git a/src/partition_filter.h b/src/partition_filter.h index 3bf042cd92..f86a31eec8 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -9,18 +9,25 @@ #include "nodes_common.h" +typedef struct +{ + Oid partid; + ResultRelInfo *resultRelInfo; +} ResultRelInfoHandle; + typedef struct { CustomScanState css; - bool firstStart; - ResultRelInfo *savedRelInfo; Oid partitioned_table; PartRelationInfo *prel; + OnConflictAction onConflictAction; Plan *subplan; Const temp_const; /* temporary const for expr walker */ + HTAB *result_rels_table; + HASHCTL result_rels_table_config; } PartitionFilterState; @@ -34,7 +41,8 @@ void add_partition_filters(List *rtable, ModifyTable *modify_table); void init_partition_filter_static_data(void); -Plan * make_partition_filter_plan(Plan *subplan, PartRelationInfo *prel); +Plan * make_partition_filter_plan(Plan *subplan, Oid partitioned_table, + OnConflictAction conflict_action); Node * partition_filter_create_scan_state(CustomScan *node); diff --git a/src/pathman.h b/src/pathman.h index 3c8480f8f9..5c0df3b783 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -227,6 +227,10 @@ typedef struct ExprContext *econtext; } WalkerContext; +bool search_range_partition(Datum value, + const PartRelationInfo *prel, const RangeRelation *rangerel, + int strategy, FmgrInfo *cmp_func, WrapperNode *result); + WrapperNode *walk_expr_tree(Expr *expr, WalkerContext *context); void finish_least_greatest(WrapperNode *wrap, WalkerContext *context); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 8ad95bf89f..d1a2091dfc 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -82,6 +82,7 @@ static Datum increase_hashable_value(const 
PartRelationInfo *prel, Datum value); static Datum decrease_hashable_value(const PartRelationInfo *prel, Datum value); static int make_hash(const PartRelationInfo *prel, int value); static void handle_binary_opexpr(WalkerContext *context, WrapperNode *result, const Var *v, const Const *c); +static WrapperNode *handle_const(const Const *c, WalkerContext *context); static WrapperNode *handle_opexpr(const OpExpr *expr, WalkerContext *context); static WrapperNode *handle_boolexpr(const BoolExpr *expr, WalkerContext *context); static WrapperNode *handle_arrexpr(const ScalarArrayOpExpr *expr, WalkerContext *context); @@ -827,6 +828,10 @@ walk_expr_tree(Expr *expr, WalkerContext *context) switch (expr->type) { + /* Useful for INSERT optimization */ + case T_Const: + return handle_const((Const *) expr, context); + /* AND, OR, NOT expressions */ case T_BoolExpr: boolexpr = (BoolExpr *) expr; @@ -918,6 +923,167 @@ decrease_hashable_value(const PartRelationInfo *prel, Datum value) } } +bool +search_range_partition(Datum value, + const PartRelationInfo *prel, const RangeRelation *rangerel, + int strategy, FmgrInfo *cmp_func, WrapperNode *result) +{ + if (rangerel != NULL) + { + RangeEntry *re; + bool lossy = false, + is_less, + is_greater; +#ifdef USE_ASSERT_CHECKING + bool found = false; + int counter = 0; +#endif + int i, + startidx = 0, + cmp_min, + cmp_max, + endidx = rangerel->ranges.length - 1; + RangeEntry *ranges = dsm_array_get_pointer(&rangerel->ranges); + bool byVal = rangerel->by_val; + + /* Check boundaries */ + if (rangerel->ranges.length == 0) + { + result->rangeset = NIL; + return true; + } + else + { + Assert(cmp_func); + + /* Corner cases */ + cmp_min = FunctionCall2(cmp_func, value, + PATHMAN_GET_DATUM(ranges[0].min, byVal)), + cmp_max = FunctionCall2(cmp_func, value, + PATHMAN_GET_DATUM(ranges[rangerel->ranges.length - 1].max, byVal)); + + if ((cmp_min < 0 && + (strategy == BTLessEqualStrategyNumber || + strategy == BTEqualStrategyNumber)) || + (cmp_min <= 
0 && strategy == BTLessStrategyNumber)) + { + result->rangeset = NIL; + return true; + } + + if (cmp_max >= 0 && (strategy == BTGreaterEqualStrategyNumber || + strategy == BTGreaterStrategyNumber || + strategy == BTEqualStrategyNumber)) + { + result->rangeset = NIL; + return true; + } + + if ((cmp_min < 0 && strategy == BTGreaterStrategyNumber) || + (cmp_min <= 0 && strategy == BTGreaterEqualStrategyNumber)) + { + result->rangeset = list_make1_irange(make_irange(startidx, endidx, false)); + return true; + } + + if (cmp_max >= 0 && (strategy == BTLessEqualStrategyNumber || + strategy == BTLessStrategyNumber)) + { + result->rangeset = list_make1_irange(make_irange(startidx, endidx, false)); + return true; + } + } + + /* Binary search */ + while (true) + { + Assert(cmp_func); + + i = startidx + (endidx - startidx) / 2; + Assert(i >= 0 && i < rangerel->ranges.length); + re = &ranges[i]; + cmp_min = FunctionCall2(cmp_func, value, PATHMAN_GET_DATUM(re->min, byVal)); + cmp_max = FunctionCall2(cmp_func, value, PATHMAN_GET_DATUM(re->max, byVal)); + + is_less = (cmp_min < 0 || (cmp_min == 0 && strategy == BTLessStrategyNumber)); + is_greater = (cmp_max > 0 || (cmp_max >= 0 && strategy != BTLessStrategyNumber)); + + if (!is_less && !is_greater) + { + if (strategy == BTGreaterEqualStrategyNumber && cmp_min == 0) + lossy = false; + else if (strategy == BTLessStrategyNumber && cmp_max == 0) + lossy = false; + else + lossy = true; +#ifdef USE_ASSERT_CHECKING + found = true; +#endif + break; + } + + /* If we still didn't find partition then it doesn't exist */ + if (startidx >= endidx) + { + result->rangeset = NIL; + return true; + } + + if (is_less) + endidx = i - 1; + else if (is_greater) + startidx = i + 1; + + /* For debug's sake */ + Assert(++counter < 100); + } + + Assert(found); + + /* Filter partitions */ + switch(strategy) + { + case BTLessStrategyNumber: + case BTLessEqualStrategyNumber: + if (lossy) + { + result->rangeset = list_make1_irange(make_irange(i, i, true)); + 
if (i > 0) + result->rangeset = lcons_irange( + make_irange(0, i - 1, false), result->rangeset); + } + else + { + result->rangeset = list_make1_irange( + make_irange(0, i, false)); + } + return true; + case BTEqualStrategyNumber: + result->rangeset = list_make1_irange(make_irange(i, i, true)); + return true; + case BTGreaterEqualStrategyNumber: + case BTGreaterStrategyNumber: + if (lossy) + { + result->rangeset = list_make1_irange(make_irange(i, i, true)); + if (i < prel->children_count - 1) + result->rangeset = lappend_irange(result->rangeset, + make_irange(i + 1, prel->children_count - 1, false)); + } + else + { + result->rangeset = list_make1_irange( + make_irange(i, prel->children_count - 1, false)); + } + return true; + } + result->rangeset = list_make1_irange(make_irange(startidx, endidx, true)); + return true; + } + + return false; +} + /* * This function determines which partitions should appear in query plan */ @@ -928,11 +1094,8 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, HashRelationKey key; RangeRelation *rangerel; Datum value; - int i, - int_value, + int int_value, strategy; - bool is_less, - is_greater; FmgrInfo cmp_func; Oid cmp_proc_oid; const OpExpr *expr = (const OpExpr *)result->orig; @@ -989,151 +1152,8 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, case PT_RANGE: value = c->constvalue; rangerel = get_pathman_range_relation(prel->key.relid, NULL); - if (rangerel != NULL) - { - RangeEntry *re; - bool lossy = false; -#ifdef USE_ASSERT_CHECKING - bool found = false; - int counter = 0; -#endif - int startidx = 0, - cmp_min, - cmp_max, - endidx = rangerel->ranges.length - 1; - RangeEntry *ranges = dsm_array_get_pointer(&rangerel->ranges); - bool byVal = rangerel->by_val; - - /* Check boundaries */ - if (rangerel->ranges.length == 0) - { - result->rangeset = NIL; - return; - } - else - { - /* Corner cases */ - cmp_min = FunctionCall2(&cmp_func, value, - PATHMAN_GET_DATUM(ranges[0].min, byVal)), - cmp_max 
= FunctionCall2(&cmp_func, value, - PATHMAN_GET_DATUM(ranges[rangerel->ranges.length - 1].max, byVal)); - - if ((cmp_min < 0 && - (strategy == BTLessEqualStrategyNumber || - strategy == BTEqualStrategyNumber)) || - (cmp_min <= 0 && strategy == BTLessStrategyNumber)) - { - result->rangeset = NIL; - return; - } - - if (cmp_max >= 0 && (strategy == BTGreaterEqualStrategyNumber || - strategy == BTGreaterStrategyNumber || - strategy == BTEqualStrategyNumber)) - { - result->rangeset = NIL; - return; - } - - if ((cmp_min < 0 && strategy == BTGreaterStrategyNumber) || - (cmp_min <= 0 && strategy == BTGreaterEqualStrategyNumber)) - { - result->rangeset = list_make1_irange(make_irange(startidx, endidx, false)); - return; - } - - if (cmp_max >= 0 && (strategy == BTLessEqualStrategyNumber || - strategy == BTLessStrategyNumber)) - { - result->rangeset = list_make1_irange(make_irange(startidx, endidx, false)); - return; - } - } - - /* Binary search */ - while (true) - { - i = startidx + (endidx - startidx) / 2; - Assert(i >= 0 && i < rangerel->ranges.length); - re = &ranges[i]; - cmp_min = FunctionCall2(&cmp_func, value, PATHMAN_GET_DATUM(re->min, byVal)); - cmp_max = FunctionCall2(&cmp_func, value, PATHMAN_GET_DATUM(re->max, byVal)); - - is_less = (cmp_min < 0 || (cmp_min == 0 && strategy == BTLessStrategyNumber)); - is_greater = (cmp_max > 0 || (cmp_max >= 0 && strategy != BTLessStrategyNumber)); - - if (!is_less && !is_greater) - { - if (strategy == BTGreaterEqualStrategyNumber && cmp_min == 0) - lossy = false; - else if (strategy == BTLessStrategyNumber && cmp_max == 0) - lossy = false; - else - lossy = true; -#ifdef USE_ASSERT_CHECKING - found = true; -#endif - break; - } - - /* If we still didn't find partition then it doesn't exist */ - if (startidx >= endidx) - { - result->rangeset = NIL; - return; - } - - if (is_less) - endidx = i - 1; - else if (is_greater) - startidx = i + 1; - - /* For debug's sake */ - Assert(++counter < 100); - } - - Assert(found); - - /* Filter 
partitions */ - switch(strategy) - { - case BTLessStrategyNumber: - case BTLessEqualStrategyNumber: - if (lossy) - { - result->rangeset = list_make1_irange(make_irange(i, i, true)); - if (i > 0) - result->rangeset = lcons_irange( - make_irange(0, i - 1, false), result->rangeset); - } - else - { - result->rangeset = list_make1_irange( - make_irange(0, i, false)); - } - return; - case BTEqualStrategyNumber: - result->rangeset = list_make1_irange(make_irange(i, i, true)); - return; - case BTGreaterEqualStrategyNumber: - case BTGreaterStrategyNumber: - if (lossy) - { - result->rangeset = list_make1_irange(make_irange(i, i, true)); - if (i < prel->children_count - 1) - result->rangeset = lappend_irange(result->rangeset, - make_irange(i + 1, prel->children_count - 1, false)); - } - else - { - result->rangeset = list_make1_irange( - make_irange(i, prel->children_count - 1, false)); - } - return; - } - result->rangeset = list_make1_irange(make_irange(startidx, endidx, true)); + if (search_range_partition(value, prel, rangerel, strategy, &cmp_func, result)) return; - } } result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, true)); @@ -1253,6 +1273,53 @@ extract_const(WalkerContext *wcxt, Param *param) value, isnull, get_typbyval(param->paramtype)); } +static WrapperNode * +handle_const(const Const *c, WalkerContext *context) +{ + const PartRelationInfo *prel = context->prel; + + WrapperNode *result = (WrapperNode *)palloc(sizeof(WrapperNode)); + + switch (prel->parttype) + { + case PT_HASH: + { + HashRelationKey key; + int int_value = DatumGetInt32(c->constvalue); + + key.hash = make_hash(prel, int_value); + result->rangeset = list_make1_irange(make_irange(key.hash, key.hash, true)); + + return result; + } + + case PT_RANGE: + { + Oid cmp_proc_oid; + FmgrInfo cmp_func; + RangeRelation *rangerel; + TypeCacheEntry *tce; + + tce = lookup_type_cache(c->consttype, 0); + cmp_proc_oid = get_opfamily_proc(tce->btree_opf, + c->consttype, + c->consttype, + 
BTORDER_PROC); + fmgr_info(cmp_proc_oid, &cmp_func); + rangerel = get_pathman_range_relation(prel->key.relid, NULL); + if (search_range_partition(c->constvalue, prel, rangerel, + BTEqualStrategyNumber, &cmp_func, result)) + return result; + /* else fallhrough */ + } + + default: + result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, true)); + result->paramsel = 1.0; + return result; + } +} + /* * Operator expression handler */ From e6b09979edcead4c379b933ff96527ef0a4eb747 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 26 May 2016 20:08:14 +0300 Subject: [PATCH 005/184] replace nparts assert with elogs in PartitionFilter --- src/partition_filter.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/partition_filter.c b/src/partition_filter.c index 481136d648..44e9889925 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -156,7 +156,11 @@ partition_filter_exec(CustomScanState *node) ranges = walk_expr_tree((Expr *) &state->temp_const, &wcxt)->rangeset; parts = get_partition_oids(ranges, &nparts, state->prel); - Assert(nparts == 1); /* there has to be only 1 partition */ + + if (nparts > 1) + elog(ERROR, "PartitionFilter selected more than one partition"); + else if (nparts == 0) + elog(ERROR, "PartitionFilter could not select suitable partition"); estate->es_result_relation_info = getResultRelInfo(parts[0], state); @@ -192,7 +196,6 @@ void partition_filter_rescan(CustomScanState *node) { Assert(list_length(node->custom_ps) == 1); - ExecReScan((PlanState *) linitial(node->custom_ps)); } From c63c9a16d00dbac057c798bc1ce7dca847f2ab42 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 27 May 2016 00:55:29 +0300 Subject: [PATCH 006/184] PartitionFilter now transforms INSERT statements within subplans --- src/partition_filter.c | 80 +++++++++++++++++++++++++++++++++--------- src/partition_filter.h | 6 ++-- src/pg_pathman.c | 11 ++++-- 3 files changed, 75 insertions(+), 22 deletions(-) diff 
--git a/src/partition_filter.c b/src/partition_filter.c index 44e9889925..d8949e67c5 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -40,7 +40,7 @@ init_partition_filter_static_data(void) } Plan * -make_partition_filter_plan(Plan *subplan, Oid partitioned_table, +make_partition_filter(Plan *subplan, Oid partitioned_table, OnConflictAction conflict_action) { CustomScan *cscan = makeNode(CustomScan); @@ -265,27 +265,73 @@ pfilter_build_tlist(List *tlist) return result_tlist; } -/* Add proxy PartitionFilter nodes to subplans of ModifyTable node */ void -add_partition_filters(List *rtable, ModifyTable *modify_table) +add_partition_filters(List *rtable, Plan *plan) { - ListCell *lc1, - *lc2; + ListCell *l; - Assert(IsA(modify_table, ModifyTable)); - - if (!pg_pathman_enable_partition_filter) + if (plan == NULL || !pg_pathman_enable_partition_filter) return; - forboth (lc1, modify_table->plans, lc2, modify_table->resultRelations) + /* Plan-type-specific fixes*/ + switch (nodeTag(plan)) { - Index rindex = lfirst_int(lc2); - Oid relid = getrelid(rindex, rtable); - PartRelationInfo *prel = get_pathman_relation_info(relid, NULL); - - if (prel) - lfirst(lc1) = make_partition_filter_plan((Plan *) lfirst(lc1), - relid, - modify_table->onConflictAction); + case T_SubqueryScan: + add_partition_filters(rtable, ((SubqueryScan *) plan)->subplan); + break; + + case T_CustomScan: + foreach(l, ((CustomScan *) plan)->custom_plans) + add_partition_filters(rtable, (Plan *) lfirst(l)); + break; + + /* + * Add proxy PartitionFilter nodes + * to subplans of ModifyTable node + */ + case T_ModifyTable: + { + ModifyTable *modify_table = ((ModifyTable *) plan); + ListCell *lc1, + *lc2; + + forboth (lc1, modify_table->plans, lc2, modify_table->resultRelations) + { + Index rindex = lfirst_int(lc2); + Oid relid = getrelid(rindex, rtable); + PartRelationInfo *prel = get_pathman_relation_info(relid, NULL); + + add_partition_filters(rtable, (Plan *) lfirst(lc1)); + + if (prel) + 
lfirst(lc1) = make_partition_filter((Plan *) lfirst(lc1), + relid, + modify_table->onConflictAction); + } + } + break; + + /* Since they look alike */ + case T_MergeAppend: + case T_Append: + foreach(l, ((Append *) plan)->appendplans) + add_partition_filters(rtable, (Plan *) lfirst(l)); + break; + + case T_BitmapAnd: + foreach(l, ((BitmapAnd *) plan)->bitmapplans) + add_partition_filters(rtable, (Plan *) lfirst(l)); + break; + + case T_BitmapOr: + foreach(l, ((BitmapOr *) plan)->bitmapplans) + add_partition_filters(rtable, (Plan *) lfirst(l)); + break; + + default: + break; } + + add_partition_filters(rtable, plan->lefttree); + add_partition_filters(rtable, plan->righttree); } diff --git a/src/partition_filter.h b/src/partition_filter.h index f86a31eec8..3efae5052f 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -37,12 +37,12 @@ extern CustomScanMethods partition_filter_plan_methods; extern CustomExecMethods partition_filter_exec_methods; -void add_partition_filters(List *rtable, ModifyTable *modify_table); +void add_partition_filters(List *rtable, Plan *plan); void init_partition_filter_static_data(void); -Plan * make_partition_filter_plan(Plan *subplan, Oid partitioned_table, - OnConflictAction conflict_action); +Plan * make_partition_filter(Plan *subplan, Oid partitioned_table, + OnConflictAction conflict_action); Node * partition_filter_create_scan_state(CustomScan *node); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index d1a2091dfc..5e823b794d 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -254,10 +254,17 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) handle_modification_query(parse); break; case CMD_INSERT: + { + ListCell *lc; + result = standard_planner(parse, cursorOptions, boundParams); - add_partition_filters(result->rtable, - (ModifyTable *) result->planTree); + + add_partition_filters(result->rtable, result->planTree); + foreach (lc, result->subplans) + 
add_partition_filters(result->rtable, (Plan *) lfirst(lc)); + return result; + } default: break; } From 04d5ed27d07af13dcab5927e5824dd3cfa65fb62 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 27 May 2016 04:34:58 +0300 Subject: [PATCH 007/184] initialize ResultRelInfos for partitions using savedRelInfo --- src/partition_filter.c | 22 ++++++++++++++++++++++ src/partition_filter.h | 1 + 2 files changed, 23 insertions(+) diff --git a/src/partition_filter.c b/src/partition_filter.c index d8949e67c5..de08a95e82 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -102,6 +102,7 @@ partition_filter_begin(CustomScanState *node, EState *estate, int eflags) node->custom_ps = list_make1(ExecInitNode(state->subplan, estate, eflags)); state->prel = get_pathman_relation_info(state->partitioned_table, NULL); + state->savedRelInfo = NULL; memset(result_rels_table_config, 0, sizeof(HASHCTL)); result_rels_table_config->keysize = sizeof(Oid); @@ -129,6 +130,10 @@ partition_filter_exec(CustomScanState *node) slot = ExecProcNode(child_ps); + /* Save original ResultRelInfo */ + if (!state->savedRelInfo) + state->savedRelInfo = estate->es_result_relation_info; + if (!TupIsNull(slot)) { WalkerContext wcxt; @@ -140,9 +145,11 @@ partition_filter_exec(CustomScanState *node) AttrNumber attnum = state->prel->attnum; Datum value = slot_getattr(slot, attnum, &isnull); + /* Fill const with value ... */ state->temp_const.constvalue = value; state->temp_const.constisnull = isnull; + /* ... 
and some other important data */ CopyToTempConst(consttype, atttypid); CopyToTempConst(consttypmod, atttypmod); CopyToTempConst(constcollid, attcollation); @@ -162,6 +169,7 @@ partition_filter_exec(CustomScanState *node) else if (nparts == 0) elog(ERROR, "PartitionFilter could not select suitable partition"); + /* Replace main table with suitable partition */ estate->es_result_relation_info = getResultRelInfo(parts[0], state); return slot; @@ -209,6 +217,9 @@ partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *e static ResultRelInfo * getResultRelInfo(Oid partid, PartitionFilterState *state) { +#define CopyToResultRelInfo(field_name) \ + ( resultRelInfo->field_name = state->savedRelInfo->field_name ) + ResultRelInfoHandle *resultRelInfoHandle; bool found; @@ -226,6 +237,17 @@ getResultRelInfo(Oid partid, PartitionFilterState *state) ExecOpenIndices(resultRelInfo, state->onConflictAction != ONCONFLICT_NONE); + /* Copy necessary fields from saved ResultRelInfo */ + CopyToResultRelInfo(ri_WithCheckOptions); + CopyToResultRelInfo(ri_WithCheckOptionExprs); + CopyToResultRelInfo(ri_junkFilter); + CopyToResultRelInfo(ri_projectReturning); + CopyToResultRelInfo(ri_onConflictSetProj); + CopyToResultRelInfo(ri_onConflictSetWhere); + + /* ri_ConstraintExprs will be initialized by ExecRelCheck() */ + resultRelInfo->ri_ConstraintExprs = NULL; + resultRelInfoHandle->partid = partid; resultRelInfoHandle->resultRelInfo = resultRelInfo; } diff --git a/src/partition_filter.h b/src/partition_filter.h index 3efae5052f..c9ee5e7789 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -22,6 +22,7 @@ typedef struct Oid partitioned_table; PartRelationInfo *prel; OnConflictAction onConflictAction; + ResultRelInfo *savedRelInfo; Plan *subplan; Const temp_const; /* temporary const for expr walker */ From e32f4c64da0db7a0deaf8043d00b59d6603b58f4 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 27 May 2016 17:12:29 +0300 Subject: [PATCH 008/184] 
some triggers (excluding BEFORE\AFTER stmt) on parent table can be executed on partitions --- src/partition_filter.c | 32 +++++++++++++++++++- src/utils.c | 69 ++++++++++++++++++++++++++++++++++++++++++ src/utils.h | 4 +++ 3 files changed, 104 insertions(+), 1 deletion(-) diff --git a/src/partition_filter.c b/src/partition_filter.c index de08a95e82..7025f3db65 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -1,4 +1,5 @@ #include "partition_filter.h" +#include "utils.h" #include "utils/guc.h" #include "nodes/nodeFuncs.h" @@ -220,6 +221,15 @@ getResultRelInfo(Oid partid, PartitionFilterState *state) #define CopyToResultRelInfo(field_name) \ ( resultRelInfo->field_name = state->savedRelInfo->field_name ) +#define ResizeTriggerField(field_name, field_type) \ + do { \ + if (resultRelInfo->field_name) \ + pfree(resultRelInfo->field_name); \ + /* palloc0() is necessary here */ \ + resultRelInfo->field_name = (field_type *) \ + palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(field_type)); \ + } while (0) + ResultRelInfoHandle *resultRelInfoHandle; bool found; @@ -229,7 +239,9 @@ getResultRelInfo(Oid partid, PartitionFilterState *state) if (!found) { - ResultRelInfo *resultRelInfo = (ResultRelInfo *) palloc(sizeof(ResultRelInfo)); + bool grown_up; + ResultRelInfo *resultRelInfo = (ResultRelInfo *) palloc(sizeof(ResultRelInfo)); + InitResultRelInfo(resultRelInfo, heap_open(partid, RowExclusiveLock), 0, @@ -237,6 +249,24 @@ getResultRelInfo(Oid partid, PartitionFilterState *state) ExecOpenIndices(resultRelInfo, state->onConflictAction != ONCONFLICT_NONE); + resultRelInfo->ri_TrigDesc = append_trigger_descs(resultRelInfo->ri_TrigDesc, + state->savedRelInfo->ri_TrigDesc, + &grown_up); + if (grown_up) + { + ResizeTriggerField(ri_TrigFunctions, FmgrInfo); + ResizeTriggerField(ri_TrigWhenExprs, List *); + + if (resultRelInfo->ri_TrigInstrument) + { + pfree(resultRelInfo->ri_TrigInstrument); + + resultRelInfo->ri_TrigInstrument = + 
InstrAlloc(resultRelInfo->ri_TrigDesc->numtriggers, + state->css.ss.ps.state->es_instrument); + } + } + /* Copy necessary fields from saved ResultRelInfo */ CopyToResultRelInfo(ri_WithCheckOptions); CopyToResultRelInfo(ri_WithCheckOptionExprs); diff --git a/src/utils.c b/src/utils.c index 4adce33c83..f907cc5c35 100644 --- a/src/utils.c +++ b/src/utils.c @@ -141,3 +141,72 @@ check_rinfo_for_partitioned_attr(List *rinfo, Index varno, AttrNumber varattno) return false; } + +TriggerDesc * +append_trigger_descs(TriggerDesc *src, TriggerDesc *more, bool *grown_up) +{ +#define CopyToTriggerDesc(bool_field_name) \ + ( new_desc->bool_field_name |= (src->bool_field_name || more->bool_field_name) ) + + TriggerDesc *new_desc = (TriggerDesc *) palloc0(sizeof(TriggerDesc)); + Trigger *cur_trigger; + int i; + + /* Quick choices */ + if (!src && !more) + { + *grown_up = false; + return NULL; + } + else if (!src) + { + *grown_up = true; /* expand space for new triggers */ + return more; + } + else if (!more) + { + *grown_up = false; /* no new triggers will be added */ + return src; + } + + *grown_up = true; + new_desc->numtriggers = src->numtriggers + more->numtriggers; + new_desc->triggers = palloc(new_desc->numtriggers * sizeof(Trigger)); + + cur_trigger = new_desc->triggers; + + /* Copy triggers from 'a' */ + for (i = 0; i < src->numtriggers; i++) + memcpy(cur_trigger++, &(src->triggers[i]), sizeof(Trigger)); + + /* Copy triggers from 'b' */ + for (i = 0; i < more->numtriggers; i++) + memcpy(cur_trigger++, &(more->triggers[i]), sizeof(Trigger)); + + /* Copy insert bool flags */ + CopyToTriggerDesc(trig_insert_before_row); + CopyToTriggerDesc(trig_insert_after_row); + CopyToTriggerDesc(trig_insert_instead_row); + CopyToTriggerDesc(trig_insert_before_statement); + CopyToTriggerDesc(trig_insert_after_statement); + + /* Copy update bool flags */ + CopyToTriggerDesc(trig_update_before_row); + CopyToTriggerDesc(trig_update_after_row); + CopyToTriggerDesc(trig_update_instead_row); + 
CopyToTriggerDesc(trig_update_before_statement); + CopyToTriggerDesc(trig_update_after_statement); + + /* Copy delete bool flags */ + CopyToTriggerDesc(trig_delete_before_row); + CopyToTriggerDesc(trig_delete_after_row); + CopyToTriggerDesc(trig_delete_instead_row); + CopyToTriggerDesc(trig_delete_before_statement); + CopyToTriggerDesc(trig_delete_after_statement); + + /* Copy truncate bool flags */ + CopyToTriggerDesc(trig_truncate_before_statement); + CopyToTriggerDesc(trig_truncate_after_statement); + + return new_desc; +} diff --git a/src/utils.h b/src/utils.h index b3dd37f114..ebd1d1546f 100644 --- a/src/utils.h +++ b/src/utils.h @@ -31,4 +31,8 @@ bool check_rinfo_for_partitioned_attr(List *rinfo, Index varno, AttrNumber varattno); +TriggerDesc * append_trigger_descs(TriggerDesc *src, + TriggerDesc *more, + bool *grown_up); + #endif From 073cf4cb299ac643a59bb149fe9aedba55c07760 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 31 May 2016 19:47:33 +0300 Subject: [PATCH 009/184] small fixes, refactoring (move all remaining hooks to 'hooks.c', move some other funcs to 'utils.c', merge all binary searches into select_range_partitions()), remove useless triggers --- hash.sql | 54 ---- range.sql | 68 +---- src/hooks.c | 95 +++++- src/hooks.h | 30 +- src/init.c | 12 +- src/nodes_common.c | 122 ++++---- src/nodes_common.h | 9 +- src/partition_filter.c | 20 +- src/partition_filter.h | 11 +- src/pathman.h | 110 ++++--- src/pg_pathman.c | 594 +++++++++++++++---------------------- src/pl_funcs.c | 70 +++-- src/runtime_merge_append.c | 3 +- src/runtime_merge_append.h | 11 +- src/runtimeappend.h | 11 +- src/utils.c | 61 +++- src/utils.h | 22 +- src/worker.c | 44 +-- 18 files changed, 668 insertions(+), 679 deletions(-) diff --git a/hash.sql b/hash.sql index 111c3c9f9f..fcddc24e1c 100644 --- a/hash.sql +++ b/hash.sql @@ -53,7 +53,6 @@ BEGIN VALUES (v_relname, attribute, 1); /* Create triggers */ - PERFORM @extschema@.create_hash_insert_trigger(v_relname, attribute, 
partitions_count); /* Do not create update trigger by default */ -- PERFORM @extschema@.create_hash_update_trigger(relation, attribute, partitions_count); @@ -67,59 +66,6 @@ BEGIN END $$ LANGUAGE plpgsql; -/* - * Creates hash trigger for specified relation - */ -CREATE OR REPLACE FUNCTION @extschema@.create_hash_insert_trigger( - IN relation REGCLASS - , IN attr TEXT - , IN partitions_count INTEGER) -RETURNS VOID AS -$$ -DECLARE - func TEXT := ' - CREATE OR REPLACE FUNCTION %s() - RETURNS TRIGGER AS $body$ - DECLARE - hash INTEGER; - BEGIN - hash := NEW.%s %% %s; - %s - RETURN NULL; - END $body$ LANGUAGE plpgsql;'; - funcname TEXT; - trigger TEXT := ' - CREATE TRIGGER %s - BEFORE INSERT ON %s - FOR EACH ROW EXECUTE PROCEDURE %s();'; - triggername TEXT; - -- fields TEXT; - -- fields_format TEXT; - insert_stmt TEXT; - relname TEXT; - schema TEXT; -BEGIN - /* drop trigger and corresponding function */ - PERFORM @extschema@.drop_hash_triggers(relation); - - SELECT * INTO schema, relname - FROM @extschema@.get_plain_schema_and_relname(relation); - - /* generate INSERT statement for trigger */ - insert_stmt = format('EXECUTE format(''INSERT INTO %s.%s SELECT $1.*'', hash) USING NEW;' - , schema, quote_ident(relname || '_%s')); - - /* format and create new trigger for relation */ - funcname := schema || '.' 
|| quote_ident(format('%s_insert_trigger_func', relname)); - triggername := quote_ident(format('%s_%s_insert_trigger', schema, relname)); - - func := format(func, funcname, attr, partitions_count, insert_stmt); - trigger := format(trigger, triggername, relation, funcname); - EXECUTE func; - EXECUTE trigger; -END -$$ LANGUAGE plpgsql; - /* * Drops all partitions for specified relation */ diff --git a/range.sql b/range.sql index 976be5bbae..d6527b10a9 100644 --- a/range.sql +++ b/range.sql @@ -96,7 +96,6 @@ BEGIN END LOOP; /* Create triggers */ - PERFORM @extschema@.create_range_insert_trigger(v_relname, p_attribute); -- PERFORM create_hash_update_trigger(relation, attribute, partitions_count); /* Notify backend about changes */ PERFORM @extschema@.on_create_partitions(p_relation::oid); @@ -186,7 +185,6 @@ BEGIN END LOOP; /* Create triggers */ - PERFORM @extschema@.create_range_insert_trigger(p_relation, p_attribute); -- PERFORM create_hash_update_trigger(relation, attribute, partitions_count); /* Notify backend about changes */ PERFORM @extschema@.on_create_partitions(p_relation::regclass::oid); @@ -251,7 +249,6 @@ BEGIN END LOOP; /* Create triggers */ - PERFORM @extschema@.create_range_insert_trigger(p_relation, p_attribute); /* Notify backend about changes */ PERFORM @extschema@.on_create_partitions(p_relation::regclass::oid); @@ -310,9 +307,6 @@ BEGIN i := i + 1; END LOOP; - /* Create triggers */ - PERFORM @extschema@.create_range_insert_trigger(p_relation, p_attribute); - /* Notify backend about changes */ PERFORM @extschema@.on_create_partitions(p_relation::regclass::oid); @@ -328,7 +322,7 @@ END $$ LANGUAGE plpgsql; /* - * + * */ CREATE OR REPLACE FUNCTION @extschema@.check_boundaries( p_relation REGCLASS @@ -706,7 +700,7 @@ BEGIN /* Prevent concurrent partition creation */ PERFORM @extschema@.acquire_partitions_lock(); - + EXECUTE format('SELECT @extschema@.append_partition_internal($1, $2, $3, ARRAY[]::%s[])', v_atttype) INTO v_part_name USING p_relation, 
v_atttype, v_interval; @@ -1010,62 +1004,6 @@ $$ LANGUAGE plpgsql; -/* - * Creates range partitioning insert trigger - */ -CREATE OR REPLACE FUNCTION @extschema@.create_range_insert_trigger( - v_relation REGCLASS - , v_attname TEXT) -RETURNS VOID AS -$$ -DECLARE - v_func TEXT := ' - CREATE OR REPLACE FUNCTION %s() - RETURNS TRIGGER - AS $body$ - DECLARE - v_part_relid OID; - BEGIN - IF TG_OP = ''INSERT'' THEN - IF NEW.%2$s IS NULL THEN - RAISE EXCEPTION ''ERROR: NULL value in partitioning key''; - END IF; - v_part_relid := @extschema@.find_or_create_range_partition(TG_RELID, NEW.%2$s); - IF NOT v_part_relid IS NULL THEN - EXECUTE format(''INSERT INTO %%s SELECT $1.*'', v_part_relid::regclass) - USING NEW; - ELSE - RAISE EXCEPTION ''ERROR: Cannot find partition''; - END IF; - END IF; - RETURN NULL; - END - $body$ LANGUAGE plpgsql;'; - v_funcname TEXT; - v_trigger TEXT := ' - CREATE TRIGGER %s - BEFORE INSERT ON %s - FOR EACH ROW EXECUTE PROCEDURE %s();'; - v_triggername TEXT; - v_plain_relname TEXT; - v_plain_schema TEXT; -BEGIN - SELECT * INTO v_plain_schema, v_plain_relname - FROM @extschema@.get_plain_schema_and_relname(v_relation); - - v_funcname := format(quote_ident('%s_insert_trigger_func'), v_plain_relname); - v_triggername := format('"%s_%s_insert_trigger"', v_plain_schema, v_plain_relname); - - v_func := format(v_func, v_funcname, v_attname); - v_trigger := format(v_trigger, v_triggername, v_relation, v_funcname); - - EXECUTE v_func; - EXECUTE v_trigger; - RETURN; -END -$$ LANGUAGE plpgsql; - - /* * Creates an update trigger */ @@ -1092,7 +1030,7 @@ DECLARE EXECUTE q USING %7$s; RETURN NULL; END $body$ LANGUAGE plpgsql'; - trigger TEXT := 'CREATE TRIGGER %s_update_trigger ' || + trigger TEXT := 'CREATE TRIGGER %s_update_trigger ' || 'BEFORE UPDATE ON %s ' || 'FOR EACH ROW EXECUTE PROCEDURE %s_update_trigger_func()'; att_names TEXT; diff --git a/src/hooks.c b/src/hooks.c index fc46afa25e..1c262051b6 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -11,16 
+11,20 @@ #include "optimizer/cost.h" #include "optimizer/restrictinfo.h" #include "utils/guc.h" -#include "hooks.h" #include "pathman.h" +#include "hooks.h" +#include "partition_filter.h" #include "runtimeappend.h" #include "runtime_merge_append.h" -#include "partition_filter.h" #include "utils.h" set_join_pathlist_hook_type set_join_pathlist_next = NULL; set_rel_pathlist_hook_type set_rel_pathlist_hook_next = NULL; +planner_hook_type planner_hook_next = NULL; +post_parse_analyze_hook_type post_parse_analyze_hook_next = NULL; +shmem_startup_hook_type shmem_startup_hook_next = NULL; + /* Take care of joins */ void @@ -255,11 +259,11 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb palloc0((root->simple_rel_array_size + len) * sizeof(RangeTblEntry *)); /* Copy relations to the new arrays */ - for (i = 0; i < root->simple_rel_array_size; i++) - { - new_rel_array[i] = root->simple_rel_array[i]; - new_rte_array[i] = root->simple_rte_array[i]; - } + for (i = 0; i < root->simple_rel_array_size; i++) + { + new_rel_array[i] = root->simple_rel_array[i]; + new_rte_array[i] = root->simple_rte_array[i]; + } /* Free old arrays */ pfree(root->simple_rel_array); @@ -360,3 +364,80 @@ void pg_pathman_enable_assign_hook(bool newval, void *extra) newval ? "enabled" : "disabled"); } +/* + * Planner hook. It disables inheritance for tables that have been partitioned + * by pathman to prevent standart PostgreSQL partitioning mechanism from + * handling that tables. 
+ */ +PlannedStmt * +pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) +{ + PlannedStmt *result; + + if (pg_pathman_enable) + { + inheritance_disabled = false; + switch(parse->commandType) + { + case CMD_SELECT: + disable_inheritance(parse); + break; + case CMD_UPDATE: + case CMD_DELETE: + disable_inheritance_cte(parse); + disable_inheritance_subselect(parse); + handle_modification_query(parse); + break; + case CMD_INSERT: + { + ListCell *lc; + + result = standard_planner(parse, cursorOptions, boundParams); + + add_partition_filters(result->rtable, result->planTree); + foreach (lc, result->subplans) + add_partition_filters(result->rtable, (Plan *) lfirst(lc)); + + return result; + } + default: + break; + } + } + + /* Invoke original hook */ + if (planner_hook_next) + result = planner_hook_next(parse, cursorOptions, boundParams); + else + result = standard_planner(parse, cursorOptions, boundParams); + + return result; +} + +/* + * Post parse analysis hook. It makes sure the config is loaded before executing + * any statement, including utility commands + */ +void +pathman_post_parse_analysis_hook(ParseState *pstate, Query *query) +{ + if (initialization_needed) + load_config(); + + if (post_parse_analyze_hook_next) + post_parse_analyze_hook_next(pstate, query); +} + +void +pathman_shmem_startup_hook(void) +{ + /* Allocate shared memory objects */ + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + init_dsm_config(); + init_shmem_config(); + LWLockRelease(AddinShmemInitLock); + + /* Invoke original hook if needed */ + if (shmem_startup_hook_next != NULL) + shmem_startup_hook_next(); +} diff --git a/src/hooks.h b/src/hooks.h index 33513982e7..c744e300b0 100644 --- a/src/hooks.h +++ b/src/hooks.h @@ -11,16 +11,40 @@ #define JOIN_HOOK_H #include "postgres.h" +#include "optimizer/planner.h" #include "optimizer/paths.h" +#include "parser/analyze.h" +#include "storage/ipc.h" + extern set_join_pathlist_hook_type set_join_pathlist_next; extern 
set_rel_pathlist_hook_type set_rel_pathlist_hook_next; +extern planner_hook_type planner_hook_next; +extern post_parse_analyze_hook_type post_parse_analyze_hook_next; +extern shmem_startup_hook_type shmem_startup_hook_next; + -void pathman_join_pathlist_hook(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, - RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); +void pathman_join_pathlist_hook(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra); -void pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); +void pathman_rel_pathlist_hook(PlannerInfo *root, + RelOptInfo *rel, + Index rti, + RangeTblEntry *rte); void pg_pathman_enable_assign_hook(char newval, void *extra); +PlannedStmt * pathman_planner_hook(Query *parse, + int cursorOptions, + ParamListInfo boundParams); + +void pathman_post_parse_analysis_hook(ParseState *pstate, + Query *query); + +void pathman_shmem_startup_hook(void); + #endif diff --git a/src/init.c b/src/init.c index b7eea062c8..f08b0b9394 100644 --- a/src/init.c +++ b/src/init.c @@ -60,9 +60,9 @@ init_shmem_config() if (!IsUnderPostmaster) { /* Initialize locks */ - pmstate->load_config_lock = LWLockAssign(); - pmstate->dsm_init_lock = LWLockAssign(); - pmstate->edit_partitions_lock = LWLockAssign(); + pmstate->load_config_lock = LWLockAssign(); + pmstate->dsm_init_lock = LWLockAssign(); + pmstate->edit_partitions_lock = LWLockAssign(); } #ifdef WIN32 else @@ -114,7 +114,7 @@ load_config(void) /* Check if we already cached config for current database */ databases = (Oid *) dsm_array_get_pointer(&pmstate->databases); - for(i=0; idsm_init_lock); @@ -331,7 +331,7 @@ load_check_constraints(Oid parent_oid, Snapshot snapshot) rangerel->by_val = tce->typbyval; } - for (i=0; ivals[i]; @@ -575,7 +575,7 @@ remove_relation_info(Oid relid) { PartRelationInfo *prel; RangeRelation *rangerel; - RelationKey 
key; + RelationKey key; key.dbid = MyDatabaseId; key.relid = relid; diff --git a/src/nodes_common.c b/src/nodes_common.c index c81d1b8e8f..1e8d64eec0 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -17,6 +17,11 @@ #include "utils.h" +/* Allocation settings */ +#define INITIAL_ALLOC_NUM 10 +#define ALLOC_EXP 2 + + /* Compare plans by 'original_order' */ static int cmp_child_scan_common_by_orig_order(const void *ap, @@ -78,11 +83,13 @@ transform_plans_into_states(RuntimeAppendState *scan_state, static ChildScanCommon * select_required_plans(HTAB *children_table, Oid *parts, int nparts, int *nres) { - int allocated = 10; + int allocated = INITIAL_ALLOC_NUM; int used = 0; - ChildScanCommon *result = palloc(10 * sizeof(ChildScanCommon)); + ChildScanCommon *result; int i; + result = (ChildScanCommon *) palloc(allocated * sizeof(ChildScanCommon)); + for (i = 0; i < nparts; i++) { ChildScanCommon child = hash_search(children_table, @@ -93,7 +100,7 @@ select_required_plans(HTAB *children_table, Oid *parts, int nparts, int *nres) if (allocated <= used) { - allocated *= 2; + allocated *= ALLOC_EXP; result = repalloc(result, allocated * sizeof(ChildScanCommon)); } @@ -104,39 +111,6 @@ select_required_plans(HTAB *children_table, Oid *parts, int nparts, int *nres) return result; } -/* Transform partition ranges into plain array of partition Oids */ -Oid * -get_partition_oids(List *ranges, int *n, PartRelationInfo *prel) -{ - ListCell *range_cell; - int allocated = 10; - int used = 0; - Oid *result = palloc(allocated * sizeof(Oid)); - Oid *children = dsm_array_get_pointer(&prel->children); - - foreach (range_cell, ranges) - { - int i; - int a = irange_lower(lfirst_irange(range_cell)); - int b = irange_upper(lfirst_irange(range_cell)); - - for (i = a; i <= b; i++) - { - if (allocated <= used) - { - allocated *= 2; - result = repalloc(result, allocated * sizeof(Oid)); - } - - Assert(i < prel->children_count); - result[used++] = children[i]; - } - } - - *n = used; - 
return result; -} - /* Replace Vars' varnos with the value provided by 'parent' */ static List * replace_tlist_varnos(List *child_tlist, RelOptInfo *parent) @@ -148,7 +122,7 @@ replace_tlist_varnos(List *child_tlist, RelOptInfo *parent) foreach (lc, child_tlist) { Var *var = (Var *) ((TargetEntry *) lfirst(lc))->expr; - Var *newvar = palloc(sizeof(Var)); + Var *newvar = (Var *) palloc(sizeof(Var)); Assert(IsA(var, Var)); @@ -229,6 +203,40 @@ unpack_runtimeappend_private(RuntimeAppendState *scan_state, CustomScan *cscan) scan_state->relid = linitial_oid(linitial(runtimeappend_private)); } + +/* Transform partition ranges into plain array of partition Oids */ +Oid * +get_partition_oids(List *ranges, int *n, PartRelationInfo *prel) +{ + ListCell *range_cell; + int allocated = INITIAL_ALLOC_NUM; + int used = 0; + Oid *result = (Oid *) palloc(allocated * sizeof(Oid)); + Oid *children = dsm_array_get_pointer(&prel->children); + + foreach (range_cell, ranges) + { + int i; + int a = irange_lower(lfirst_irange(range_cell)); + int b = irange_upper(lfirst_irange(range_cell)); + + for (i = a; i <= b; i++) + { + if (allocated <= used) + { + allocated *= ALLOC_EXP; + result = repalloc(result, allocated * sizeof(Oid)); + } + + Assert(i < prel->children_count); + result[used++] = children[i]; + } + } + + *n = used; + return result; +} + Path * create_append_path_common(PlannerInfo *root, AppendPath *inner_append, @@ -245,7 +253,7 @@ create_append_path_common(PlannerInfo *root, RuntimeAppendPath *result; - result = palloc0(size); + result = (RuntimeAppendPath *) palloc0(size); NodeSetTag(result, T_CustomPath); result->cpath.path.pathtype = T_CustomScan; @@ -266,13 +274,16 @@ create_append_path_common(PlannerInfo *root, result->relid = inner_entry->relid; result->nchildren = list_length(inner_append->subpaths); - result->children = palloc(result->nchildren * sizeof(ChildScanCommon)); + result->children = (ChildScanCommon *) + palloc(result->nchildren * sizeof(ChildScanCommon)); i = 
0; foreach (lc, inner_append->subpaths) { Path *path = lfirst(lc); Index relindex = path->parent->relid; - ChildScanCommon child = palloc(sizeof(ChildScanCommonData)); + ChildScanCommon child; + + child = (ChildScanCommon) palloc(sizeof(ChildScanCommonData)); result->cpath.path.startup_cost += path->startup_cost; result->cpath.path.total_cost += path->total_cost; @@ -301,30 +312,29 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, List *clauses, List *custom_plans, CustomScanMethods *scan_methods) { - RuntimeAppendPath *gpath = (RuntimeAppendPath *) best_path; + RuntimeAppendPath *rpath = (RuntimeAppendPath *) best_path; CustomScan *cscan; cscan = makeNode(CustomScan); - cscan->custom_scan_tlist = NIL; + cscan->custom_scan_tlist = NIL; /* initial value (empty list) */ if (custom_plans) { ListCell *lc1, *lc2; - forboth (lc1, gpath->cpath.custom_paths, lc2, custom_plans) + forboth (lc1, rpath->cpath.custom_paths, lc2, custom_plans) { Plan *child_plan = (Plan *) lfirst(lc2); RelOptInfo *child_rel = ((Path *) lfirst(lc1))->parent; - /* We inforce IndexOnlyScans to return all available columns */ + /* We enforce IndexOnlyScans to return all available columns */ if (IsA(child_plan, IndexOnlyScan)) { IndexOptInfo *indexinfo = ((IndexPath *) lfirst(lc1))->indexinfo; RangeTblEntry *rentry = root->simple_rte_array[child_rel->relid]; Relation child_relation; - /* TODO: find out whether we need locks or not */ child_relation = heap_open(rentry->relid, NoLock); child_plan->targetlist = build_index_tlist(root, indexinfo, child_relation); @@ -350,7 +360,7 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, * physical tlists with the new 'custom_scan_tlist'. 
*/ if (cscan->custom_scan_tlist) - forboth (lc1, gpath->cpath.custom_paths, lc2, custom_plans) + forboth (lc1, rpath->cpath.custom_paths, lc2, custom_plans) { Plan *child_plan = (Plan *) lfirst(lc2); RelOptInfo *child_rel = ((Path *) lfirst(lc1))->parent; @@ -371,14 +381,14 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, if (!cscan->custom_scan_tlist) cscan->custom_scan_tlist = tlist; + /* Since we're not scanning any real table directly */ cscan->scan.scanrelid = 0; cscan->custom_exprs = get_actual_clauses(clauses); cscan->custom_plans = custom_plans; - cscan->methods = scan_methods; - pack_runtimeappend_private(cscan, gpath); + pack_runtimeappend_private(cscan, rpath); return &cscan->scan.plan; } @@ -388,9 +398,11 @@ create_append_scan_state_common(CustomScan *node, CustomExecMethods *exec_methods, uint32 size) { - RuntimeAppendState *scan_state = palloc0(size); + RuntimeAppendState *scan_state; + scan_state = (RuntimeAppendState *) palloc0(size); NodeSetTag(scan_state, T_CustomScanState); + scan_state->css.flags = node->flags; scan_state->css.methods = exec_methods; scan_state->custom_exprs = node->custom_exprs; @@ -426,14 +438,16 @@ exec_append_common(CustomScanState *node, { RuntimeAppendState *scan_state = (RuntimeAppendState *) node; + /* ReScan if no plans are selected */ if (scan_state->ncur_plans == 0) ExecReScan(&node->ss.ps); for (;;) { + /* Fetch next tuple if we're done with Projections */ if (!node->ss.ps.ps_TupFromTlist) { - fetch_next_tuple(node); + fetch_next_tuple(node); /* use specific callback */ if (TupIsNull(scan_state->slot)) return NULL; @@ -527,13 +541,15 @@ explain_append_common(CustomScanState *node, HTAB *children_table, ExplainState /* Construct excess PlanStates */ if (!es->analyze) { - int allocated = 10; + int allocated = INITIAL_ALLOC_NUM; int used = 0; - ChildScanCommon *custom_ps = palloc(allocated * sizeof(ChildScanCommon)); + ChildScanCommon *custom_ps; ChildScanCommon child; HASH_SEQ_STATUS seqstat; int i; + 
custom_ps = (ChildScanCommon *) palloc(allocated * sizeof(ChildScanCommon)); + /* There can't be any nodes since we're not scanning anything */ Assert(!node->custom_ps); @@ -544,7 +560,7 @@ explain_append_common(CustomScanState *node, HTAB *children_table, ExplainState { if (allocated <= used) { - allocated *= 2; + allocated *= ALLOC_EXP; custom_ps = repalloc(custom_ps, allocated * sizeof(ChildScanCommon)); } diff --git a/src/nodes_common.h b/src/nodes_common.h index fb268b1b29..a456baca12 100644 --- a/src/nodes_common.h +++ b/src/nodes_common.h @@ -14,6 +14,11 @@ #include "pathman.h" +/* + * Common structure for storing selected + * Paths/Plans/PlanStates in a hash table + * or its slice. + */ typedef struct { Oid relid; /* partition relid */ @@ -23,7 +28,7 @@ typedef struct CHILD_PATH = 0, CHILD_PLAN, CHILD_PLAN_STATE - } content_type; + } content_type; union { @@ -32,7 +37,7 @@ typedef struct PlanState *plan_state; } content; - int original_order; /* for sorting in EXPLAIN */ + int original_order; /* for sorting in EXPLAIN */ } ChildScanCommonData; typedef ChildScanCommonData *ChildScanCommon; diff --git a/src/partition_filter.c b/src/partition_filter.c index 7025f3db65..1c9375f98c 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -1,6 +1,7 @@ #include "partition_filter.h" #include "utils.h" #include "utils/guc.h" +#include "utils/lsyscache.h" #include "nodes/nodeFuncs.h" @@ -70,8 +71,9 @@ make_partition_filter(Plan *subplan, Oid partitioned_table, Node * partition_filter_create_scan_state(CustomScan *node) { - PartitionFilterState *state = palloc0(sizeof(PartitionFilterState)); + PartitionFilterState *state; + state = (PartitionFilterState *) palloc0(sizeof(PartitionFilterState)); NodeSetTag(state, T_CustomScanState); state->css.flags = node->flags; @@ -141,6 +143,7 @@ partition_filter_exec(CustomScanState *node) List *ranges; int nparts; Oid *parts; + Oid selected_partid; bool isnull; AttrNumber attnum = state->prel->attnum; @@ -168,10 
+171,14 @@ partition_filter_exec(CustomScanState *node) if (nparts > 1) elog(ERROR, "PartitionFilter selected more than one partition"); else if (nparts == 0) - elog(ERROR, "PartitionFilter could not select suitable partition"); + selected_partid = add_missing_partition(state->partitioned_table, + &state->temp_const); + else + selected_partid = parts[0]; /* Replace main table with suitable partition */ - estate->es_result_relation_info = getResultRelInfo(parts[0], state); + estate->es_result_relation_info = getResultRelInfo(selected_partid, + state); return slot; } @@ -225,7 +232,6 @@ getResultRelInfo(Oid partid, PartitionFilterState *state) do { \ if (resultRelInfo->field_name) \ pfree(resultRelInfo->field_name); \ - /* palloc0() is necessary here */ \ resultRelInfo->field_name = (field_type *) \ palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(field_type)); \ } while (0) @@ -317,6 +323,9 @@ pfilter_build_tlist(List *tlist) return result_tlist; } +/* + * Add PartitionFilter nodes to the plan tree + */ void add_partition_filters(List *rtable, Plan *plan) { @@ -347,6 +356,9 @@ add_partition_filters(List *rtable, Plan *plan) ListCell *lc1, *lc2; + if (modify_table->operation != CMD_INSERT) + break; + forboth (lc1, modify_table->plans, lc2, modify_table->resultRelations) { Index rindex = lfirst_int(lc2); diff --git a/src/partition_filter.h b/src/partition_filter.h index c9ee5e7789..fb7b529ca2 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -42,12 +42,15 @@ void add_partition_filters(List *rtable, Plan *plan); void init_partition_filter_static_data(void); -Plan * make_partition_filter(Plan *subplan, Oid partitioned_table, +Plan * make_partition_filter(Plan *subplan, + Oid partitioned_table, OnConflictAction conflict_action); Node * partition_filter_create_scan_state(CustomScan *node); -void partition_filter_begin(CustomScanState *node, EState *estate, int eflags); +void partition_filter_begin(CustomScanState *node, + EState *estate, + int 
eflags); TupleTableSlot * partition_filter_exec(CustomScanState *node); @@ -55,6 +58,8 @@ void partition_filter_end(CustomScanState *node); void partition_filter_rescan(CustomScanState *node); -void partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *es); +void partition_filter_explain(CustomScanState *node, + List *ancestors, + ExplainState *es); #endif diff --git a/src/pathman.h b/src/pathman.h index 5c0df3b783..788837c8e5 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -26,7 +26,7 @@ /* Check PostgreSQL version */ #if PG_VERSION_NUM < 90500 - #error "You are trying to build pg_pathman with PostgreSQL version lower than 9.5. Please, check you environment." + #error "You are trying to build pg_pathman with PostgreSQL version lower than 9.5. Please, check your environment." #endif #define ALL NIL @@ -46,9 +46,9 @@ typedef enum PartType */ typedef struct DsmArray { - dsm_handle segment; - size_t offset; - size_t length; + dsm_handle segment; + size_t offset; + size_t length; } DsmArray; /* @@ -56,27 +56,27 @@ typedef struct DsmArray */ typedef struct RelationKey { - Oid dbid; - Oid relid; + Oid dbid; + Oid relid; } RelationKey; /* * PartRelationInfo * Per-relation partitioning information * - * oid - parent table oid - * children - list of children oids + * oid - parent table's Oid + * children - list of children's Oids * parttype - partitioning type (HASH, LIST or RANGE) - * attnum - attribute number of parent relation + * attnum - attribute number of parent relation's column */ typedef struct PartRelationInfo { - RelationKey key; - DsmArray children; - int children_count; - PartType parttype; - Index attnum; - Oid atttype; + RelationKey key; + DsmArray children; + int children_count; + PartType parttype; + Index attnum; + Oid atttype; } PartRelationInfo; @@ -85,14 +85,14 @@ typedef struct PartRelationInfo */ typedef struct HashRelationKey { - int hash; - Oid parent_oid; + uint32 hash; + Oid parent_oid; } HashRelationKey; typedef struct 
HashRelation { - HashRelationKey key; - Oid child_oid; + HashRelationKey key; + Oid child_oid; } HashRelation; /* @@ -100,40 +100,48 @@ typedef struct HashRelation */ typedef struct RangeEntry { - Oid child_oid; - #ifdef HAVE_INT64_TIMESTAMP - int64 min; - int64 max; - #else - double min; - double max; - #endif + Oid child_oid; + +#ifdef HAVE_INT64_TIMESTAMP + int64 min; + int64 max; +#else + double min; + double max; +#endif } RangeEntry; typedef struct RangeRelation { - RelationKey key; - bool by_val; - DsmArray ranges; + RelationKey key; + bool by_val; + DsmArray ranges; } RangeRelation; typedef struct PathmanState { - LWLock *load_config_lock; - LWLock *dsm_init_lock; - LWLock *edit_partitions_lock; - DsmArray databases; + LWLock *load_config_lock; + LWLock *dsm_init_lock; + LWLock *edit_partitions_lock; + DsmArray databases; } PathmanState; +typedef enum +{ + SEARCH_RANGEREL_OUT_OF_RANGE = 0, + SEARCH_RANGEREL_GAP, + SEARCH_RANGEREL_FOUND +} search_rangerel_result; + extern bool inheritance_disabled; extern bool pg_pathman_enable; extern PathmanState *pmstate; #define PATHMAN_GET_DATUM(value, by_val) ( (by_val) ? (value) : PointerGetDatum(&value) ) -typedef int IndexRange; -#define RANGE_INFINITY 0x7FFF -#define RANGE_LOSSY 0x80000000 +typedef uint32 IndexRange; +#define RANGE_INFINITY 0x7FFF +#define RANGE_LOSSY 0x80000000 #define make_irange(lower, upper, lossy) \ (((lower) & RANGE_INFINITY) << 15 | ((upper) & RANGE_INFINITY) | ((lossy) ? 
RANGE_LOSSY : 0)) @@ -195,12 +203,19 @@ int append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte, int index, Oid childOID, List *wrappers); PartRelationInfo *get_pathman_relation_info(Oid relid, bool *found); RangeRelation *get_pathman_range_relation(Oid relid, bool *found); -int range_binary_search(const RangeRelation *rangerel, FmgrInfo *cmp_func, Datum value, bool *fountPtr); +search_rangerel_result search_range_partition_eq(Datum value, + const RangeRelation *rangerel, + FmgrInfo *cmp_func, + int *part_idx); char *get_extension_schema(void); -FmgrInfo *get_cmp_func(Oid type1, Oid type2); Oid create_partitions_bg_worker(Oid relid, Datum value, Oid value_type, bool *crashed); Oid create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed); +void handle_modification_query(Query *parse); +void disable_inheritance(Query *parse); +void disable_inheritance_cte(Query *parse); +void disable_inheritance_subselect(Query *parse); + /* copied from allpaths.h */ void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); @@ -209,10 +224,11 @@ void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, Rang typedef struct { - const Node *orig; - List *args; - List *rangeset; - double paramsel; + const Node *orig; + List *args; + List *rangeset; + bool found_gap; + double paramsel; } WrapperNode; typedef struct @@ -227,9 +243,11 @@ typedef struct ExprContext *econtext; } WalkerContext; -bool search_range_partition(Datum value, - const PartRelationInfo *prel, const RangeRelation *rangerel, - int strategy, FmgrInfo *cmp_func, WrapperNode *result); +void select_range_partitions(const Datum value, + const RangeRelation *rangerel, + const int strategy, + FmgrInfo *cmp_func, + WrapperNode *result); WrapperNode *walk_expr_tree(Expr *expr, WalkerContext *context); void finish_least_greatest(WrapperNode *wrap, WalkerContext *context); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 
5e823b794d..b32119e0c9 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -40,9 +40,10 @@ #include "catalog/pg_type.h" #include "foreign/fdwapi.h" #include "hooks.h" +#include "utils.h" +#include "partition_filter.h" #include "runtimeappend.h" #include "runtime_merge_append.h" -#include "partition_filter.h" PG_MODULE_MAGIC; @@ -56,25 +57,11 @@ bool inheritance_disabled; bool pg_pathman_enable; PathmanState *pmstate; -/* Original hooks */ -static shmem_startup_hook_type shmem_startup_hook_original = NULL; -static post_parse_analyze_hook_type post_parse_analyze_hook_original = NULL; -static planner_hook_type planner_hook_original = NULL; - /* pg module functions */ void _PG_init(void); -/* Hook functions */ -static void pathman_shmem_startup(void); -void pathman_post_parse_analysis_hook(ParseState *pstate, Query *query); -static PlannedStmt * pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams); - /* Utility functions */ -static void handle_modification_query(Query *parse); static Node *wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysTrue); -static void disable_inheritance(Query *parse); -static void disable_inheritance_cte(Query *parse); -static void disable_inheritance_subselect(Query *parse); static bool disable_inheritance_subselect_walker(Node *node, void *context); /* Expression tree handlers */ @@ -150,21 +137,24 @@ _PG_init(void) RequestAddinShmemSpace(pathman_memsize()); RequestAddinLWLocks(3); - set_rel_pathlist_hook_next = set_rel_pathlist_hook; - set_rel_pathlist_hook = pathman_rel_pathlist_hook; - set_join_pathlist_next = set_join_pathlist_hook; - set_join_pathlist_hook = pathman_join_pathlist_hook; - shmem_startup_hook_original = shmem_startup_hook; - shmem_startup_hook = pathman_shmem_startup; - post_parse_analyze_hook_original = post_parse_analyze_hook; - post_parse_analyze_hook = pathman_post_parse_analysis_hook; - planner_hook_original = planner_hook; - planner_hook = pathman_planner_hook; - + /* 
Initialize 'next' hook pointers */ + set_rel_pathlist_hook_next = set_rel_pathlist_hook; + set_rel_pathlist_hook = pathman_rel_pathlist_hook; + set_join_pathlist_next = set_join_pathlist_hook; + set_join_pathlist_hook = pathman_join_pathlist_hook; + shmem_startup_hook_next = shmem_startup_hook; + shmem_startup_hook = pathman_shmem_startup_hook; + post_parse_analyze_hook_next = post_parse_analyze_hook; + post_parse_analyze_hook = pathman_post_parse_analysis_hook; + planner_hook_next = planner_hook; + planner_hook = pathman_planner_hook; + + /* Initialize custom nodes */ init_runtimeappend_static_data(); init_runtime_merge_append_static_data(); init_partition_filter_static_data(); + /* Main toggle */ DefineCustomBoolVariable("pg_pathman.enable", "Enables pg_pathman's optimizations during the planner stage", NULL, @@ -197,93 +187,11 @@ get_pathman_range_relation(Oid relid, bool *found) return hash_search(range_restrictions, (const void *) &key, HASH_FIND, found); } -FmgrInfo * -get_cmp_func(Oid type1, Oid type2) -{ - FmgrInfo *cmp_func; - Oid cmp_proc_oid; - TypeCacheEntry *tce; - - cmp_func = palloc(sizeof(FmgrInfo)); - tce = lookup_type_cache(type1, - TYPECACHE_BTREE_OPFAMILY | TYPECACHE_CMP_PROC | TYPECACHE_CMP_PROC_FINFO); - cmp_proc_oid = get_opfamily_proc(tce->btree_opf, - type1, - type2, - BTORDER_PROC); - fmgr_info(cmp_proc_oid, cmp_func); - return cmp_func; -} - -/* - * Post parse analysis hook. It makes sure the config is loaded before executing - * any statement, including utility commands - */ -void -pathman_post_parse_analysis_hook(ParseState *pstate, Query *query) -{ - if (initialization_needed) - load_config(); - - if (post_parse_analyze_hook_original) - post_parse_analyze_hook_original(pstate, query); -} - -/* - * Planner hook. It disables inheritance for tables that have been partitioned - * by pathman to prevent standart PostgreSQL partitioning mechanism from - * handling that tables. 
- */ -PlannedStmt * -pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) -{ - PlannedStmt *result; - - if (pg_pathman_enable) - { - inheritance_disabled = false; - switch(parse->commandType) - { - case CMD_SELECT: - disable_inheritance(parse); - break; - case CMD_UPDATE: - case CMD_DELETE: - disable_inheritance_cte(parse); - disable_inheritance_subselect(parse); - handle_modification_query(parse); - break; - case CMD_INSERT: - { - ListCell *lc; - - result = standard_planner(parse, cursorOptions, boundParams); - - add_partition_filters(result->rtable, result->planTree); - foreach (lc, result->subplans) - add_partition_filters(result->rtable, (Plan *) lfirst(lc)); - - return result; - } - default: - break; - } - } - - /* Invoke original hook */ - if (planner_hook_original) - result = planner_hook_original(parse, cursorOptions, boundParams); - else - result = standard_planner(parse, cursorOptions, boundParams); - - return result; -} - /* * Disables inheritance for partitioned by pathman relations. It must be done to * prevent PostgresSQL from full search. */ -static void +void disable_inheritance(Query *parse) { ListCell *lc; @@ -330,7 +238,7 @@ disable_inheritance(Query *parse) } } -static void +void disable_inheritance_cte(Query *parse) { ListCell *lc; @@ -344,7 +252,7 @@ disable_inheritance_cte(Query *parse) } } -static void +void disable_inheritance_subselect(Query *parse) { Node *quals; @@ -374,7 +282,7 @@ disable_inheritance_subselect_walker(Node *node, void *context) /* * Checks if query is affects only one partition. 
If true then substitute */ -static void +void handle_modification_query(Query *parse) { PartRelationInfo *prel; @@ -426,23 +334,6 @@ handle_modification_query(Query *parse) return; } -/* - * Shared memory startup hook - */ -static void -pathman_shmem_startup(void) -{ - /* Allocate shared memory objects */ - LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - init_dsm_config(); - init_shmem_config(); - LWLockRelease(AddinShmemInitLock); - - /* Invoke original hook if needed */ - if (shmem_startup_hook_original != NULL) - shmem_startup_hook_original(); -} - void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) @@ -838,7 +729,6 @@ walk_expr_tree(Expr *expr, WalkerContext *context) /* Useful for INSERT optimization */ case T_Const: return handle_const((Const *) expr, context); - /* AND, OR, NOT expressions */ case T_BoolExpr: boolexpr = (BoolExpr *) expr; @@ -930,165 +820,179 @@ decrease_hashable_value(const PartRelationInfo *prel, Datum value) } } -bool -search_range_partition(Datum value, - const PartRelationInfo *prel, const RangeRelation *rangerel, - int strategy, FmgrInfo *cmp_func, WrapperNode *result) +void +select_range_partitions(const Datum value, + const RangeRelation *rangerel, + const int strategy, + FmgrInfo *cmp_func, + WrapperNode *result) { - if (rangerel != NULL) - { - RangeEntry *re; - bool lossy = false, - is_less, - is_greater; + RangeEntry *current_re; + bool lossy = false, + is_less, + is_greater; + #ifdef USE_ASSERT_CHECKING - bool found = false; - int counter = 0; + bool found = false; + int counter = 0; #endif - int i, - startidx = 0, - cmp_min, - cmp_max, - endidx = rangerel->ranges.length - 1; - RangeEntry *ranges = dsm_array_get_pointer(&rangerel->ranges); - bool byVal = rangerel->by_val; - - /* Check boundaries */ - if (rangerel->ranges.length == 0) + + int i, + startidx = 0, + endidx = rangerel->ranges.length - 1, + cmp_min, + cmp_max; + + RangeEntry *ranges = dsm_array_get_pointer(&rangerel->ranges); 
+ bool byVal = rangerel->by_val; + + result->found_gap = false; + + /* Check boundaries */ + if (rangerel->ranges.length == 0) + { + result->rangeset = NIL; + return; + } + else + { + Assert(cmp_func); + + /* Corner cases */ + cmp_min = FunctionCall2(cmp_func, value, + PATHMAN_GET_DATUM(ranges[startidx].min, byVal)), + cmp_max = FunctionCall2(cmp_func, value, + PATHMAN_GET_DATUM(ranges[endidx].max, byVal)); + + if ((cmp_min <= 0 && strategy == BTLessStrategyNumber) || + (cmp_min < 0 && (strategy == BTLessEqualStrategyNumber || + strategy == BTEqualStrategyNumber))) { result->rangeset = NIL; - return true; + return; } - else + + if (cmp_max >= 0 && (strategy == BTGreaterEqualStrategyNumber || + strategy == BTGreaterStrategyNumber || + strategy == BTEqualStrategyNumber)) { - Assert(cmp_func); - - /* Corner cases */ - cmp_min = FunctionCall2(cmp_func, value, - PATHMAN_GET_DATUM(ranges[0].min, byVal)), - cmp_max = FunctionCall2(cmp_func, value, - PATHMAN_GET_DATUM(ranges[rangerel->ranges.length - 1].max, byVal)); - - if ((cmp_min < 0 && - (strategy == BTLessEqualStrategyNumber || - strategy == BTEqualStrategyNumber)) || - (cmp_min <= 0 && strategy == BTLessStrategyNumber)) - { - result->rangeset = NIL; - return true; - } + result->rangeset = NIL; + return; + } - if (cmp_max >= 0 && (strategy == BTGreaterEqualStrategyNumber || - strategy == BTGreaterStrategyNumber || - strategy == BTEqualStrategyNumber)) - { - result->rangeset = NIL; - return true; - } + if ((cmp_min < 0 && strategy == BTGreaterStrategyNumber) || + (cmp_min <= 0 && strategy == BTGreaterEqualStrategyNumber)) + { + result->rangeset = list_make1_irange(make_irange(startidx, endidx, false)); + return; + } - if ((cmp_min < 0 && strategy == BTGreaterStrategyNumber) || - (cmp_min <= 0 && strategy == BTGreaterEqualStrategyNumber)) - { - result->rangeset = list_make1_irange(make_irange(startidx, endidx, false)); - return true; - } + if (cmp_max >= 0 && (strategy == BTLessEqualStrategyNumber || + strategy == 
BTLessStrategyNumber)) + { + result->rangeset = list_make1_irange(make_irange(startidx, endidx, false)); + return; + } + } - if (cmp_max >= 0 && (strategy == BTLessEqualStrategyNumber || - strategy == BTLessStrategyNumber)) - { - result->rangeset = list_make1_irange(make_irange(startidx, endidx, false)); - return true; - } + /* Binary search */ + while (true) + { + Assert(cmp_func); + + i = startidx + (endidx - startidx) / 2; + Assert(i >= 0 && i < rangerel->ranges.length); + + current_re = &ranges[i]; + + cmp_min = FunctionCall2(cmp_func, value, + PATHMAN_GET_DATUM(current_re->min, byVal)); + cmp_max = FunctionCall2(cmp_func, value, + PATHMAN_GET_DATUM(current_re->max, byVal)); + + is_less = (cmp_min < 0 || (cmp_min == 0 && strategy == BTLessStrategyNumber)); + is_greater = (cmp_max > 0 || (cmp_max >= 0 && strategy != BTLessStrategyNumber)); + + if (!is_less && !is_greater) + { + if (strategy == BTGreaterEqualStrategyNumber && cmp_min == 0) + lossy = false; + else if (strategy == BTLessStrategyNumber && cmp_max == 0) + lossy = false; + else + lossy = true; +#ifdef USE_ASSERT_CHECKING + found = true; +#endif + break; } - /* Binary search */ - while (true) + /* If we still haven't found partition then it doesn't exist */ + if (startidx >= endidx) { - Assert(cmp_func); + result->rangeset = NIL; + result->found_gap = true; + return; + } + + if (is_less) + endidx = i - 1; + else if (is_greater) + startidx = i + 1; - i = startidx + (endidx - startidx) / 2; - Assert(i >= 0 && i < rangerel->ranges.length); - re = &ranges[i]; - cmp_min = FunctionCall2(cmp_func, value, PATHMAN_GET_DATUM(re->min, byVal)); - cmp_max = FunctionCall2(cmp_func, value, PATHMAN_GET_DATUM(re->max, byVal)); + /* For debug's sake */ + Assert(++counter < 100); + } - is_less = (cmp_min < 0 || (cmp_min == 0 && strategy == BTLessStrategyNumber)); - is_greater = (cmp_max > 0 || (cmp_max >= 0 && strategy != BTLessStrategyNumber)); + Assert(found); - if (!is_less && !is_greater) + /* Filter partitions */ + 
switch(strategy) + { + case BTLessStrategyNumber: + case BTLessEqualStrategyNumber: + if (lossy) { - if (strategy == BTGreaterEqualStrategyNumber && cmp_min == 0) - lossy = false; - else if (strategy == BTLessStrategyNumber && cmp_max == 0) - lossy = false; - else - lossy = true; -#ifdef USE_ASSERT_CHECKING - found = true; -#endif - break; + result->rangeset = list_make1_irange(make_irange(i, i, true)); + if (i > 0) + result->rangeset = lcons_irange(make_irange(0, i - 1, false), + result->rangeset); } - - /* If we still didn't find partition then it doesn't exist */ - if (startidx >= endidx) + else { - result->rangeset = NIL; - return true; + result->rangeset = list_make1_irange(make_irange(0, i, false)); } + break; - if (is_less) - endidx = i - 1; - else if (is_greater) - startidx = i + 1; - - /* For debug's sake */ - Assert(++counter < 100); - } - - Assert(found); + case BTEqualStrategyNumber: + result->rangeset = list_make1_irange(make_irange(i, i, true)); + break; - /* Filter partitions */ - switch(strategy) - { - case BTLessStrategyNumber: - case BTLessEqualStrategyNumber: - if (lossy) - { - result->rangeset = list_make1_irange(make_irange(i, i, true)); - if (i > 0) - result->rangeset = lcons_irange( - make_irange(0, i - 1, false), result->rangeset); - } - else - { - result->rangeset = list_make1_irange( - make_irange(0, i, false)); - } - return true; - case BTEqualStrategyNumber: + case BTGreaterEqualStrategyNumber: + case BTGreaterStrategyNumber: + if (lossy) + { result->rangeset = list_make1_irange(make_irange(i, i, true)); - return true; - case BTGreaterEqualStrategyNumber: - case BTGreaterStrategyNumber: - if (lossy) - { - result->rangeset = list_make1_irange(make_irange(i, i, true)); - if (i < prel->children_count - 1) - result->rangeset = lappend_irange(result->rangeset, - make_irange(i + 1, prel->children_count - 1, false)); - } - else - { - result->rangeset = list_make1_irange( - make_irange(i, prel->children_count - 1, false)); - } - return true; - } 
- result->rangeset = list_make1_irange(make_irange(startidx, endidx, true)); - return true; - } + if (i < rangerel->ranges.length - 1) + result->rangeset = + lappend_irange(result->rangeset, + make_irange(i + 1, + rangerel->ranges.length - 1, + false)); + } + else + { + result->rangeset = + list_make1_irange(make_irange(i, + rangerel->ranges.length - 1, + false)); + } + break; - return false; + default: + elog(ERROR, "Unknown btree strategy (%u)", strategy); + break; + } } /* @@ -1135,6 +1039,7 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, context->greatest = value; context->hasGreatest = true; } + /* go to end */ } else if (strategy == BTGreaterStrategyNumber || strategy == BTGreaterEqualStrategyNumber) @@ -1148,19 +1053,24 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, context->least = value; context->hasLeast = true; } + /* go to end */ } else if (strategy == BTEqualStrategyNumber) { int_value = DatumGetInt32(c->constvalue); key.hash = make_hash(prel, int_value); result->rangeset = list_make1_irange(make_irange(key.hash, key.hash, true)); - return; + return; /* exit on equal */ } + case PT_RANGE: value = c->constvalue; rangerel = get_pathman_range_relation(prel->key.relid, NULL); - if (search_range_partition(value, prel, rangerel, strategy, &cmp_func, result)) + if (rangerel) + { + select_range_partitions(value, rangerel, strategy, &cmp_func, result); return; + } } result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, true)); @@ -1207,65 +1117,40 @@ make_hash(const PartRelationInfo *prel, int value) return value % prel->children_count; } -/* - * Search for range section. Returns position of the item in array. - * If item wasn't found then function returns closest position and sets - * foundPtr to false. If value is outside the range covered by partitions - * then returns -1. 
- */ -int -range_binary_search(const RangeRelation *rangerel, FmgrInfo *cmp_func, Datum value, bool *foundPtr) +search_rangerel_result +search_range_partition_eq(const Datum value, + const RangeRelation *rangerel, + FmgrInfo *cmp_func, + int *part_idx) { - RangeEntry *ranges = dsm_array_get_pointer(&rangerel->ranges); - RangeEntry *re; - bool byVal = rangerel->by_val; - int cmp_min, - cmp_max, - i = 0, - startidx = 0, - endidx = rangerel->ranges.length-1; -#ifdef USE_ASSERT_CHECKING - int counter = 0; -#endif + WrapperNode result; - *foundPtr = false; + Assert(rangerel); - /* Check boundaries */ - cmp_min = FunctionCall2(cmp_func, value, PATHMAN_GET_DATUM(ranges[0].min, byVal)), - cmp_max = FunctionCall2(cmp_func, value, PATHMAN_GET_DATUM(ranges[rangerel->ranges.length - 1].max, byVal)); + select_range_partitions(value, rangerel, + BTEqualStrategyNumber, + cmp_func, &result); - if (cmp_min < 0 || cmp_max >= 0) + if (result.found_gap) { - return -1; + *part_idx = -1; + return SEARCH_RANGEREL_GAP; } - - while (true) + else if (result.rangeset == NIL) { - i = startidx + (endidx - startidx) / 2; - Assert(i >= 0 && i < rangerel->ranges.length); - re = &ranges[i]; - cmp_min = FunctionCall2(cmp_func, value, PATHMAN_GET_DATUM(re->min, byVal)); - cmp_max = FunctionCall2(cmp_func, value, PATHMAN_GET_DATUM(re->max, byVal)); - - if (cmp_min >= 0 && cmp_max < 0) - { - *foundPtr = true; - break; - } + *part_idx = -1; + return SEARCH_RANGEREL_OUT_OF_RANGE; + } + else + { + IndexRange irange = lfirst_irange(list_head(result.rangeset)); - if (startidx >= endidx) - return i; + Assert(list_length(result.rangeset) == 1); + Assert(irange_lower(irange) == irange_upper(irange)); - if (cmp_min < 0) - endidx = i - 1; - else if (cmp_max >= 0) - startidx = i + 1; - - /* For debug's sake */ - Assert(++counter < 100); + *part_idx = irange_lower(irange); + return SEARCH_RANGEREL_FOUND; } - - return i; } static Const * @@ -1284,47 +1169,48 @@ static WrapperNode * handle_const(const Const *c, 
WalkerContext *context) { const PartRelationInfo *prel = context->prel; - - WrapperNode *result = (WrapperNode *)palloc(sizeof(WrapperNode)); + WrapperNode *result = (WrapperNode *) palloc(sizeof(WrapperNode)); switch (prel->parttype) { case PT_HASH: - { - HashRelationKey key; - int int_value = DatumGetInt32(c->constvalue); - - key.hash = make_hash(prel, int_value); - result->rangeset = list_make1_irange(make_irange(key.hash, key.hash, true)); + { + HashRelationKey key; + int int_value = DatumGetInt32(c->constvalue); - return result; - } + key.hash = make_hash(prel, int_value); + result->rangeset = list_make1_irange(make_irange(key.hash, key.hash, true)); + } + break; case PT_RANGE: - { - Oid cmp_proc_oid; - FmgrInfo cmp_func; - RangeRelation *rangerel; - TypeCacheEntry *tce; - - tce = lookup_type_cache(c->consttype, 0); - cmp_proc_oid = get_opfamily_proc(tce->btree_opf, - c->consttype, - c->consttype, - BTORDER_PROC); - fmgr_info(cmp_proc_oid, &cmp_func); - rangerel = get_pathman_range_relation(prel->key.relid, NULL); - if (search_range_partition(c->constvalue, prel, rangerel, - BTEqualStrategyNumber, &cmp_func, result)) - return result; - /* else fallhrough */ - } + { + Oid cmp_proc_oid; + FmgrInfo cmp_func; + RangeRelation *rangerel; + TypeCacheEntry *tce; + + tce = lookup_type_cache(c->consttype, 0); + cmp_proc_oid = get_opfamily_proc(tce->btree_opf, + c->consttype, + c->consttype, + BTORDER_PROC); + fmgr_info(cmp_proc_oid, &cmp_func); + rangerel = get_pathman_range_relation(prel->key.relid, NULL); + select_range_partitions(c->constvalue, rangerel, + BTEqualStrategyNumber, + &cmp_func, result); + + } + break; default: result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, true)); result->paramsel = 1.0; - return result; + break; } + + return result; } /* @@ -1866,24 +1752,6 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, } -//--------------------------------------------------------------- - -/* - * Returns the 
same list in reversed order. - */ -static List * -list_reverse(List *l) -{ - List *result = NIL; - ListCell *lc; - - foreach (lc, l) - { - result = lcons(lfirst(lc), result); - } - return result; -} - /* * generate_mergeappend_paths * Generate MergeAppend paths for an append relation diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 0a7346c2b0..371a987546 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -17,6 +17,7 @@ #include "catalog/pg_type.h" #include "executor/spi.h" #include "storage/lmgr.h" +#include "utils.h" /* declarations */ @@ -92,17 +93,17 @@ on_partitions_removed(PG_FUNCTION_ARGS) Datum find_or_create_range_partition(PG_FUNCTION_ARGS) { - int relid = DatumGetInt32(PG_GETARG_DATUM(0)); - Datum value = PG_GETARG_DATUM(1); - Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); - int pos; - bool found; - RangeRelation *rangerel; - RangeEntry *ranges; - TypeCacheEntry *tce; - PartRelationInfo *prel; - Oid cmp_proc_oid; - FmgrInfo cmp_func; + int relid = DatumGetInt32(PG_GETARG_DATUM(0)); + Datum value = PG_GETARG_DATUM(1); + Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); + int pos; + RangeRelation *rangerel; + RangeEntry *ranges; + TypeCacheEntry *tce; + PartRelationInfo *prel; + Oid cmp_proc_oid; + FmgrInfo cmp_func; + search_rangerel_result search_state; tce = lookup_type_cache(value_type, TYPECACHE_EQ_OPR | TYPECACHE_LT_OPR | TYPECACHE_GT_OPR | @@ -121,17 +122,18 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) fmgr_info(cmp_proc_oid, &cmp_func); ranges = dsm_array_get_pointer(&rangerel->ranges); - pos = range_binary_search(rangerel, &cmp_func, value, &found); + search_state = search_range_partition_eq(value, rangerel, + &cmp_func, &pos); /* - * If found then just return oid. 
Else create new partitions + * If found then just return oid, else create new partitions */ - if (found) + if (search_state == SEARCH_RANGEREL_FOUND) PG_RETURN_OID(ranges[pos].child_oid); /* * If not found and value is between first and last partitions - */ - if (!found && pos >= 0) + */ + else if (search_state == SEARCH_RANGEREL_GAP) PG_RETURN_NULL(); else { @@ -148,13 +150,16 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) * Check if someone else has already created partition. */ ranges = dsm_array_get_pointer(&rangerel->ranges); - pos = range_binary_search(rangerel, &cmp_func, value, &found); - if (found) + search_state = search_range_partition_eq(value, rangerel, + &cmp_func, &pos); + if (search_state == SEARCH_RANGEREL_FOUND) { LWLockRelease(pmstate->edit_partitions_lock); LWLockRelease(pmstate->load_config_lock); PG_RETURN_OID(ranges[pos].child_oid); } + else + Assert(search_state != SEARCH_RANGEREL_GAP); /* Start background worker to create new partitions */ child_oid = create_partitions_bg_worker(relid, value, value_type, &crashed); @@ -167,12 +172,12 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) } /* Repeat binary search */ - (void) range_binary_search(rangerel, &cmp_func, value, &found); - if (found) - PG_RETURN_OID(child_oid); + Assert(SEARCH_RANGEREL_FOUND == search_range_partition_eq(value, + rangerel, + &cmp_func, + &pos)); + PG_RETURN_OID(child_oid); } - - PG_RETURN_NULL(); } /* @@ -198,7 +203,7 @@ get_partition_range(PG_FUNCTION_ARGS) ArrayType *arr; prel = get_pathman_relation_info(parent_oid, NULL); - + rangerel = get_pathman_range_relation(parent_oid, NULL); if (!prel || !rangerel) @@ -331,8 +336,8 @@ check_overlap(PG_FUNCTION_ARGS) PartRelationInfo *prel; RangeRelation *rangerel; RangeEntry *ranges; - FmgrInfo cmp_func_1; - FmgrInfo cmp_func_2; + FmgrInfo cmp_func_1; + FmgrInfo cmp_func_2; int i; bool byVal; @@ -343,8 +348,8 @@ check_overlap(PG_FUNCTION_ARGS) PG_RETURN_NULL(); /* comparison functions */ - cmp_func_1 = 
*get_cmp_func(p1_type, prel->atttype); - cmp_func_2 = *get_cmp_func(p2_type, prel->atttype); + fill_type_cmp_fmgr_info(&cmp_func_1, p1_type, prel->atttype); + fill_type_cmp_fmgr_info(&cmp_func_2, p2_type, prel->atttype); byVal = rangerel->by_val; ranges = (RangeEntry *) dsm_array_get_pointer(&rangerel->ranges); @@ -379,12 +384,3 @@ release_partitions_lock(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } - -// Datum - -// names = stringToQualifiedNameList(class_name_or_oid); - -// ident -// bool -// SplitIdentifierString(char *rawstring, char separator, -// List **namelist) \ No newline at end of file diff --git a/src/runtime_merge_append.c b/src/runtime_merge_append.c index 9ed120a09b..8aac736c2b 100644 --- a/src/runtime_merge_append.c +++ b/src/runtime_merge_append.c @@ -441,7 +441,8 @@ runtimemergeappend_rescan(CustomScanState *node) * initialize sort-key information */ scan_state->ms_nkeys = scan_state->numCols; - scan_state->ms_sortkeys = palloc0(sizeof(SortSupportData) * scan_state->numCols); + scan_state->ms_sortkeys = (SortSupport) + palloc0(sizeof(SortSupportData) * scan_state->numCols); for (i = 0; i < scan_state->numCols; i++) { diff --git a/src/runtime_merge_append.h b/src/runtime_merge_append.h index 3fc7f4c867..1d32a21018 100644 --- a/src/runtime_merge_append.h +++ b/src/runtime_merge_append.h @@ -51,7 +51,8 @@ extern CustomExecMethods runtime_merge_append_exec_methods; void init_runtime_merge_append_static_data(void); -Path * create_runtimemergeappend_path(PlannerInfo *root, AppendPath *inner_append, +Path * create_runtimemergeappend_path(PlannerInfo *root, + AppendPath *inner_append, ParamPathInfo *param_info, double sel); @@ -61,7 +62,9 @@ Plan * create_runtimemergeappend_plan(PlannerInfo *root, RelOptInfo *rel, Node * runtimemergeappend_create_scan_state(CustomScan *node); -void runtimemergeappend_begin(CustomScanState *node, EState *estate, int eflags); +void runtimemergeappend_begin(CustomScanState *node, + EState *estate, + int eflags); TupleTableSlot * 
runtimemergeappend_exec(CustomScanState *node); @@ -69,6 +72,8 @@ void runtimemergeappend_end(CustomScanState *node); void runtimemergeappend_rescan(CustomScanState *node); -void runtimemergeappend_explain(CustomScanState *node, List *ancestors, ExplainState *es); +void runtimemergeappend_explain(CustomScanState *node, + List *ancestors, + ExplainState *es); #endif diff --git a/src/runtimeappend.h b/src/runtimeappend.h index d2ca2b6f55..db2e4aca60 100644 --- a/src/runtimeappend.h +++ b/src/runtimeappend.h @@ -63,7 +63,8 @@ extern CustomExecMethods runtimeappend_exec_methods; void init_runtimeappend_static_data(void); -Path * create_runtimeappend_path(PlannerInfo *root, AppendPath *inner_append, +Path * create_runtimeappend_path(PlannerInfo *root, + AppendPath *inner_append, ParamPathInfo *param_info, double sel); @@ -73,7 +74,9 @@ Plan * create_runtimeappend_plan(PlannerInfo *root, RelOptInfo *rel, Node * runtimeappend_create_scan_state(CustomScan *node); -void runtimeappend_begin(CustomScanState *node, EState *estate, int eflags); +void runtimeappend_begin(CustomScanState *node, + EState *estate, + int eflags); TupleTableSlot * runtimeappend_exec(CustomScanState *node); @@ -81,6 +84,8 @@ void runtimeappend_end(CustomScanState *node); void runtimeappend_rescan(CustomScanState *node); -void runtimeappend_explain(CustomScanState *node, List *ancestors, ExplainState *es); +void runtimeappend_explain(CustomScanState *node, + List *ancestors, + ExplainState *es); #endif diff --git a/src/utils.c b/src/utils.c index f907cc5c35..dced66a71f 100644 --- a/src/utils.c +++ b/src/utils.c @@ -8,12 +8,16 @@ * ------------------------------------------------------------------------ */ #include "utils.h" +#include "access/nbtree.h" +#include "executor/spi.h" #include "nodes/nodeFuncs.h" #include "nodes/makefuncs.h" #include "optimizer/var.h" #include "optimizer/restrictinfo.h" #include "parser/parse_param.h" #include "utils/builtins.h" +#include "utils/snapmgr.h" +#include 
"utils/lsyscache.h" #include "rewrite/rewriteManip.h" #include "catalog/heap.h" @@ -171,7 +175,7 @@ append_trigger_descs(TriggerDesc *src, TriggerDesc *more, bool *grown_up) *grown_up = true; new_desc->numtriggers = src->numtriggers + more->numtriggers; - new_desc->triggers = palloc(new_desc->numtriggers * sizeof(Trigger)); + new_desc->triggers = (Trigger *) palloc(new_desc->numtriggers * sizeof(Trigger)); cur_trigger = new_desc->triggers; @@ -210,3 +214,58 @@ append_trigger_descs(TriggerDesc *src, TriggerDesc *more, bool *grown_up) return new_desc; } + +Oid +add_missing_partition(Oid partitioned_table, Const *value) +{ + bool crashed; + Oid result = InvalidOid; + + SPI_connect(); + PushActiveSnapshot(GetTransactionSnapshot()); + + /* Create partitions */ + result = create_partitions(partitioned_table, + value->constvalue, + value->consttype, + &crashed); + + /* Cleanup */ + SPI_finish(); + PopActiveSnapshot(); + + return result; +} + +void +fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, Oid type2) +{ + Oid cmp_proc_oid; + TypeCacheEntry *tce; + + tce = lookup_type_cache(type1, + TYPECACHE_BTREE_OPFAMILY | + TYPECACHE_CMP_PROC | + TYPECACHE_CMP_PROC_FINFO); + + cmp_proc_oid = get_opfamily_proc(tce->btree_opf, + type1, + type2, + BTORDER_PROC); + fmgr_info(cmp_proc_oid, finfo); + + return; +} + +List * +list_reverse(List *l) +{ + List *result = NIL; + ListCell *lc; + + foreach (lc, l) + { + result = lcons(lfirst(lc), result); + } + return result; +} diff --git a/src/utils.h b/src/utils.h index ebd1d1546f..69e8545500 100644 --- a/src/utils.h +++ b/src/utils.h @@ -14,17 +14,13 @@ #include "utils/rel.h" #include "nodes/relation.h" #include "nodes/nodeFuncs.h" +#include "pathman.h" -typedef struct -{ - RelOptInfo *child; - RelOptInfo *parent; - int sublevels_up; -} ReplaceVarsContext; bool clause_contains_params(Node *clause); -List * build_index_tlist(PlannerInfo *root, IndexOptInfo *index, +List * build_index_tlist(PlannerInfo *root, + IndexOptInfo *index, 
Relation heapRelation); bool check_rinfo_for_partitioned_attr(List *rinfo, @@ -32,7 +28,15 @@ bool check_rinfo_for_partitioned_attr(List *rinfo, AttrNumber varattno); TriggerDesc * append_trigger_descs(TriggerDesc *src, - TriggerDesc *more, - bool *grown_up); + TriggerDesc *more, + bool *grown_up); + +Oid add_missing_partition(Oid partitioned_table, Const *value); + +void fill_type_cmp_fmgr_info(FmgrInfo *finfo, + Oid type1, + Oid type2); + +List * list_reverse(List *l); #endif diff --git a/src/worker.c b/src/worker.c index 79574cf673..795ee28b68 100644 --- a/src/worker.c +++ b/src/worker.c @@ -7,6 +7,7 @@ #include "access/xact.h" #include "utils/snapmgr.h" #include "utils/typcache.h" +#include "utils.h" /*------------------------------------------------------------------------- * @@ -28,11 +29,11 @@ typedef struct PartitionArgs { Oid dbid; Oid relid; - #ifdef HAVE_INT64_TIMESTAMP +#ifdef HAVE_INT64_TIMESTAMP int64 value; - #else +#else double value; - #endif +#endif Oid value_type; bool by_val; Oid result; @@ -142,7 +143,11 @@ bg_worker_main(Datum main_arg) PushActiveSnapshot(GetTransactionSnapshot()); /* Create partitions */ - args->result = create_partitions(args->relid, PATHMAN_GET_DATUM(args->value, args->by_val), args->value_type, &args->crashed); + args->result = create_partitions(args->relid, + PATHMAN_GET_DATUM(args->value, + args->by_val), + args->value_type, + &args->crashed); /* Cleanup */ SPI_finish(); @@ -158,18 +163,18 @@ bg_worker_main(Datum main_arg) Oid create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed) { - int ret; - RangeEntry *ranges; - Datum vals[2]; - Oid oids[] = {OIDOID, value_type}; - bool nulls[] = {false, false}; - char *sql; - bool found; - int pos; - PartRelationInfo *prel; - RangeRelation *rangerel; - FmgrInfo cmp_func; - char *schema; + int ret; + RangeEntry *ranges; + Datum vals[2]; + Oid oids[] = {OIDOID, value_type}; + bool nulls[] = {false, false}; + char *sql; + int pos; + PartRelationInfo *prel; + 
RangeRelation *rangerel; + FmgrInfo cmp_func; + char *schema; + search_rangerel_result search_state; *crashed = false; schema = get_extension_schema(); @@ -178,7 +183,7 @@ create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed) rangerel = get_pathman_range_relation(relid, NULL); /* Comparison function */ - cmp_func = *get_cmp_func(value_type, prel->atttype); + fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); vals[0] = ObjectIdGetDatum(relid); vals[1] = value; @@ -208,8 +213,9 @@ create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed) /* Repeat binary search */ ranges = dsm_array_get_pointer(&rangerel->ranges); - pos = range_binary_search(rangerel, &cmp_func, value, &found); - if (found) + search_state = search_range_partition_eq(value, rangerel, + &cmp_func, &pos); + if (search_state == SEARCH_RANGEREL_FOUND) return ranges[pos].child_oid; return 0; From 7dc0118ee0e21063398fb71241e5f50fe3f2735e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 1 Jun 2016 14:57:23 +0300 Subject: [PATCH 010/184] change IndexRange's implementation --- src/hooks.c | 4 +- src/nodes_common.c | 6 +-- src/partition_filter.c | 1 + src/pathman.h | 51 ++++++++++++++++++-------- src/pg_pathman.c | 15 ++++---- src/rangeset.c | 83 ++++++++++++++++-------------------------- 6 files changed, 80 insertions(+), 80 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 1c262051b6..3ed06abfc9 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -220,7 +220,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb rte->inh = true; dsm_arr = (Oid *) dsm_array_get_pointer(&prel->children); - ranges = list_make1_int(make_irange(0, prel->children_count - 1, false)); + ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); /* Make wrappers over restrictions and collect final rangeset */ context.prel = prel; @@ -282,7 +282,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb { 
IndexRange irange = lfirst_irange(lc); - for (i = irange_lower(irange); i <= irange_upper(irange); i++) + for (i = irange.ir_lower; i <= irange.ir_upper; i++) append_child_relation(root, rel, rti, rte, i, dsm_arr[i], wrappers); } diff --git a/src/nodes_common.c b/src/nodes_common.c index 1e8d64eec0..9ea729113d 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -217,8 +217,8 @@ get_partition_oids(List *ranges, int *n, PartRelationInfo *prel) foreach (range_cell, ranges) { int i; - int a = irange_lower(lfirst_irange(range_cell)); - int b = irange_upper(lfirst_irange(range_cell)); + int a = lfirst_irange(range_cell).ir_lower; + int b = lfirst_irange(range_cell).ir_upper; for (i = a; i <= b; i++) { @@ -498,7 +498,7 @@ rescan_append_common(CustomScanState *node) int nparts; WalkerContext wcxt; - ranges = list_make1_int(make_irange(0, prel->children_count - 1, false)); + ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); wcxt.prel = prel; wcxt.econtext = econtext; diff --git a/src/partition_filter.c b/src/partition_filter.c index 1c9375f98c..6e80066944 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -173,6 +173,7 @@ partition_filter_exec(CustomScanState *node) else if (nparts == 0) selected_partid = add_missing_partition(state->partitioned_table, &state->temp_const); + else selected_partid = parts[0]; diff --git a/src/pathman.h b/src/pathman.h index 788837c8e5..66f290ebcd 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -139,27 +139,46 @@ extern PathmanState *pmstate; #define PATHMAN_GET_DATUM(value, by_val) ( (by_val) ? (value) : PointerGetDatum(&value) ) -typedef uint32 IndexRange; -#define RANGE_INFINITY 0x7FFF -#define RANGE_LOSSY 0x80000000 +typedef struct { + bool ir_valid : 1; + bool ir_lossy : 1; + uint32 ir_lower : 31; + uint32 ir_upper : 31; +} IndexRange; -#define make_irange(lower, upper, lossy) \ - (((lower) & RANGE_INFINITY) << 15 | ((upper) & RANGE_INFINITY) | ((lossy) ? 
RANGE_LOSSY : 0)) +#define RANGE_MASK 0xEFFFFFFF -#define irange_lower(irange) \ - (((irange) >> 15) & RANGE_INFINITY) +#define InvalidIndexRange { false, false, 0, 0 } -#define irange_upper(irange) \ - ((irange) & RANGE_INFINITY) +inline static IndexRange +make_irange(uint32 lower, uint32 upper, bool lossy) +{ + IndexRange result; + + result.ir_valid = true; + result.ir_lossy = lossy; + result.ir_lower = (lower & RANGE_MASK); + result.ir_upper = (upper & RANGE_MASK); + + return result; +} + +inline static IndexRange * +alloc_irange(IndexRange irange) +{ + IndexRange *result = (IndexRange *) palloc(sizeof(IndexRange)); + + memcpy((void *) result, (void *) &irange, sizeof(IndexRange)); -#define irange_is_lossy(irange) \ - ((irange) & RANGE_LOSSY) + return result; +} -#define lfirst_irange(lc) ((IndexRange)(lc)->data.int_value) -#define lappend_irange(list, irange) (lappend_int((list), (int)(irange))) -#define lcons_irange(irange, list) lcons_int((int)(irange), (list)) -#define list_make1_irange(irange) lcons_int((int)(irange), NIL) -#define llast_irange(l) (IndexRange)lfirst_int(list_tail(l)) +#define lfirst_irange(lc) ( *(IndexRange *) lfirst(lc) ) +#define lappend_irange(list, irange) ( lappend((list), alloc_irange(irange)) ) +#define lcons_irange(irange, list) ( lcons(alloc_irange(irange), (list)) ) +#define list_make1_irange(irange) ( lcons(alloc_irange(irange), NIL) ) +#define llast_irange(list) ( lfirst_irange(list_tail(list)) ) +#define linitial_irange(list) ( lfirst_irange(list_head(list)) ) /* rangeset.c */ bool irange_intersects(IndexRange a, IndexRange b); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index b32119e0c9..fae6fefd91 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -304,7 +304,7 @@ handle_modification_query(Query *parse) return; /* Parse syntax tree and extract partition ranges */ - ranges = list_make1_int(make_irange(0, prel->children_count - 1, false)); + ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); 
expr = (Expr *) eval_const_expressions(NULL, parse->jointree->quals); if (!expr) return; @@ -322,11 +322,11 @@ handle_modification_query(Query *parse) /* If only one partition is affected then substitute parent table with partition */ if (irange_list_length(ranges) == 1) { - IndexRange irange = (IndexRange) linitial_oid(ranges); - if (irange_lower(irange) == irange_upper(irange)) + IndexRange irange = linitial_irange(ranges); + if (irange.ir_lower == irange.ir_upper) { Oid *children = (Oid *) dsm_array_get_pointer(&prel->children); - rte->relid = children[irange_lower(irange)]; + rte->relid = children[irange.ir_lower]; rte->inh = false; } } @@ -1143,12 +1143,13 @@ search_range_partition_eq(const Datum value, } else { - IndexRange irange = lfirst_irange(list_head(result.rangeset)); + IndexRange irange = linitial_irange(result.rangeset); Assert(list_length(result.rangeset) == 1); - Assert(irange_lower(irange) == irange_upper(irange)); + Assert(irange.ir_lower == irange.ir_upper); + Assert(irange.ir_valid); - *part_idx = irange_lower(irange); + *part_idx = irange.ir_lower; return SEARCH_RANGEREL_FOUND; } } diff --git a/src/rangeset.c b/src/rangeset.c index cfc6c7072d..9c6e197031 100644 --- a/src/rangeset.c +++ b/src/rangeset.c @@ -13,58 +13,37 @@ bool irange_intersects(IndexRange a, IndexRange b) { - return (irange_lower(a) <= irange_upper(b)) && - (irange_lower(b) <= irange_upper(a)); + return (a.ir_lower <= b.ir_upper) && + (b.ir_lower <= a.ir_upper); } /* Check if two ranges are conjuncted */ bool irange_conjuncted(IndexRange a, IndexRange b) { - return (irange_lower(a) - 1 <= irange_upper(b)) && - (irange_lower(b) - 1 <= irange_upper(a)); + return (a.ir_lower - 1 <= b.ir_upper) && + (b.ir_lower - 1 <= a.ir_upper); } /* Make union of two ranges. They should have the same lossiness. 
*/ IndexRange irange_union(IndexRange a, IndexRange b) { - Assert(irange_is_lossy(a) == irange_is_lossy(b)); - return make_irange(Min(irange_lower(a), irange_lower(b)), - Max(irange_upper(a), irange_upper(b)), - irange_is_lossy(a)); + Assert(a.ir_lossy == b.ir_lossy); + return make_irange(Min(a.ir_lower, b.ir_lower), + Max(a.ir_upper, b.ir_upper), + a.ir_lossy); } /* Get intersection of two ranges */ IndexRange irange_intersect(IndexRange a, IndexRange b) { - return make_irange(Max(irange_lower(a), irange_lower(b)), - Min(irange_upper(a), irange_upper(b)), - irange_is_lossy(a) || irange_is_lossy(b)); + return make_irange(Max(a.ir_lower, b.ir_lower), + Min(a.ir_upper, b.ir_upper), + a.ir_lossy || b.ir_lossy); } -#ifdef NOT_USED -/* Print range list in debug purposes */ -static char * -print_irange(List *l) -{ - ListCell *c; - StringInfoData str; - - initStringInfo(&str); - - foreach (c, l) - { - IndexRange ir = lfirst_irange(c); - - appendStringInfo(&str, "[%d,%d]%c ", irange_lower(ir), irange_upper(ir), - irange_is_lossy(ir) ? 'l' : 'e'); - } - return str.data; -} -#endif - /* * Make union of two index rage lists. */ @@ -74,7 +53,7 @@ irange_list_union(List *a, List *b) ListCell *ca, *cb; List *result = NIL; - IndexRange cur = 0; + IndexRange cur = InvalidIndexRange; bool have_cur = false; ca = list_head(a); @@ -82,12 +61,12 @@ irange_list_union(List *a, List *b) while (ca || cb) { - IndexRange next = 0; + IndexRange next = InvalidIndexRange; /* Fetch next range with lesser lower bound */ if (ca && cb) { - if (irange_lower(lfirst_irange(ca)) <= irange_lower(lfirst_irange(cb))) + if (lfirst_irange(ca).ir_lower <= lfirst_irange(cb).ir_lower) { next = lfirst_irange(ca); ca = lnext(ca); @@ -122,25 +101,25 @@ irange_list_union(List *a, List *b) /* * Ranges are conjuncted, try to unify them. 
*/ - if (irange_is_lossy(next) == irange_is_lossy(cur)) + if (next.ir_lossy == cur.ir_lossy) { cur = irange_union(next, cur); } else { - if (!irange_is_lossy(cur)) + if (!cur.ir_lossy) { result = lappend_irange(result, cur); - cur = make_irange(irange_upper(cur) + 1, - irange_upper(next), - irange_is_lossy(next)); + cur = make_irange(cur.ir_upper + 1, + next.ir_upper, + next.ir_lossy); } else { - result = lappend_irange(result, - make_irange(irange_lower(cur), - irange_lower(next) - 1, - irange_is_lossy(cur))); + result = lappend_irange(result, + make_irange(cur.ir_lower, + next.ir_lower - 1, + cur.ir_lossy)); cur = next; } } @@ -196,10 +175,10 @@ irange_list_intersect(List *a, List *b) if (result != NIL) { last = llast_irange(result); - if (irange_conjuncted(last, intersect) && - irange_is_lossy(last) == irange_is_lossy(intersect)) + if (irange_conjuncted(last, intersect) && + last.ir_lossy == intersect.ir_lossy) { - llast_int(result) = irange_union(last, intersect); + llast(result) = alloc_irange(irange_union(last, intersect)); } else { @@ -217,9 +196,9 @@ irange_list_intersect(List *a, List *b) * which lists to fetch, since lower bound of next range is greater (or * equal) to upper bound of current. 
*/ - if (irange_upper(ra) <= irange_upper(rb)) + if (ra.ir_upper <= rb.ir_upper) ca = lnext(ca); - if (irange_upper(ra) >= irange_upper(rb)) + if (ra.ir_upper >= rb.ir_upper) cb = lnext(cb); } return result; @@ -235,7 +214,7 @@ irange_list_length(List *rangeset) foreach (lc, rangeset) { IndexRange irange = lfirst_irange(lc); - result += irange_upper(irange) - irange_lower(irange) + 1; + result += irange.ir_upper - irange.ir_lower + 1; } return result; } @@ -249,10 +228,10 @@ irange_list_find(List *rangeset, int index, bool *lossy) foreach (lc, rangeset) { IndexRange irange = lfirst_irange(lc); - if (index >= irange_lower(irange) && index <= irange_upper(irange)) + if (index >= irange.ir_lower && index <= irange.ir_upper) { if (lossy) - *lossy = irange_is_lossy(irange) ? true : false; + *lossy = irange.ir_lossy; return true; } } From 9d8590c090f0343f47a0e400c88f77026c7ebf85 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 1 Jun 2016 17:04:16 +0300 Subject: [PATCH 011/184] remove redundant includes --- src/dsm_array.c | 4 +--- src/hooks.c | 1 - src/init.c | 3 --- src/nodes_common.c | 1 - src/partition_filter.c | 1 - src/partition_filter.h | 2 -- src/pathman.h | 10 ++++++---- src/pg_pathman.c | 6 +++--- src/pl_funcs.c | 7 +------ src/runtime_merge_append.c | 2 -- src/utils.c | 3 --- 11 files changed, 11 insertions(+), 29 deletions(-) diff --git a/src/dsm_array.c b/src/dsm_array.c index ac61a4ecad..c4fefa842e 100644 --- a/src/dsm_array.c +++ b/src/dsm_array.c @@ -12,8 +12,6 @@ #include "pathman.h" #include "storage/shmem.h" #include "storage/dsm.h" -#include "storage/lwlock.h" -#include static dsm_segment *segment = NULL; @@ -95,7 +93,7 @@ init_dsm_segment(size_t blocks_count, size_t block_size) ret = false; segment = dsm_attach(dsm_cfg->segment_handle); } - + /* * If segment hasn't been created yet or has already been destroyed * (it happens when last session detaches segment) then create new one diff --git a/src/hooks.c b/src/hooks.c index 
3ed06abfc9..8cbfc38cc5 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -10,7 +10,6 @@ #include "postgres.h" #include "optimizer/cost.h" #include "optimizer/restrictinfo.h" -#include "utils/guc.h" #include "pathman.h" #include "hooks.h" #include "partition_filter.h" diff --git a/src/init.c b/src/init.c index f08b0b9394..492e53a68e 100644 --- a/src/init.c +++ b/src/init.c @@ -11,15 +11,12 @@ #include "miscadmin.h" #include "executor/spi.h" #include "catalog/pg_type.h" -#include "catalog/pg_class.h" #include "catalog/pg_constraint.h" -#include "catalog/pg_operator.h" #include "access/htup_details.h" #include "utils/syscache.h" #include "utils/builtins.h" #include "utils/typcache.h" #include "utils/lsyscache.h" -#include "utils/bytea.h" #include "utils/snapmgr.h" #include "optimizer/clauses.h" diff --git a/src/nodes_common.c b/src/nodes_common.c index 9ea729113d..731f75be20 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -8,7 +8,6 @@ * ------------------------------------------------------------------------ */ #include "postgres.h" -#include "optimizer/paths.h" #include "nodes_common.h" #include "runtimeappend.h" #include "optimizer/restrictinfo.h" diff --git a/src/partition_filter.c b/src/partition_filter.c index 6e80066944..6c05d2977e 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -1,7 +1,6 @@ #include "partition_filter.h" #include "utils.h" #include "utils/guc.h" -#include "utils/lsyscache.h" #include "nodes/nodeFuncs.h" diff --git a/src/partition_filter.h b/src/partition_filter.h index fb7b529ca2..a1128d0919 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -2,8 +2,6 @@ #define RUNTIME_INSERT_H #include "postgres.h" -#include "optimizer/paths.h" -#include "optimizer/pathnode.h" #include "pathman.h" #include "nodes_common.h" diff --git a/src/pathman.h b/src/pathman.h index 66f290ebcd..dc6060049a 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -180,6 +180,12 @@ alloc_irange(IndexRange irange) #define 
llast_irange(list) ( lfirst_irange(list_tail(list)) ) #define linitial_irange(list) ( lfirst_irange(list_head(list)) ) + +extern HTAB *relations; +extern HTAB *range_restrictions; +extern bool initialization_needed; + + /* rangeset.c */ bool irange_intersects(IndexRange a, IndexRange b); bool irange_conjuncted(IndexRange a, IndexRange b); @@ -202,10 +208,6 @@ void *dsm_array_get_pointer(const DsmArray* arr); dsm_handle get_dsm_array_segment(void); void attach_dsm_array_segment(void); -HTAB *relations; -HTAB *range_restrictions; -bool initialization_needed; - /* initialization functions */ Size pathman_memsize(void); void init_shmem_config(void); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index fae6fefd91..a030fb9b05 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -25,18 +25,15 @@ #include "optimizer/cost.h" #include "parser/analyze.h" #include "utils/hsearch.h" -#include "utils/tqual.h" #include "utils/rel.h" #include "utils/elog.h" #include "utils/array.h" -#include "utils/date.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/selfuncs.h" #include "access/heapam.h" #include "access/nbtree.h" #include "storage/ipc.h" -#include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "foreign/fdwapi.h" #include "hooks.h" @@ -47,16 +44,19 @@ PG_MODULE_MAGIC; + typedef struct { Oid old_varno; Oid new_varno; } change_varno_context; + bool inheritance_disabled; bool pg_pathman_enable; PathmanState *pmstate; + /* pg module functions */ void _PG_init(void); diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 371a987546..9bffae1aef 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -8,15 +8,10 @@ * ------------------------------------------------------------------------ */ #include "pathman.h" +#include "access/nbtree.h" #include "utils/lsyscache.h" #include "utils/typcache.h" #include "utils/array.h" -#include "utils/snapmgr.h" -#include "access/nbtree.h" -#include "access/xact.h" -#include "catalog/pg_type.h" -#include 
"executor/spi.h" -#include "storage/lmgr.h" #include "utils.h" diff --git a/src/runtime_merge_append.c b/src/runtime_merge_append.c index 8aac736c2b..20dd018aa8 100644 --- a/src/runtime_merge_append.c +++ b/src/runtime_merge_append.c @@ -12,9 +12,7 @@ #include "pathman.h" -#include "optimizer/clauses.h" #include "optimizer/cost.h" -#include "optimizer/restrictinfo.h" #include "optimizer/planmain.h" #include "optimizer/tlist.h" #include "optimizer/var.h" diff --git a/src/utils.c b/src/utils.c index dced66a71f..c8a208925a 100644 --- a/src/utils.c +++ b/src/utils.c @@ -14,11 +14,8 @@ #include "nodes/makefuncs.h" #include "optimizer/var.h" #include "optimizer/restrictinfo.h" -#include "parser/parse_param.h" -#include "utils/builtins.h" #include "utils/snapmgr.h" #include "utils/lsyscache.h" -#include "rewrite/rewriteManip.h" #include "catalog/heap.h" From d42fd8f631a8cf07d74397ced6bf1739afcf8817 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 1 Jun 2016 23:09:24 +0300 Subject: [PATCH 012/184] dsm_array_get_pointer() can return palloc'ed copy, new InitWalkerContext macro, WalkerContext caches RangeEntry array --- src/dsm_array.c | 70 +++++++++++++------- src/hooks.c | 7 +- src/init.c | 22 +++---- src/nodes_common.c | 24 +++++-- src/partition_filter.c | 26 ++++++-- src/partition_filter.h | 3 + src/pathman.h | 45 +++++++++---- src/pg_pathman.c | 142 +++++++++++++++++++++++++++-------------- src/pl_funcs.c | 125 ++++++++++++++++++------------------ src/runtimeappend.h | 4 ++ src/worker.c | 11 ++-- 11 files changed, 299 insertions(+), 180 deletions(-) diff --git a/src/dsm_array.c b/src/dsm_array.c index c4fefa842e..f28b4cfff6 100644 --- a/src/dsm_array.c +++ b/src/dsm_array.c @@ -26,7 +26,14 @@ typedef struct DsmConfig static DsmConfig *dsm_cfg = NULL; -typedef int BlockHeader; + +/* + * Block header + * + * Its size must be 4 bytes for 32bit and 8 bytes for 64bit. 
+ * Otherwise it could screw up an alignment (for example on Sparc9) + */ +typedef uintptr_t BlockHeader; typedef BlockHeader* BlockHeaderPtr; #define FREE_BIT 0x80000000 @@ -144,20 +151,22 @@ init_dsm_table(size_t block_size, size_t start, size_t end) * Allocate array inside dsm_segment */ void -alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t length) +alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count) { - int i = 0; - int size_requested = entry_size * length; - int min_pos = 0; - int max_pos = 0; - bool found = false; - bool collecting_blocks = false; - size_t offset = -1; - size_t total_length = 0; + size_t i = 0; + size_t size_requested = entry_size * elem_count; + size_t min_pos = 0; + size_t max_pos = 0; + bool found = false; + bool collecting_blocks = false; + size_t offset = -1; + size_t total_length = 0; BlockHeaderPtr header; - char *ptr = dsm_segment_address(segment); + char *ptr = dsm_segment_address(segment); + + arr->entry_size = entry_size; - for (i = dsm_cfg->first_free; i<dsm_cfg->blocks_count; ) + for (i = dsm_cfg->first_free; i < dsm_cfg->blocks_count; ) { header = (BlockHeaderPtr) &ptr[i * dsm_cfg->block_size]; if (is_free(header)) @@ -204,7 +213,7 @@ alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t length) dsm_cfg->blocks_count = new_blocks_count; /* try again */ - return alloc_dsm_array(arr, entry_size, length); + return alloc_dsm_array(arr, entry_size, elem_count); } /* look up for first free block */ @@ -233,7 +242,7 @@ alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t length) *header = set_length(header, max_pos - min_pos + 1); arr->offset = offset; - arr->length = length; + arr->elem_count = elem_count; } } @@ -258,19 +267,19 @@ free_dsm_array(DsmArray *arr) dsm_cfg->first_free = start; arr->offset = 0; - arr->length = 0; + arr->elem_count = 0; } void -resize_dsm_array(DsmArray *arr, size_t entry_size, size_t length) +resize_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count) { void *array_data;
size_t array_data_size; void *buffer; /* Copy data from array to temporary buffer */ - array_data = dsm_array_get_pointer(arr); - array_data_size = arr->length * entry_size; + array_data = dsm_array_get_pointer(arr, false); + array_data_size = arr->elem_count * entry_size; buffer = palloc(array_data_size); memcpy(buffer, array_data, array_data_size); @@ -278,17 +287,34 @@ resize_dsm_array(DsmArray *arr, size_t entry_size, size_t length) free_dsm_array(arr); /* Allocate new array */ - alloc_dsm_array(arr, entry_size, length); + alloc_dsm_array(arr, entry_size, elem_count); /* Copy data to new array */ - array_data = dsm_array_get_pointer(arr); + array_data = dsm_array_get_pointer(arr, false); memcpy(array_data, buffer, array_data_size); pfree(buffer); } void * -dsm_array_get_pointer(const DsmArray *arr) +dsm_array_get_pointer(const DsmArray *arr, bool copy) { - return (char *) dsm_segment_address(segment) + arr->offset + sizeof(BlockHeader); + uint8 *segment_address, + *dsm_array, + *result; + size_t size; + + segment_address = (uint8 *) dsm_segment_address(segment); + dsm_array = segment_address + arr->offset + sizeof(BlockHeader); + + if (copy) + { + size = arr->elem_count * arr->entry_size; + result = palloc(size); + memcpy((void *) result, (void *) dsm_array, size); + } + else + result = dsm_array; + + return result; } diff --git a/src/hooks.c b/src/hooks.c index 8cbfc38cc5..c1ffd4ccb4 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -218,14 +218,11 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb } rte->inh = true; - dsm_arr = (Oid *) dsm_array_get_pointer(&prel->children); + dsm_arr = (Oid *) dsm_array_get_pointer(&prel->children, true); ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); /* Make wrappers over restrictions and collect final rangeset */ - context.prel = prel; - context.econtext = NULL; - context.hasLeast = false; - context.hasGreatest = false; + InitWalkerContext(&context, prel, NULL); 
wrappers = NIL; foreach(lc, rel->baserestrictinfo) { diff --git a/src/init.c b/src/init.c index 492e53a68e..81aaf4d6f5 100644 --- a/src/init.c +++ b/src/init.c @@ -98,19 +98,19 @@ load_config(void) * oid into it. This array contains databases oids * that have already been cached (to prevent repeat caching) */ - if (&pmstate->databases.length > 0) + if (&pmstate->databases.elem_count > 0) free_dsm_array(&pmstate->databases); alloc_dsm_array(&pmstate->databases, sizeof(Oid), 1); - databases = (Oid *) dsm_array_get_pointer(&pmstate->databases); + databases = (Oid *) dsm_array_get_pointer(&pmstate->databases, false); databases[0] = MyDatabaseId; } else { - int databases_count = pmstate->databases.length; + int databases_count = pmstate->databases.elem_count; int i; /* Check if we already cached config for current database */ - databases = (Oid *) dsm_array_get_pointer(&pmstate->databases); + databases = (Oid *) dsm_array_get_pointer(&pmstate->databases, false); for(i = 0; i < databases_count; i++) if (databases[i] == MyDatabaseId) { @@ -120,7 +120,7 @@ load_config(void) /* Put current database oid to databases list */ resize_dsm_array(&pmstate->databases, sizeof(Oid), databases_count + 1); - databases = (Oid *) dsm_array_get_pointer(&pmstate->databases); + databases = (Oid *) dsm_array_get_pointer(&pmstate->databases, false); databases[databases_count] = MyDatabaseId; } @@ -227,7 +227,7 @@ load_relations_hashtable(bool reinitialize) switch(prel->parttype) { case PT_RANGE: - if (reinitialize && prel->children.length > 0) + if (reinitialize && prel->children.elem_count > 0) { RangeRelation *rangerel = get_pathman_range_relation(oid, NULL); free_dsm_array(&prel->children); @@ -237,7 +237,7 @@ load_relations_hashtable(bool reinitialize) load_check_constraints(oid, GetCatalogSnapshot(oid)); break; case PT_HASH: - if (reinitialize && prel->children.length > 0) + if (reinitialize && prel->children.elem_count > 0) { free_dsm_array(&prel->children); prel->children_count = 0; @@ 
-286,7 +286,7 @@ load_check_constraints(Oid parent_oid, Snapshot snapshot) prel = get_pathman_relation_info(parent_oid, NULL); /* Skip if already loaded */ - if (prel->children.length > 0) + if (prel->children.elem_count > 0) return; plan = SPI_prepare("select pg_constraint.* " @@ -309,7 +309,7 @@ load_check_constraints(Oid parent_oid, Snapshot snapshot) int hash; alloc_dsm_array(&prel->children, sizeof(Oid), proc); - children = (Oid *) dsm_array_get_pointer(&prel->children); + children = (Oid *) dsm_array_get_pointer(&prel->children, false); if (prel->parttype == PT_RANGE) { @@ -322,7 +322,7 @@ load_check_constraints(Oid parent_oid, Snapshot snapshot) hash_search(range_restrictions, (void *) &key, HASH_ENTER, &found); alloc_dsm_array(&rangerel->ranges, sizeof(RangeEntry), proc); - ranges = (RangeEntry *) dsm_array_get_pointer(&rangerel->ranges); + ranges = (RangeEntry *) dsm_array_get_pointer(&rangerel->ranges, false); tce = lookup_type_cache(prel->atttype, 0); rangerel->by_val = tce->typbyval; @@ -535,7 +535,7 @@ validate_hash_constraint(Expr *expr, PartRelationInfo *prel, int *hash) return false; if ( ((Var*) left)->varattno != prel->attnum ) return false; - if (DatumGetInt32(((Const*) right)->constvalue) != prel->children.length) + if (DatumGetInt32(((Const*) right)->constvalue) != prel->children.elem_count) return false; if ( !IsA(lsecond(eqexpr->args), Const) ) diff --git a/src/nodes_common.c b/src/nodes_common.c index 731f75be20..a7e9fbe6f9 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -211,7 +211,7 @@ get_partition_oids(List *ranges, int *n, PartRelationInfo *prel) int allocated = INITIAL_ALLOC_NUM; int used = 0; Oid *result = (Oid *) palloc(allocated * sizeof(Oid)); - Oid *children = dsm_array_get_pointer(&prel->children); + Oid *children = dsm_array_get_pointer(&prel->children, true); foreach (range_cell, ranges) { @@ -495,20 +495,30 @@ rescan_append_common(CustomScanState *node) ListCell *lc; Oid *parts; int nparts; - WalkerContext wcxt; 
ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); - wcxt.prel = prel; - wcxt.econtext = econtext; - wcxt.hasLeast = false; - wcxt.hasGreatest = false; + /* + * We'd like to persist RangeEntry array + * in case of range partitioning, so 'wcxt' + * is stored inside of RuntimeAppendState + */ + if (!scan_state->wcxt_cached) + { + scan_state->wcxt.prel = prel; + scan_state->wcxt.econtext = econtext; + scan_state->wcxt.ranges = NULL; + + scan_state->wcxt_cached = true; + } + scan_state->wcxt.hasLeast = false; /* refresh runtime values */ + scan_state->wcxt.hasGreatest = false; foreach (lc, scan_state->custom_exprs) { WrapperNode *wn; - wn = walk_expr_tree((Expr *) lfirst(lc), &wcxt); + wn = walk_expr_tree((Expr *) lfirst(lc), &scan_state->wcxt); ranges = irange_list_intersect(ranges, wn->rangeset); } diff --git a/src/partition_filter.c b/src/partition_filter.c index 6c05d2977e..2f6811ce9b 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -126,6 +126,7 @@ partition_filter_exec(CustomScanState *node) PartitionFilterState *state = (PartitionFilterState *) node; + ExprContext *econtext = node->ss.ps.ps_ExprContext; EState *estate = node->ss.ps.state; PlanState *child_ps = (PlanState *) linitial(node->custom_ps); TupleTableSlot *slot; @@ -138,7 +139,6 @@ partition_filter_exec(CustomScanState *node) if (!TupIsNull(slot)) { - WalkerContext wcxt; List *ranges; int nparts; Oid *parts; @@ -159,20 +159,34 @@ partition_filter_exec(CustomScanState *node) CopyToTempConst(constlen, attlen); CopyToTempConst(constbyval, attbyval); - wcxt.prel = state->prel; - wcxt.econtext = NULL; - wcxt.hasLeast = false; - wcxt.hasGreatest = false; + /* + * We'd like to persist RangeEntry array + * in case of range partitioning, so 'wcxt' + * is stored inside of PartitionFilterState + */ + if (!state->wcxt_cached) + { + state->wcxt.prel = state->prel; + state->wcxt.econtext = econtext; + state->wcxt.ranges = NULL; - ranges = walk_expr_tree((Expr *) 
&state->temp_const, &wcxt)->rangeset; + state->wcxt_cached = true; + } + state->wcxt.hasLeast = false; /* refresh runtime values */ + state->wcxt.hasGreatest = false; + + ranges = walk_expr_tree((Expr *) &state->temp_const, &state->wcxt)->rangeset; parts = get_partition_oids(ranges, &nparts, state->prel); if (nparts > 1) elog(ERROR, "PartitionFilter selected more than one partition"); else if (nparts == 0) + { selected_partid = add_missing_partition(state->partitioned_table, &state->temp_const); + refresh_walker_context_ranges(&state->wcxt); + } else selected_partid = parts[0]; diff --git a/src/partition_filter.h b/src/partition_filter.h index a1128d0919..14480b48e6 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -27,6 +27,9 @@ typedef struct HTAB *result_rels_table; HASHCTL result_rels_table_config; + + WalkerContext wcxt; + bool wcxt_cached; } PartitionFilterState; diff --git a/src/pathman.h b/src/pathman.h index dc6060049a..9616934f36 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -48,7 +48,8 @@ typedef struct DsmArray { dsm_handle segment; size_t offset; - size_t length; + size_t elem_count; + size_t entry_size; } DsmArray; /* @@ -201,10 +202,10 @@ Size get_dsm_shared_size(void); void init_dsm_config(void); bool init_dsm_segment(size_t blocks_count, size_t block_size); void init_dsm_table(size_t block_size, size_t start, size_t end); -void alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t length); +void alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count); void free_dsm_array(DsmArray *arr); -void resize_dsm_array(DsmArray *arr, size_t entry_size, size_t length); -void *dsm_array_get_pointer(const DsmArray* arr); +void resize_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count); +void *dsm_array_get_pointer(const DsmArray *arr, bool copy); dsm_handle get_dsm_array_segment(void); void attach_dsm_array_segment(void); @@ -224,10 +225,10 @@ int append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index 
rti, RangeTblEntry *rte, int index, Oid childOID, List *wrappers); PartRelationInfo *get_pathman_relation_info(Oid relid, bool *found); RangeRelation *get_pathman_range_relation(Oid relid, bool *found); -search_rangerel_result search_range_partition_eq(Datum value, - const RangeRelation *rangerel, +search_rangerel_result search_range_partition_eq(const Datum value, FmgrInfo *cmp_func, - int *part_idx); + const RangeRelation *rangerel, + RangeEntry *out_rentry); char *get_extension_schema(void); Oid create_partitions_bg_worker(Oid relid, Datum value, Oid value_type, bool *crashed); Oid create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed); @@ -254,23 +255,41 @@ typedef struct typedef struct { - const PartRelationInfo *prel; + /* Main partitioning structure */ + PartRelationInfo *prel; + + /* Cached values */ + RangeEntry *ranges; /*cached RangeEntry array */ + size_t nranges; + ExprContext *econtext; + + /* Runtime values */ bool hasLeast, hasGreatest; Datum least, greatest; - - PlanState *pstate; - ExprContext *econtext; } WalkerContext; +#define InitWalkerContext(context, prel_info, ecxt) \ + do { \ + (context)->prel = (prel_info); \ + (context)->econtext = (ecxt); \ + (context)->ranges = NULL; \ + (context)->hasLeast = false; \ + (context)->hasGreatest = false; \ + } while (0) + void select_range_partitions(const Datum value, - const RangeRelation *rangerel, - const int strategy, + const bool byVal, FmgrInfo *cmp_func, + const RangeEntry *ranges, + const size_t nranges, + const int strategy, WrapperNode *result); WrapperNode *walk_expr_tree(Expr *expr, WalkerContext *context); void finish_least_greatest(WrapperNode *wrap, WalkerContext *context); +void refresh_walker_context_ranges(WalkerContext *context); +void clear_walker_context(WalkerContext *context); #endif /* PATHMAN_H */ diff --git a/src/pg_pathman.c b/src/pg_pathman.c index a030fb9b05..39389ce480 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -310,10 +310,7 @@ 
handle_modification_query(Query *parse) return; /* Parse syntax tree and extract partition ranges */ - context.prel = prel; - context.econtext = NULL; - context.hasLeast = false; - context.hasGreatest = false; + InitWalkerContext(&context, prel, NULL); wrap = walk_expr_tree(expr, &context); finish_least_greatest(wrap, &context); @@ -325,7 +322,7 @@ handle_modification_query(Query *parse) IndexRange irange = linitial_irange(ranges); if (irange.ir_lower == irange.ir_upper) { - Oid *children = (Oid *) dsm_array_get_pointer(&prel->children); + Oid *children = (Oid *) dsm_array_get_pointer(&prel->children, true); rte->relid = children[irange.ir_lower]; rte->inh = false; } @@ -713,6 +710,27 @@ change_varnos_in_restrinct_info(RestrictInfo *rinfo, change_varno_context *conte } } +void +refresh_walker_context_ranges(WalkerContext *context) +{ + RangeRelation *rangerel; + + rangerel = get_pathman_range_relation(context->prel->key.relid, NULL); + + context->ranges = dsm_array_get_pointer(&rangerel->ranges, true); + context->nranges = rangerel->ranges.elem_count; +} + +void +clear_walker_context(WalkerContext *context) +{ + if (context->ranges) + { + pfree(context->ranges); + context->ranges = NULL; + } +} + /* * Recursive function to walk through conditions tree */ @@ -729,25 +747,28 @@ walk_expr_tree(Expr *expr, WalkerContext *context) /* Useful for INSERT optimization */ case T_Const: return handle_const((Const *) expr, context); + /* AND, OR, NOT expressions */ case T_BoolExpr: boolexpr = (BoolExpr *) expr; return handle_boolexpr(boolexpr, context); + /* =, !=, <, > etc. 
*/ case T_OpExpr: opexpr = (OpExpr *) expr; return handle_opexpr(opexpr, context); + /* IN expression */ case T_ScalarArrayOpExpr: arrexpr = (ScalarArrayOpExpr *) expr; return handle_arrexpr(arrexpr, context); + default: - result = (WrapperNode *)palloc(sizeof(WrapperNode)); - result->orig = (const Node *)expr; + result = (WrapperNode *) palloc(sizeof(WrapperNode)); + result->orig = (const Node *) expr; result->args = NIL; result->rangeset = list_make1_irange(make_irange(0, context->prel->children_count - 1, true)); result->paramsel = 1.0; - return result; } } @@ -822,34 +843,34 @@ decrease_hashable_value(const PartRelationInfo *prel, Datum value) void select_range_partitions(const Datum value, - const RangeRelation *rangerel, - const int strategy, + const bool byVal, FmgrInfo *cmp_func, + const RangeEntry *ranges, + const size_t nranges, + const int strategy, WrapperNode *result) { - RangeEntry *current_re; - bool lossy = false, - is_less, - is_greater; + const RangeEntry *current_re; + bool lossy = false, + is_less, + is_greater; #ifdef USE_ASSERT_CHECKING - bool found = false; - int counter = 0; + bool found = false; + int counter = 0; #endif - int i, - startidx = 0, - endidx = rangerel->ranges.length - 1, - cmp_min, - cmp_max; - - RangeEntry *ranges = dsm_array_get_pointer(&rangerel->ranges); - bool byVal = rangerel->by_val; + int i, + startidx = 0, + endidx = nranges - 1, + cmp_min, + cmp_max; + /* Initial value (no missing partitions found) */ result->found_gap = false; /* Check boundaries */ - if (rangerel->ranges.length == 0) + if (nranges == 0) { result->rangeset = NIL; return; @@ -901,7 +922,7 @@ select_range_partitions(const Datum value, Assert(cmp_func); i = startidx + (endidx - startidx) / 2; - Assert(i >= 0 && i < rangerel->ranges.length); + Assert(i >= 0 && i < nranges); current_re = &ranges[i]; @@ -973,18 +994,18 @@ select_range_partitions(const Datum value, if (lossy) { result->rangeset = list_make1_irange(make_irange(i, i, true)); - if (i < 
rangerel->ranges.length - 1) + if (i < nranges - 1) result->rangeset = lappend_irange(result->rangeset, make_irange(i + 1, - rangerel->ranges.length - 1, + nranges - 1, false)); } else { result->rangeset = list_make1_irange(make_irange(i, - rangerel->ranges.length - 1, + nranges - 1, false)); } break; @@ -1003,15 +1024,13 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, const Var *v, const Const *c) { HashRelationKey key; - RangeRelation *rangerel; - Datum value; int int_value, strategy; FmgrInfo cmp_func; Oid cmp_proc_oid; const OpExpr *expr = (const OpExpr *)result->orig; TypeCacheEntry *tce; - const PartRelationInfo *prel = context->prel; + PartRelationInfo *prel = context->prel; /* Determine operator type */ tce = lookup_type_cache(v->vartype, @@ -1064,11 +1083,18 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, } case PT_RANGE: - value = c->constvalue; - rangerel = get_pathman_range_relation(prel->key.relid, NULL); - if (rangerel) + if (get_pathman_range_relation(context->prel->key.relid, NULL)) { - select_range_partitions(value, rangerel, strategy, &cmp_func, result); + if (!context->ranges) + refresh_walker_context_ranges(context); + + select_range_partitions(c->constvalue, + c->constbyval, + &cmp_func, + context->ranges, + context->nranges, + strategy, + result); return; } } @@ -1119,26 +1145,35 @@ make_hash(const PartRelationInfo *prel, int value) search_rangerel_result search_range_partition_eq(const Datum value, - const RangeRelation *rangerel, FmgrInfo *cmp_func, - int *part_idx) + const RangeRelation *rangerel, + RangeEntry *out_rentry) /* actual result */ { + RangeEntry *ranges; + size_t nranges; WrapperNode result; Assert(rangerel); - select_range_partitions(value, rangerel, + ranges = dsm_array_get_pointer(&rangerel->ranges, true); + nranges = rangerel->ranges.elem_count; + + select_range_partitions(value, + rangerel->by_val, + cmp_func, + ranges, + nranges, BTEqualStrategyNumber, - cmp_func, &result); + 
&result); if (result.found_gap) { - *part_idx = -1; + pfree(ranges); return SEARCH_RANGEREL_GAP; } else if (result.rangeset == NIL) { - *part_idx = -1; + pfree(ranges); return SEARCH_RANGEREL_OUT_OF_RANGE; } else @@ -1149,7 +1184,13 @@ search_range_partition_eq(const Datum value, Assert(irange.ir_lower == irange.ir_upper); Assert(irange.ir_valid); - *part_idx = irange.ir_lower; + /* Write result to the 'out_rentry' if necessary */ + if (out_rentry) + memcpy((void *) out_rentry, + (void *) &ranges[irange.ir_lower], + sizeof(RangeEntry)); + + pfree(ranges); return SEARCH_RANGEREL_FOUND; } } @@ -1188,7 +1229,6 @@ handle_const(const Const *c, WalkerContext *context) { Oid cmp_proc_oid; FmgrInfo cmp_func; - RangeRelation *rangerel; TypeCacheEntry *tce; tce = lookup_type_cache(c->consttype, 0); @@ -1197,11 +1237,17 @@ handle_const(const Const *c, WalkerContext *context) c->consttype, BTORDER_PROC); fmgr_info(cmp_proc_oid, &cmp_func); - rangerel = get_pathman_range_relation(prel->key.relid, NULL); - select_range_partitions(c->constvalue, rangerel, - BTEqualStrategyNumber, - &cmp_func, result); + if (!context->ranges) + refresh_walker_context_ranges(context); + + select_range_partitions(c->constvalue, + c->constbyval, + &cmp_func, + context->ranges, + context->nranges, + BTEqualStrategyNumber, + result); } break; diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 9bffae1aef..dc00dac3f3 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -88,17 +88,16 @@ on_partitions_removed(PG_FUNCTION_ARGS) Datum find_or_create_range_partition(PG_FUNCTION_ARGS) { - int relid = DatumGetInt32(PG_GETARG_DATUM(0)); + Oid relid = DatumGetObjectId(PG_GETARG_DATUM(0)); Datum value = PG_GETARG_DATUM(1); Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); - int pos; RangeRelation *rangerel; - RangeEntry *ranges; TypeCacheEntry *tce; PartRelationInfo *prel; Oid cmp_proc_oid; FmgrInfo cmp_func; search_rangerel_result search_state; + RangeEntry found_re; tce = lookup_type_cache(value_type, 
TYPECACHE_EQ_OPR | TYPECACHE_LT_OPR | TYPECACHE_GT_OPR | @@ -116,15 +115,14 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) BTORDER_PROC); fmgr_info(cmp_proc_oid, &cmp_func); - ranges = dsm_array_get_pointer(&rangerel->ranges); - search_state = search_range_partition_eq(value, rangerel, - &cmp_func, &pos); + search_state = search_range_partition_eq(value, &cmp_func, + rangerel, &found_re); /* * If found then just return oid, else create new partitions */ if (search_state == SEARCH_RANGEREL_FOUND) - PG_RETURN_OID(ranges[pos].child_oid); + PG_RETURN_OID(found_re.child_oid); /* * If not found and value is between first and last partitions */ @@ -144,14 +142,13 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) /* * Check if someone else has already created partition. */ - ranges = dsm_array_get_pointer(&rangerel->ranges); - search_state = search_range_partition_eq(value, rangerel, - &cmp_func, &pos); + search_state = search_range_partition_eq(value, &cmp_func, + rangerel, &found_re); if (search_state == SEARCH_RANGEREL_FOUND) { LWLockRelease(pmstate->edit_partitions_lock); LWLockRelease(pmstate->load_config_lock); - PG_RETURN_OID(ranges[pos].child_oid); + PG_RETURN_OID(found_re.child_oid); } else Assert(search_state != SEARCH_RANGEREL_GAP); @@ -167,10 +164,8 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) } /* Repeat binary search */ - Assert(SEARCH_RANGEREL_FOUND == search_range_partition_eq(value, - rangerel, - &cmp_func, - &pos)); + Assert(SEARCH_RANGEREL_FOUND == search_range_partition_eq(value, &cmp_func, + rangerel, NULL)); PG_RETURN_OID(child_oid); } } @@ -185,11 +180,11 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) Datum get_partition_range(PG_FUNCTION_ARGS) { - int parent_oid = DatumGetInt32(PG_GETARG_DATUM(0)); - int child_oid = DatumGetInt32(PG_GETARG_DATUM(1)); - int nelems = 2; - int i; - bool found = false; + Oid parent_oid = DatumGetObjectId(PG_GETARG_DATUM(0)); + Oid child_oid = DatumGetObjectId(PG_GETARG_DATUM(1)); + int nelems = 2; + 
int i; + bool found = false; Datum *elems; PartRelationInfo *prel; RangeRelation *rangerel; @@ -204,11 +199,11 @@ get_partition_range(PG_FUNCTION_ARGS) if (!prel || !rangerel) PG_RETURN_NULL(); - ranges = dsm_array_get_pointer(&rangerel->ranges); + ranges = dsm_array_get_pointer(&rangerel->ranges, true); tce = lookup_type_cache(prel->atttype, 0); /* Looking for specified partition */ - for(i=0; iranges.length; i++) + for (i = 0; i < rangerel->ranges.elem_count; i++) if (ranges[i].child_oid == child_oid) { found = true; @@ -242,32 +237,34 @@ get_partition_range(PG_FUNCTION_ARGS) Datum get_range_by_idx(PG_FUNCTION_ARGS) { - int parent_oid = DatumGetInt32(PG_GETARG_DATUM(0)); - int idx = DatumGetInt32(PG_GETARG_DATUM(1)); - PartRelationInfo *prel; - RangeRelation *rangerel; - RangeEntry *ranges; - RangeEntry *re; - Datum *elems; - TypeCacheEntry *tce; + Oid parent_oid = DatumGetObjectId(PG_GETARG_DATUM(0)); + int idx = DatumGetInt32(PG_GETARG_DATUM(1)); + PartRelationInfo *prel; + RangeRelation *rangerel; + RangeEntry *ranges; + RangeEntry re; + Datum *elems; + TypeCacheEntry *tce; prel = get_pathman_relation_info(parent_oid, NULL); rangerel = get_pathman_range_relation(parent_oid, NULL); - if (!prel || !rangerel || idx >= (int)rangerel->ranges.length) + if (!prel || !rangerel || idx >= (int)rangerel->ranges.elem_count) PG_RETURN_NULL(); tce = lookup_type_cache(prel->atttype, 0); - ranges = dsm_array_get_pointer(&rangerel->ranges); + ranges = dsm_array_get_pointer(&rangerel->ranges, true); if (idx >= 0) - re = &ranges[idx]; + re = ranges[idx]; else - re = &ranges[rangerel->ranges.length - 1]; + re = ranges[rangerel->ranges.elem_count - 1]; elems = palloc(2 * sizeof(Datum)); - elems[0] = PATHMAN_GET_DATUM(re->min, rangerel->by_val); - elems[1] = PATHMAN_GET_DATUM(re->max, rangerel->by_val); + elems[0] = PATHMAN_GET_DATUM(re.min, rangerel->by_val); + elems[1] = PATHMAN_GET_DATUM(re.max, rangerel->by_val); + + pfree(ranges); PG_RETURN_ARRAYTYPE_P( construct_array(elems, 
2, prel->atttype, @@ -280,18 +277,19 @@ get_range_by_idx(PG_FUNCTION_ARGS) Datum get_min_range_value(PG_FUNCTION_ARGS) { - int parent_oid = DatumGetInt32(PG_GETARG_DATUM(0)); - PartRelationInfo *prel; - RangeRelation *rangerel; - RangeEntry *ranges; + Oid parent_oid = DatumGetObjectId(PG_GETARG_DATUM(0)); + PartRelationInfo *prel; + RangeRelation *rangerel; + RangeEntry *ranges; prel = get_pathman_relation_info(parent_oid, NULL); rangerel = get_pathman_range_relation(parent_oid, NULL); - if (!prel || !rangerel || prel->parttype != PT_RANGE || rangerel->ranges.length == 0) + if (!prel || !rangerel || prel->parttype != PT_RANGE || rangerel->ranges.elem_count == 0) PG_RETURN_NULL(); - ranges = dsm_array_get_pointer(&rangerel->ranges); + ranges = dsm_array_get_pointer(&rangerel->ranges, true); + PG_RETURN_DATUM(PATHMAN_GET_DATUM(ranges[0].min, rangerel->by_val)); } @@ -301,19 +299,20 @@ get_min_range_value(PG_FUNCTION_ARGS) Datum get_max_range_value(PG_FUNCTION_ARGS) { - int parent_oid = DatumGetInt32(PG_GETARG_DATUM(0)); - PartRelationInfo *prel; - RangeRelation *rangerel; - RangeEntry *ranges; + Oid parent_oid = DatumGetObjectId(PG_GETARG_DATUM(0)); + PartRelationInfo *prel; + RangeRelation *rangerel; + RangeEntry *ranges; prel = get_pathman_relation_info(parent_oid, NULL); rangerel = get_pathman_range_relation(parent_oid, NULL); - if (!prel || !rangerel || prel->parttype != PT_RANGE || rangerel->ranges.length == 0) + if (!prel || !rangerel || prel->parttype != PT_RANGE || rangerel->ranges.elem_count == 0) PG_RETURN_NULL(); - ranges = dsm_array_get_pointer(&rangerel->ranges); - PG_RETURN_DATUM(PATHMAN_GET_DATUM(ranges[rangerel->ranges.length-1].max, rangerel->by_val)); + ranges = dsm_array_get_pointer(&rangerel->ranges, true); + + PG_RETURN_DATUM(PATHMAN_GET_DATUM(ranges[rangerel->ranges.elem_count - 1].max, rangerel->by_val)); } /* @@ -323,14 +322,14 @@ get_max_range_value(PG_FUNCTION_ARGS) Datum check_overlap(PG_FUNCTION_ARGS) { - int parent_oid = 
DatumGetInt32(PG_GETARG_DATUM(0)); - Datum p1 = PG_GETARG_DATUM(1); - Oid p1_type = get_fn_expr_argtype(fcinfo->flinfo, 1); - Datum p2 = PG_GETARG_DATUM(2); - Oid p2_type = get_fn_expr_argtype(fcinfo->flinfo, 2); - PartRelationInfo *prel; - RangeRelation *rangerel; - RangeEntry *ranges; + Oid parent_oid = DatumGetObjectId(PG_GETARG_DATUM(0)); + Datum p1 = PG_GETARG_DATUM(1); + Oid p1_type = get_fn_expr_argtype(fcinfo->flinfo, 1); + Datum p2 = PG_GETARG_DATUM(2); + Oid p2_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + PartRelationInfo *prel; + RangeRelation *rangerel; + RangeEntry *ranges; FmgrInfo cmp_func_1; FmgrInfo cmp_func_2; int i; @@ -347,18 +346,22 @@ check_overlap(PG_FUNCTION_ARGS) fill_type_cmp_fmgr_info(&cmp_func_2, p2_type, prel->atttype); byVal = rangerel->by_val; - ranges = (RangeEntry *) dsm_array_get_pointer(&rangerel->ranges); - for (i=0; iranges.length; i++) + ranges = (RangeEntry *) dsm_array_get_pointer(&rangerel->ranges, true); + for (i = 0; i < rangerel->ranges.elem_count; i++) { int c1 = FunctionCall2(&cmp_func_1, p1, - PATHMAN_GET_DATUM(ranges[i].max, byVal)); + PATHMAN_GET_DATUM(ranges[i].max, byVal)); int c2 = FunctionCall2(&cmp_func_2, p2, - PATHMAN_GET_DATUM(ranges[i].min, byVal)); + PATHMAN_GET_DATUM(ranges[i].min, byVal)); if (c1 < 0 && c2 > 0) + { + pfree(ranges); PG_RETURN_BOOL(true); + } } + pfree(ranges); PG_RETURN_BOOL(false); } diff --git a/src/runtimeappend.h b/src/runtimeappend.h index db2e4aca60..a1f4a03f42 100644 --- a/src/runtimeappend.h +++ b/src/runtimeappend.h @@ -51,6 +51,10 @@ typedef struct /* Last saved tuple (for SRF projections) */ TupleTableSlot *slot; + + /* Cached walker context */ + WalkerContext wcxt; + bool wcxt_cached; } RuntimeAppendState; diff --git a/src/worker.c b/src/worker.c index 795ee28b68..c478a0ebe2 100644 --- a/src/worker.c +++ b/src/worker.c @@ -164,17 +164,16 @@ Oid create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed) { int ret; - RangeEntry *ranges; Datum vals[2]; Oid oids[] 
= {OIDOID, value_type}; bool nulls[] = {false, false}; char *sql; - int pos; PartRelationInfo *prel; RangeRelation *rangerel; FmgrInfo cmp_func; char *schema; search_rangerel_result search_state; + RangeEntry found_re; *crashed = false; schema = get_extension_schema(); @@ -211,12 +210,10 @@ create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed) } PG_END_TRY(); - /* Repeat binary search */ - ranges = dsm_array_get_pointer(&rangerel->ranges); - search_state = search_range_partition_eq(value, rangerel, - &cmp_func, &pos); + search_state = search_range_partition_eq(value, &cmp_func, + rangerel, &found_re); if (search_state == SEARCH_RANGEREL_FOUND) - return ranges[pos].child_oid; + return found_re.child_oid; return 0; } From c79b3f7e85ce7a840a91ef663cb01a224f6f9122 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 2 Jun 2016 16:13:29 +0300 Subject: [PATCH 013/184] introduce InitWalkerContextCustomNode macro, new comments --- src/nodes_common.c | 18 +++--------------- src/partition_filter.c | 20 ++++---------------- src/partition_filter.h | 5 +++-- src/pathman.h | 30 +++++++++++++++++++++++++----- src/pg_pathman.c | 28 +++++++++++++++++++--------- src/pl_funcs.c | 1 - src/utils.c | 18 ++++++++++++++++++ 7 files changed, 72 insertions(+), 48 deletions(-) diff --git a/src/nodes_common.c b/src/nodes_common.c index a7e9fbe6f9..7750b5f225 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -481,6 +481,7 @@ end_append_common(CustomScanState *node) { RuntimeAppendState *scan_state = (RuntimeAppendState *) node; + clear_walker_context(&scan_state->wcxt); clear_plan_states(&scan_state->css); hash_destroy(scan_state->children_table); } @@ -498,21 +499,8 @@ rescan_append_common(CustomScanState *node) ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); - /* - * We'd like to persist RangeEntry array - * in case of range partitioning, so 'wcxt' - * is stored inside of RuntimeAppendState - */ - if (!scan_state->wcxt_cached) - { 
- scan_state->wcxt.prel = prel; - scan_state->wcxt.econtext = econtext; - scan_state->wcxt.ranges = NULL; - - scan_state->wcxt_cached = true; - } - scan_state->wcxt.hasLeast = false; /* refresh runtime values */ - scan_state->wcxt.hasGreatest = false; + InitWalkerContextCustomNode(&scan_state->wcxt, scan_state->prel, + econtext, &scan_state->wcxt_cached); foreach (lc, scan_state->custom_exprs) { diff --git a/src/partition_filter.c b/src/partition_filter.c index 2f6811ce9b..f9ab002343 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -159,21 +159,8 @@ partition_filter_exec(CustomScanState *node) CopyToTempConst(constlen, attlen); CopyToTempConst(constbyval, attbyval); - /* - * We'd like to persist RangeEntry array - * in case of range partitioning, so 'wcxt' - * is stored inside of PartitionFilterState - */ - if (!state->wcxt_cached) - { - state->wcxt.prel = state->prel; - state->wcxt.econtext = econtext; - state->wcxt.ranges = NULL; - - state->wcxt_cached = true; - } - state->wcxt.hasLeast = false; /* refresh runtime values */ - state->wcxt.hasGreatest = false; + InitWalkerContextCustomNode(&state->wcxt, state->prel, + econtext, &state->wcxt_cached); ranges = walk_expr_tree((Expr *) &state->temp_const, &state->wcxt)->rangeset; parts = get_partition_oids(ranges, &nparts, state->prel); @@ -215,11 +202,12 @@ partition_filter_end(CustomScanState *node) heap_close(rri_handle->resultRelInfo->ri_RelationDesc, RowExclusiveLock); } - hash_destroy(state->result_rels_table); Assert(list_length(node->custom_ps) == 1); ExecEndNode((PlanState *) linitial(node->custom_ps)); + + clear_walker_context(&state->wcxt); } void diff --git a/src/partition_filter.h b/src/partition_filter.h index 14480b48e6..71b3a89f0e 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -23,13 +23,14 @@ typedef struct ResultRelInfo *savedRelInfo; Plan *subplan; - Const temp_const; /* temporary const for expr walker */ + Const temp_const; /* temporary const for expr walker 
*/ HTAB *result_rels_table; HASHCTL result_rels_table_config; WalkerContext wcxt; - bool wcxt_cached; + bool wcxt_cached; /* does wcxt contain cached data, + e.g. RangeEntry array? */ } PartitionFilterState; diff --git a/src/pathman.h b/src/pathman.h index 9616934f36..6c72984be8 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -78,7 +78,6 @@ typedef struct PartRelationInfo PartType parttype; Index attnum; Oid atttype; - } PartRelationInfo; /* @@ -256,12 +255,12 @@ typedef struct typedef struct { /* Main partitioning structure */ - PartRelationInfo *prel; + const PartRelationInfo *prel; /* Cached values */ - RangeEntry *ranges; /*cached RangeEntry array */ - size_t nranges; - ExprContext *econtext; + const RangeEntry *ranges; /* cached RangeEntry array (copy) */ + size_t nranges; /* number of RangeEntries */ + ExprContext *econtext; /* for ExecEvalExpr() */ /* Runtime values */ bool hasLeast, @@ -270,6 +269,9 @@ typedef struct greatest; } WalkerContext; +/* + * Usual initialization procedure for WalkerContext + */ #define InitWalkerContext(context, prel_info, ecxt) \ do { \ (context)->prel = (prel_info); \ @@ -279,6 +281,24 @@ typedef struct (context)->hasGreatest = false; \ } while (0) +/* + * We'd like to persist RangeEntry (ranges) array + * in case of range partitioning, so 'wcxt' is stored + * inside of Custom Node + */ +#define InitWalkerContextCustomNode(context, prel_info, ecxt, isCached) \ + do { \ + if (!*isCached) \ + { \ + (context)->prel = prel_info; \ + (context)->econtext = ecxt; \ + (context)->ranges = NULL; \ + *isCached = true; \ + } \ + (context)->hasLeast = false; \ + (context)->hasGreatest = false; \ + } while (0) + void select_range_partitions(const Datum value, const bool byVal, FmgrInfo *cmp_func, diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 39389ce480..bcb66d36f8 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -313,6 +313,7 @@ handle_modification_query(Query *parse) InitWalkerContext(&context, prel, NULL); wrap = 
walk_expr_tree(expr, &context); finish_least_greatest(wrap, &context); + clear_walker_context(&context); ranges = irange_list_intersect(ranges, wrap->rangeset); @@ -710,6 +711,12 @@ change_varnos_in_restrinct_info(RestrictInfo *rinfo, change_varno_context *conte } } +/* + * Refresh cached RangeEntry array within WalkerContext + * + * This is essential when we add new partitions + * while executing INSERT query on partitioned table. + */ void refresh_walker_context_ranges(WalkerContext *context) { @@ -721,12 +728,15 @@ refresh_walker_context_ranges(WalkerContext *context) context->nranges = rangerel->ranges.elem_count; } +/* + * Free all temporary data cached by WalkerContext + */ void clear_walker_context(WalkerContext *context) { if (context->ranges) { - pfree(context->ranges); + pfree((void *) context->ranges); context->ranges = NULL; } } @@ -1023,14 +1033,14 @@ static void handle_binary_opexpr(WalkerContext *context, WrapperNode *result, const Var *v, const Const *c) { - HashRelationKey key; - int int_value, - strategy; - FmgrInfo cmp_func; - Oid cmp_proc_oid; - const OpExpr *expr = (const OpExpr *)result->orig; - TypeCacheEntry *tce; - PartRelationInfo *prel = context->prel; + HashRelationKey key; + int int_value, + strategy; + TypeCacheEntry *tce; + FmgrInfo cmp_func; + Oid cmp_proc_oid; + const OpExpr *expr = (const OpExpr *) result->orig; + const PartRelationInfo *prel = context->prel; /* Determine operator type */ tce = lookup_type_cache(v->vartype, diff --git a/src/pl_funcs.c b/src/pl_funcs.c index dc00dac3f3..cc5bcac0f8 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -381,4 +381,3 @@ release_partitions_lock(PG_FUNCTION_ARGS) LWLockRelease(pmstate->edit_partitions_lock); PG_RETURN_NULL(); } - diff --git a/src/utils.c b/src/utils.c index c8a208925a..ee9a74a219 100644 --- a/src/utils.c +++ b/src/utils.c @@ -21,6 +21,10 @@ static bool clause_contains_params_walker(Node *node, void *context); + +/* + * Check whether clause contains PARAMs or not + */ bool 
clause_contains_params(Node *clause) { @@ -59,6 +63,11 @@ bms_print(Bitmapset *bms) return str.data; } +/* + * Copied from util/plancat.c + * + * Build a targetlist representing the columns of the specified index. + */ List * build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation) @@ -143,6 +152,12 @@ check_rinfo_for_partitioned_attr(List *rinfo, Index varno, AttrNumber varattno) return false; } +/* + * Append trigger info contained in 'more' to 'src'. + * + * This allows us to execute some of main table's triggers on children. + * See ExecInsert() for more details. + */ TriggerDesc * append_trigger_descs(TriggerDesc *src, TriggerDesc *more, bool *grown_up) { @@ -234,6 +249,9 @@ add_missing_partition(Oid partitioned_table, Const *value) return result; } +/* + * Get BTORDER_PROC for two types described by Oids + */ void fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, Oid type2) { From 421b2eda92e2a8d3d15998df8bc40cbfa9141bc2 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 2 Jun 2016 19:21:31 +0300 Subject: [PATCH 014/184] improve lookup_type_cache() usage --- range.sql | 6 +++--- src/init.c | 2 +- src/pg_pathman.c | 23 ++++------------------- src/pl_funcs.c | 12 +----------- src/utils.c | 5 +---- 5 files changed, 10 insertions(+), 38 deletions(-) diff --git a/range.sql b/range.sql index d6527b10a9..8a8fe6f704 100644 --- a/range.sql +++ b/range.sql @@ -1134,9 +1134,9 @@ BEGIN SELECT * INTO schema, relname FROM @extschema@.get_plain_schema_and_relname(relation); - EXECUTE format('DROP TRIGGER IF EXISTS %s ON %s CASCADE' - , format('"%s_%s_insert_trigger"', schema, relname) - , relation::TEXT); + --EXECUTE format('DROP TRIGGER IF EXISTS %s ON %s CASCADE' + -- , format('"%s_%s_insert_trigger"', schema, relname) + -- , relation::TEXT); END $$ LANGUAGE plpgsql; diff --git a/src/init.c b/src/init.c index 81aaf4d6f5..861c0bee17 100644 --- a/src/init.c +++ b/src/init.c @@ -395,7 +395,7 @@ load_check_constraints(Oid parent_oid, Snapshot 
snapshot) bool byVal = rangerel->by_val; /* Sort ascending */ - tce = lookup_type_cache(prel->atttype, TYPECACHE_CMP_PROC | TYPECACHE_CMP_PROC_FINFO); + tce = lookup_type_cache(prel->atttype, TYPECACHE_CMP_PROC_FINFO); qsort_type_cmp_func = &tce->cmp_proc_finfo; globalByVal = byVal; qsort(ranges, proc, sizeof(RangeEntry), cmp_range_entries); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index bcb66d36f8..536a2ea40c 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -1038,20 +1038,12 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, strategy; TypeCacheEntry *tce; FmgrInfo cmp_func; - Oid cmp_proc_oid; const OpExpr *expr = (const OpExpr *) result->orig; const PartRelationInfo *prel = context->prel; - /* Determine operator type */ - tce = lookup_type_cache(v->vartype, - TYPECACHE_BTREE_OPFAMILY | TYPECACHE_CMP_PROC | TYPECACHE_CMP_PROC_FINFO); - + tce = lookup_type_cache(v->vartype, TYPECACHE_BTREE_OPFAMILY); strategy = get_op_opfamily_strategy(expr->opno, tce->btree_opf); - cmp_proc_oid = get_opfamily_proc(tce->btree_opf, - c->consttype, - prel->atttype, - BTORDER_PROC); - fmgr_info(cmp_proc_oid, &cmp_func); + fill_type_cmp_fmgr_info(&cmp_func, c->consttype, prel->atttype); switch (prel->parttype) { @@ -1237,23 +1229,16 @@ handle_const(const Const *c, WalkerContext *context) case PT_RANGE: { - Oid cmp_proc_oid; - FmgrInfo cmp_func; TypeCacheEntry *tce; - tce = lookup_type_cache(c->consttype, 0); - cmp_proc_oid = get_opfamily_proc(tce->btree_opf, - c->consttype, - c->consttype, - BTORDER_PROC); - fmgr_info(cmp_proc_oid, &cmp_func); + tce = lookup_type_cache(c->consttype, TYPECACHE_CMP_PROC_FINFO); if (!context->ranges) refresh_walker_context_ranges(context); select_range_partitions(c->constvalue, c->constbyval, - &cmp_func, + &tce->cmp_proc_finfo, context->ranges, context->nranges, BTEqualStrategyNumber, diff --git a/src/pl_funcs.c b/src/pl_funcs.c index cc5bcac0f8..1f4b806063 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -92,28 +92,18 
@@ find_or_create_range_partition(PG_FUNCTION_ARGS) Datum value = PG_GETARG_DATUM(1); Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); RangeRelation *rangerel; - TypeCacheEntry *tce; PartRelationInfo *prel; - Oid cmp_proc_oid; FmgrInfo cmp_func; search_rangerel_result search_state; RangeEntry found_re; - tce = lookup_type_cache(value_type, - TYPECACHE_EQ_OPR | TYPECACHE_LT_OPR | TYPECACHE_GT_OPR | - TYPECACHE_CMP_PROC | TYPECACHE_CMP_PROC_FINFO); - prel = get_pathman_relation_info(relid, NULL); rangerel = get_pathman_range_relation(relid, NULL); if (!prel || !rangerel) PG_RETURN_NULL(); - cmp_proc_oid = get_opfamily_proc(tce->btree_opf, - value_type, - prel->atttype, - BTORDER_PROC); - fmgr_info(cmp_proc_oid, &cmp_func); + fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); search_state = search_range_partition_eq(value, &cmp_func, rangerel, &found_re); diff --git a/src/utils.c b/src/utils.c index ee9a74a219..2034b8d63e 100644 --- a/src/utils.c +++ b/src/utils.c @@ -258,10 +258,7 @@ fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, Oid type2) Oid cmp_proc_oid; TypeCacheEntry *tce; - tce = lookup_type_cache(type1, - TYPECACHE_BTREE_OPFAMILY | - TYPECACHE_CMP_PROC | - TYPECACHE_CMP_PROC_FINFO); + tce = lookup_type_cache(type1, TYPECACHE_BTREE_OPFAMILY); cmp_proc_oid = get_opfamily_proc(tce->btree_opf, type1, From 97c31cd225a44cf4f4447a1e798ee3994f0ad874 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 2 Jun 2016 19:54:59 +0300 Subject: [PATCH 015/184] introduce drop_range_triggers() function --- hash.sql | 23 +---------------------- init.sql | 23 +++++++++++++++++++++-- range.sql | 23 +---------------------- 3 files changed, 23 insertions(+), 46 deletions(-) diff --git a/hash.sql b/hash.sql index fcddc24e1c..6039a16525 100644 --- a/hash.sql +++ b/hash.sql @@ -83,7 +83,7 @@ BEGIN v_relname := @extschema@.validate_relname(relation); /* Drop trigger first */ - PERFORM @extschema@.drop_hash_triggers(relation); + PERFORM 
@extschema@.drop_triggers(relation); DELETE FROM @extschema@.pathman_config WHERE relname::regclass = relation; FOR v_rec in (SELECT inhrelid::regclass::text AS tbl @@ -108,27 +108,6 @@ BEGIN END $$ LANGUAGE plpgsql; -/* - * Drops hash trigger - */ -CREATE OR REPLACE FUNCTION @extschema@.drop_hash_triggers(IN relation REGCLASS) -RETURNS VOID AS -$$ -DECLARE - relname TEXT; - schema TEXT; - funcname TEXT; -BEGIN - SELECT * INTO schema, relname - FROM @extschema@.get_plain_schema_and_relname(relation); - - funcname := schema || '.' || quote_ident(format('%s_insert_trigger_func', relname)); - EXECUTE format('DROP FUNCTION IF EXISTS %s() CASCADE', funcname); - funcname := schema || '.' || quote_ident(format('%s_update_trigger_func', relname)); - EXECUTE format('DROP FUNCTION IF EXISTS %s() CASCADE', funcname); -END -$$ LANGUAGE plpgsql; - /* * Creates an update trigger */ diff --git a/init.sql b/init.sql index 56c591b6e0..4d8984b8e8 100644 --- a/init.sql +++ b/init.sql @@ -127,9 +127,9 @@ BEGIN DELETE FROM @extschema@.pathman_config WHERE relname = relation; IF parttype = 1 THEN - PERFORM @extschema@.drop_hash_triggers(relation); + PERFORM @extschema@.drop_triggers(relation); ELSIF parttype = 2 THEN - PERFORM @extschema@.drop_range_triggers(relation); + PERFORM @extschema@.drop_triggers(relation); END IF; /* Notify backend about changes */ @@ -340,3 +340,22 @@ RETURNS VOID AS 'pg_pathman', 'acquire_partitions_lock' LANGUAGE C STRICT; */ CREATE OR REPLACE FUNCTION @extschema@.release_partitions_lock() RETURNS VOID AS 'pg_pathman', 'release_partitions_lock' LANGUAGE C STRICT; + +/* + * Drop trigger + */ +CREATE OR REPLACE FUNCTION @extschema@.drop_triggers(IN relation REGCLASS) +RETURNS VOID AS +$$ +DECLARE + relname TEXT; + schema TEXT; + funcname TEXT; +BEGIN + SELECT * INTO schema, relname + FROM @extschema@.get_plain_schema_and_relname(relation); + + funcname := schema || '.' 
|| quote_ident(format('%s_update_trigger_func', relname)); + EXECUTE format('DROP FUNCTION IF EXISTS %s() CASCADE', funcname); +END +$$ LANGUAGE plpgsql; diff --git a/range.sql b/range.sql index 8a8fe6f704..7d6a3ff3a5 100644 --- a/range.sql +++ b/range.sql @@ -1094,7 +1094,7 @@ BEGIN v_relname := @extschema@.validate_relname(relation); /* Drop trigger first */ - PERFORM @extschema@.drop_range_triggers(relation); + PERFORM @extschema@.drop_triggers(relation); FOR v_rec IN (SELECT inhrelid::regclass::text AS tbl FROM pg_inherits WHERE inhparent::regclass = relation) @@ -1120,27 +1120,6 @@ BEGIN END $$ LANGUAGE plpgsql; - -/* - * Drop trigger - */ -CREATE OR REPLACE FUNCTION @extschema@.drop_range_triggers(IN relation REGCLASS) -RETURNS VOID AS -$$ -DECLARE - schema TEXT; - relname TEXT; -BEGIN - SELECT * INTO schema, relname - FROM @extschema@.get_plain_schema_and_relname(relation); - - --EXECUTE format('DROP TRIGGER IF EXISTS %s ON %s CASCADE' - -- , format('"%s_%s_insert_trigger"', schema, relname) - -- , relation::TEXT); -END -$$ LANGUAGE plpgsql; - - /* * Internal function used to create new partitions on insert or update trigger. * Invoked from C-function find_or_create_range_partition(). 
From 32d7514e63d07108d24b56592aac77f0065e9558 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 3 Jun 2016 16:02:49 +0300 Subject: [PATCH 016/184] improve create_partitions_bg_worker(), introduce drop_triggers() --- init.sql | 9 +-- range.sql | 2 +- src/init.c | 4 +- src/partition_filter.c | 5 +- src/pathman.h | 2 +- src/pl_funcs.c | 23 +------ src/utils.c | 24 +------ src/utils.h | 2 - src/worker.c | 139 +++++++++++++++++++++++++---------------- 9 files changed, 95 insertions(+), 115 deletions(-) diff --git a/init.sql b/init.sql index 4d8984b8e8..f7aa514c4e 100644 --- a/init.sql +++ b/init.sql @@ -119,18 +119,11 @@ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION @extschema@.disable_partitioning(IN relation TEXT) RETURNS VOID AS $$ -DECLARE - parttype INTEGER; BEGIN relation := @extschema@.validate_relname(relation); - parttype := parttype FROM pathman_config WHERE relname = relation; DELETE FROM @extschema@.pathman_config WHERE relname = relation; - IF parttype = 1 THEN - PERFORM @extschema@.drop_triggers(relation); - ELSIF parttype = 2 THEN - PERFORM @extschema@.drop_triggers(relation); - END IF; + PERFORM @extschema@.drop_triggers(relation); /* Notify backend about changes */ PERFORM on_remove_partitions(relation::regclass::integer); diff --git a/range.sql b/range.sql index 7d6a3ff3a5..4523802d62 100644 --- a/range.sql +++ b/range.sql @@ -1194,7 +1194,7 @@ BEGIN RAISE NOTICE 'partition % created', v_part; END LOOP; ELSE - RAISE NOTICE 'Not implemented yet'; + RAISE EXCEPTION 'Could not create partition'; END IF; IF i > 0 THEN diff --git a/src/init.c b/src/init.c index 861c0bee17..5a390dc4b8 100644 --- a/src/init.c +++ b/src/init.c @@ -198,11 +198,11 @@ load_relations_hashtable(bool reinitialize) TupleDesc tupdesc = SPI_tuptable->tupdesc; SPITupleTable *tuptable = SPI_tuptable; - for (i=0; ivals[i]; - int oid = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 1, &isnull)); + Oid oid = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 1, &isnull)); key.dbid = 
MyDatabaseId; key.relid = oid; diff --git a/src/partition_filter.c b/src/partition_filter.c index f9ab002343..af51a58b7a 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -169,8 +169,9 @@ partition_filter_exec(CustomScanState *node) elog(ERROR, "PartitionFilter selected more than one partition"); else if (nparts == 0) { - selected_partid = add_missing_partition(state->partitioned_table, - &state->temp_const); + selected_partid = create_partitions_bg_worker(state->partitioned_table, + state->temp_const.constvalue, + state->temp_const.consttype); refresh_walker_context_ranges(&state->wcxt); } diff --git a/src/pathman.h b/src/pathman.h index 6c72984be8..4bb03efd74 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -229,7 +229,7 @@ search_rangerel_result search_range_partition_eq(const Datum value, const RangeRelation *rangerel, RangeEntry *out_rentry); char *get_extension_schema(void); -Oid create_partitions_bg_worker(Oid relid, Datum value, Oid value_type, bool *crashed); +Oid create_partitions_bg_worker(Oid relid, Datum value, Oid value_type); Oid create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed); void handle_modification_query(Query *parse); diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 1f4b806063..094ae89c3d 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -121,13 +121,6 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) else { Oid child_oid; - bool crashed = false; - - /* Lock config before appending new partitions */ - LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); - - /* Restrict concurrent partition creation */ - LWLockAcquire(pmstate->edit_partitions_lock, LW_EXCLUSIVE); /* * Check if someone else has already created partition. 
@@ -136,26 +129,12 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) rangerel, &found_re); if (search_state == SEARCH_RANGEREL_FOUND) { - LWLockRelease(pmstate->edit_partitions_lock); - LWLockRelease(pmstate->load_config_lock); PG_RETURN_OID(found_re.child_oid); } - else - Assert(search_state != SEARCH_RANGEREL_GAP); /* Start background worker to create new partitions */ - child_oid = create_partitions_bg_worker(relid, value, value_type, &crashed); - - /* Release locks */ - if (!crashed) - { - LWLockRelease(pmstate->edit_partitions_lock); - LWLockRelease(pmstate->load_config_lock); - } + child_oid = create_partitions_bg_worker(relid, value, value_type); - /* Repeat binary search */ - Assert(SEARCH_RANGEREL_FOUND == search_range_partition_eq(value, &cmp_func, - rangerel, NULL)); PG_RETURN_OID(child_oid); } } diff --git a/src/utils.c b/src/utils.c index 2034b8d63e..fd41b7a640 100644 --- a/src/utils.c +++ b/src/utils.c @@ -153,7 +153,7 @@ check_rinfo_for_partitioned_attr(List *rinfo, Index varno, AttrNumber varattno) } /* - * Append trigger info contained in 'more' to 'src'. + * Append trigger info contained in 'more' to 'src', both remain unmodified. * * This allows us to execute some of main table's triggers on children. * See ExecInsert() for more details. 
@@ -227,28 +227,6 @@ append_trigger_descs(TriggerDesc *src, TriggerDesc *more, bool *grown_up) return new_desc; } -Oid -add_missing_partition(Oid partitioned_table, Const *value) -{ - bool crashed; - Oid result = InvalidOid; - - SPI_connect(); - PushActiveSnapshot(GetTransactionSnapshot()); - - /* Create partitions */ - result = create_partitions(partitioned_table, - value->constvalue, - value->consttype, - &crashed); - - /* Cleanup */ - SPI_finish(); - PopActiveSnapshot(); - - return result; -} - /* * Get BTORDER_PROC for two types described by Oids */ diff --git a/src/utils.h b/src/utils.h index 69e8545500..cf150c20bb 100644 --- a/src/utils.h +++ b/src/utils.h @@ -31,8 +31,6 @@ TriggerDesc * append_trigger_descs(TriggerDesc *src, TriggerDesc *more, bool *grown_up); -Oid add_missing_partition(Oid partitioned_table, Const *value); - void fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, Oid type2); diff --git a/src/worker.c b/src/worker.c index c478a0ebe2..2c3a7a7504 100644 --- a/src/worker.c +++ b/src/worker.c @@ -45,17 +45,31 @@ typedef struct PartitionArgs * waits till it finishes the job and returns the result (new partition oid) */ Oid -create_partitions_bg_worker(Oid relid, Datum value, Oid value_type, bool *crashed) +create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) { +#define HandleError(condition, new_state) \ + if (condition) { exec_state = (new_state); goto handle_exec_state; } + + /* Execution state to be checked */ + enum + { + BGW_OK = 0, /* everything is fine (default) */ + BGW_COULD_NOT_START, /* could not start worker */ + BGW_PM_DIED, /* postmaster died */ + BGW_CRASHED /* worker crashed */ + } exec_state = BGW_OK; + BackgroundWorker worker; - BackgroundWorkerHandle *worker_handle; - BgwHandleStatus status; - dsm_segment *segment; - dsm_handle segment_handle; - pid_t pid; - PartitionArgs *args; - Oid child_oid; - TypeCacheEntry *tce; + BackgroundWorkerHandle *bgw_handle; + BgwHandleStatus bgw_status; + bool bgw_started; + 
dsm_segment *segment; + dsm_handle segment_handle; + pid_t pid; + PartitionArgs *args; + TypeCacheEntry *tce; + Oid child_oid = InvalidOid; + /* Create a dsm segment for the worker to pass arguments */ segment = dsm_create(sizeof(PartitionArgs), 0); @@ -84,34 +98,53 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type, bool *crashe worker.bgw_main_arg = Int32GetDatum(segment_handle); worker.bgw_notify_pid = MyProcPid; + LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); + LWLockAcquire(pmstate->edit_partitions_lock, LW_EXCLUSIVE); + /* Start dynamic worker */ - if (!RegisterDynamicBackgroundWorker(&worker, &worker_handle)) - { - elog(WARNING, "Unable to create background worker for pg_pathman"); - } + bgw_started = RegisterDynamicBackgroundWorker(&worker, &bgw_handle); + HandleError(bgw_started == false, BGW_COULD_NOT_START); - status = WaitForBackgroundWorkerStartup(worker_handle, &pid); - if (status == BGWH_POSTMASTER_DIED) - { - ereport(WARNING, - (errmsg("Postmaster died during the pg_pathman background worker process"), - errhint("More details may be available in the server log."))); - } + /* Wait till the worker starts */ + bgw_status = WaitForBackgroundWorkerStartup(bgw_handle, &pid); + HandleError(bgw_status == BGWH_POSTMASTER_DIED, BGW_PM_DIED); - /* Wait till the worker finishes its job */ - status = WaitForBackgroundWorkerShutdown(worker_handle); - if (status == BGWH_POSTMASTER_DIED) - { - ereport(WARNING, - (errmsg("Postmaster died during the pg_pathman background worker process"), - errhint("More details may be available in the server log."))); - } - *crashed = args->crashed; + /* Wait till the worker finishes job */ + bgw_status = WaitForBackgroundWorkerShutdown(bgw_handle); + HandleError(bgw_status == BGWH_POSTMASTER_DIED, BGW_PM_DIED); + + /* Save the result (partition Oid) */ child_oid = args->result; + +/* end execution */ +handle_exec_state: + LWLockRelease(pmstate->load_config_lock); + 
LWLockRelease(pmstate->edit_partitions_lock); + /* Free dsm segment */ dsm_detach(segment); + switch (exec_state) + { + case BGW_COULD_NOT_START: + elog(ERROR, "Unable to create background worker for pg_pathman"); + break; + + case BGW_PM_DIED: + ereport(ERROR, + (errmsg("Postmaster died during the pg_pathman background worker process"), + errhint("More details may be available in the server log."))); + break; + + case BGW_CRASHED: + elog(ERROR, "Could not create partition due to background worker crash"); + break; + + default: + break; + } + return child_oid; } @@ -129,10 +162,8 @@ bg_worker_main(Datum main_arg) /* Attach to dynamic shared memory */ if (!handle) - { - ereport(WARNING, - (errmsg("pg_pathman worker: invalid dsm_handle"))); - } + ereport(WARNING, (errmsg("pg_pathman worker: invalid dsm_handle"))); + segment = dsm_attach(handle); args = dsm_segment_address(segment); @@ -163,19 +194,16 @@ bg_worker_main(Datum main_arg) Oid create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed) { - int ret; - Datum vals[2]; - Oid oids[] = {OIDOID, value_type}; - bool nulls[] = {false, false}; + Oid oids[] = { OIDOID, value_type }; + Datum vals[] = { ObjectIdGetDatum(relid), value }; + bool nulls[] = { false, false }; char *sql; PartRelationInfo *prel; RangeRelation *rangerel; FmgrInfo cmp_func; char *schema; - search_rangerel_result search_state; - RangeEntry found_re; - *crashed = false; + *crashed = true; schema = get_extension_schema(); prel = get_pathman_relation_info(relid, NULL); @@ -184,36 +212,39 @@ create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed) /* Comparison function */ fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); - vals[0] = ObjectIdGetDatum(relid); - vals[1] = value; - /* Perform PL procedure */ sql = psprintf("SELECT %s.append_partitions_on_demand_internal($1, $2)", schema); PG_TRY(); { + int ret; + Oid partid = InvalidOid; + bool isnull; + ret = SPI_execute_with_args(sql, 2, oids, vals, nulls, 
false, 0); if (ret > 0) { + TupleDesc tupdesc = SPI_tuptable->tupdesc; + HeapTuple tuple = SPI_tuptable->vals[0]; + + Assert(SPI_processed == 1); + + partid = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 1, &isnull)); + /* Update relation info */ free_dsm_array(&rangerel->ranges); free_dsm_array(&prel->children); load_check_constraints(relid, GetCatalogSnapshot(relid)); } + + *crashed = false; + return partid; } PG_CATCH(); { - elog(WARNING, "Attempt to create new partitions failed"); - if (crashed != NULL) - *crashed = true; - return 0; + elog(ERROR, "Attempt to create new partitions failed"); + + return InvalidOid; /* compiler should be happy */ } PG_END_TRY(); - - search_state = search_range_partition_eq(value, &cmp_func, - rangerel, &found_re); - if (search_state == SEARCH_RANGEREL_FOUND) - return found_re.child_oid; - - return 0; } From 92e46ac299dad9879c3f65aa87a4fa3fa1793c6c Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 8 Jun 2016 14:12:25 +0300 Subject: [PATCH 017/184] introduce fixed function drop_partitions() --- hash.sql | 42 ------------------------------------ init.sql | 54 ++++++++++++++++++++++++++++++++++++++++++++++ range.sql | 45 -------------------------------------- sql/pg_pathman.sql | 16 +++++++------- 4 files changed, 62 insertions(+), 95 deletions(-) diff --git a/hash.sql b/hash.sql index 6039a16525..9505a6ae63 100644 --- a/hash.sql +++ b/hash.sql @@ -66,48 +66,6 @@ BEGIN END $$ LANGUAGE plpgsql; -/* - * Drops all partitions for specified relation - */ -CREATE OR REPLACE FUNCTION @extschema@.drop_hash_partitions( - IN relation REGCLASS - , delete_data BOOLEAN DEFAULT FALSE) -RETURNS INTEGER AS -$$ -DECLARE - v_relname TEXT; - v_rec RECORD; - v_rows INTEGER; - v_part_count INTEGER := 0; -BEGIN - v_relname := @extschema@.validate_relname(relation); - - /* Drop trigger first */ - PERFORM @extschema@.drop_triggers(relation); - DELETE FROM @extschema@.pathman_config WHERE relname::regclass = relation; - - FOR v_rec in (SELECT 
inhrelid::regclass::text AS tbl - FROM pg_inherits WHERE inhparent = relation::oid) - LOOP - IF NOT delete_data THEN - EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) - INSERT INTO %s SELECT * FROM part_data' - , v_rec.tbl - , relation::text); - GET DIAGNOSTICS v_rows = ROW_COUNT; - RAISE NOTICE '% rows copied from %', v_rows, v_rec.tbl; - END IF; - EXECUTE format('DROP TABLE %s', v_rec.tbl); - v_part_count := v_part_count + 1; - END LOOP; - - /* Notify backend about changes */ - PERFORM @extschema@.on_remove_partitions(relation::oid); - - RETURN v_part_count; -END -$$ LANGUAGE plpgsql; - /* * Creates an update trigger */ diff --git a/init.sql b/init.sql index f7aa514c4e..573a23a5b6 100644 --- a/init.sql +++ b/init.sql @@ -352,3 +352,57 @@ BEGIN EXECUTE format('DROP FUNCTION IF EXISTS %s() CASCADE', funcname); END $$ LANGUAGE plpgsql; + +/* + * Drop partitions + * If delete_data set to TRUE then partitions will be dropped with all the data + */ +CREATE OR REPLACE FUNCTION @extschema@.drop_partitions( + relation REGCLASS + , delete_data BOOLEAN DEFAULT FALSE) +RETURNS INTEGER AS +$$ +DECLARE + v_rec RECORD; + v_rows INTEGER; + v_part_count INTEGER := 0; + v_relname TEXT; + conf_num_del INTEGER; +BEGIN + v_relname := @extschema@.validate_relname(relation); + + /* Drop trigger first */ + PERFORM @extschema@.drop_triggers(relation); + + WITH config_num_deleted AS (DELETE FROM @extschema@.pathman_config + WHERE relname::regclass = relation + RETURNING *) + SELECT count(*) from config_num_deleted INTO conf_num_del; + + IF conf_num_del = 0 THEN + RAISE EXCEPTION 'table % has no partitions', relation::text; + END IF; + + FOR v_rec IN (SELECT inhrelid::regclass::text AS tbl + FROM pg_inherits WHERE inhparent::regclass = relation) + LOOP + IF NOT delete_data THEN + EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) + INSERT INTO %s SELECT * FROM part_data' + , v_rec.tbl + , relation::text); + GET DIAGNOSTICS v_rows = ROW_COUNT; + RAISE NOTICE '% 
rows copied from %', v_rows, v_rec.tbl; + END IF; + EXECUTE format('DROP TABLE %s', v_rec.tbl); + v_part_count := v_part_count + 1; + END LOOP; + + /* Notify backend about changes */ + PERFORM @extschema@.on_remove_partitions(relation::oid); + + RETURN v_part_count; +END +$$ LANGUAGE plpgsql +SET pg_pathman.enable_partitionfilter = off; + diff --git a/range.sql b/range.sql index 4523802d62..ae5a3b856d 100644 --- a/range.sql +++ b/range.sql @@ -1075,51 +1075,6 @@ BEGIN END $$ LANGUAGE plpgsql; - -/* - * Drop partitions - * If delete_data set to TRUE then partitions will be dropped with all the data - */ -CREATE OR REPLACE FUNCTION @extschema@.drop_range_partitions( - relation REGCLASS - , delete_data BOOLEAN DEFAULT FALSE) -RETURNS INTEGER AS -$$ -DECLARE - v_rec RECORD; - v_rows INTEGER; - v_part_count INTEGER := 0; - v_relname TEXT; -BEGIN - v_relname := @extschema@.validate_relname(relation); - - /* Drop trigger first */ - PERFORM @extschema@.drop_triggers(relation); - - FOR v_rec IN (SELECT inhrelid::regclass::text AS tbl - FROM pg_inherits WHERE inhparent::regclass = relation) - LOOP - IF NOT delete_data THEN - EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) - INSERT INTO %s SELECT * FROM part_data' - , v_rec.tbl - , relation::text); - GET DIAGNOSTICS v_rows = ROW_COUNT; - RAISE NOTICE '% rows copied from %', v_rows, v_rec.tbl; - END IF; - EXECUTE format('DROP TABLE %s', v_rec.tbl); - v_part_count := v_part_count + 1; - END LOOP; - - DELETE FROM @extschema@.pathman_config WHERE relname::regclass = relation; - - /* Notify backend about changes */ - PERFORM @extschema@.on_remove_partitions(relation::oid); - - RETURN v_part_count; -END -$$ LANGUAGE plpgsql; - /* * Internal function used to create new partitions on insert or update trigger. * Invoked from C-function find_or_create_range_partition(). 
diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 84530a4bb8..4b6376ee17 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -396,14 +396,14 @@ SELECT * FROM test.hash_rel WHERE id = 123; /* * Clean up */ -SELECT pathman.drop_hash_partitions('test.hash_rel'); +SELECT pathman.drop_partitions('test.hash_rel'); SELECT COUNT(*) FROM ONLY test.hash_rel; SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); -SELECT pathman.drop_hash_partitions('test.hash_rel', TRUE); +SELECT pathman.drop_partitions('test.hash_rel', TRUE); SELECT COUNT(*) FROM ONLY test.hash_rel; DROP TABLE test.hash_rel CASCADE; -SELECT pathman.drop_range_partitions('test.num_range_rel'); +SELECT pathman.drop_partitions('test.num_range_rel'); DROP TABLE test.num_range_rel CASCADE; DROP TABLE test.range_rel CASCADE; @@ -453,7 +453,7 @@ UPDATE test."TeSt" SET a = 1; SELECT * FROM test."TeSt"; SELECT * FROM test."TeSt" WHERE a = 1; EXPLAIN (COSTS OFF) SELECT * FROM test."TeSt" WHERE a = 1; -SELECT pathman.drop_hash_partitions('test."TeSt"'); +SELECT pathman.drop_partitions('test."TeSt"'); SELECT * FROM test."TeSt"; CREATE TABLE test."RangeRel" ( @@ -467,7 +467,7 @@ SELECT pathman.append_range_partition('test."RangeRel"'); SELECT pathman.prepend_range_partition('test."RangeRel"'); SELECT pathman.merge_range_partitions('test."RangeRel_1"', 'test."RangeRel_' || currval('test."RangeRel_seq"') || '"'); SELECT pathman.split_range_partition('test."RangeRel_1"', '2015-01-01'::DATE); -SELECT pathman.drop_range_partitions('test."RangeRel"'); +SELECT pathman.drop_partitions('test."RangeRel"'); SELECT pathman.create_partitions_from_range('test."RangeRel"', 'dt', '2015-01-01'::DATE, '2015-01-05'::DATE, '1 day'::INTERVAL); DROP TABLE test."RangeRel" CASCADE; SELECT * FROM pathman.pathman_config; @@ -476,7 +476,7 @@ CREATE TABLE test."RangeRel" ( dt TIMESTAMP NOT NULL, txt TEXT); SELECT pathman.create_range_partitions('test."RangeRel"', 'id', 1, 100, 3); -SELECT 
pathman.drop_range_partitions('test."RangeRel"'); +SELECT pathman.drop_partitions('test."RangeRel"'); SELECT pathman.create_partitions_from_range('test."RangeRel"', 'id', 1, 300, 100); DROP TABLE test."RangeRel" CASCADE; @@ -524,9 +524,9 @@ EXPLAIN (COSTS OFF) DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02 DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02' AND r.id = t.id; /* Create range partitions from whole range */ -SELECT drop_range_partitions('range_rel'); +SELECT drop_partitions('range_rel'); SELECT create_partitions_from_range('range_rel', 'id', 1, 1000, 100); -SELECT drop_range_partitions('range_rel', TRUE); +SELECT drop_partitions('range_rel', TRUE); SELECT create_partitions_from_range('range_rel', 'dt', '2015-01-01'::date, '2015-12-01'::date, '1 month'::interval); EXPLAIN (COSTS OFF) SELECT * FROM range_rel WHERE dt = '2015-12-15'; From 487ec8eec635cc8ceed2638684ccf7a46ed90766 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 10 Jun 2016 00:54:36 +0300 Subject: [PATCH 018/184] fix create_append_plan_common() for 'for share\update', add regression tests --- expected/pg_pathman.out | 161 +++++++++++++++++++++++++++------------- sql/pg_pathman.sql | 66 +++++++++++++++- src/hooks.c | 3 + src/init.c | 4 +- src/nodes_common.c | 96 ++++++++++++------------ src/pathman.h | 1 + 6 files changed, 233 insertions(+), 98 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 75198c7d3b..d5b9924382 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -12,8 +12,6 @@ SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); ERROR: Partitioning key 'value' must be NOT NULL ALTER TABLE test.hash_rel ALTER COLUMN value SET NOT NULL; SELECT pathman.create_hash_partitions('test.hash_rel', 'Value', 3); -NOTICE: function test.hash_rel_insert_trigger_func() does not exist, skipping -NOTICE: function test.hash_rel_update_trigger_func() does not exist, skipping NOTICE: Copying data to 
partitions... create_hash_partitions ------------------------ @@ -728,12 +726,60 @@ begin return 'ok'; end; $$ language plpgsql; +create or replace function test.pathman_test_5() returns text as $$ +declare + res record; +begin + select + from test.runtime_test_3 + where id = (select * from test.vals order by val limit 1) + limit 1 + into res; /* test empty tlist */ + + + select id, generate_series(1, 2) gen, val + from test.runtime_test_3 + where id = any (select * from test.vals order by val limit 5) + order by id, gen, val + offset 1 limit 1 + into res; /* without IndexOnlyScan */ + + perform test.pathman_equal(res.id::text, '1', 'id is incorrect (t2)'); + perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t2)'); + perform test.pathman_equal(res.val::text, 'k = 1', 'val is incorrect (t2)'); + + + select id + from test.runtime_test_3 + where id = any (select * from test.vals order by val limit 5) + order by id + offset 3 limit 1 + into res; /* with IndexOnlyScan */ + + perform test.pathman_equal(res.id::text, '4', 'id is incorrect (t3)'); + + + select v.val v1, generate_series(2, 2) gen, t.val v2 + from test.runtime_test_3 t join test.vals v on id = v.val + order by v1, gen, v2 + limit 1 + into res; + + perform test.pathman_equal(res.v1::text, '1', 'v1 is incorrect (t4)'); + perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t4)'); + perform test.pathman_equal(res.v2::text, 'k = 1', 'v2 is incorrect (t4)'); + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_hashjoin = off +set enable_mergejoin = off; +NOTICE: RuntimeAppend, RuntimeMergeAppend and PartitionFilter nodes have been enabled create table test.run_values as select generate_series(1, 10000) val; create table test.runtime_test_1(id serial primary key, val real); insert into test.runtime_test_1 select generate_series(1, 10000), random(); select pathman.create_hash_partitions('test.runtime_test_1', 'id', 6); -NOTICE: function 
test.runtime_test_1_insert_trigger_func() does not exist, skipping -NOTICE: function test.runtime_test_1_update_trigger_func() does not exist, skipping NOTICE: Copying data to partitions... create_hash_partitions ------------------------ @@ -745,16 +791,29 @@ create table test.runtime_test_2 (id serial, category_id int not null, name text insert into test.runtime_test_2 (select id, (id % 6) + 1 as category_id, 'good' || id::text as name, random() as rating from generate_series(1, 100000) id); create index on test.runtime_test_2 (category_id, rating); select pathman.create_hash_partitions('test.runtime_test_2', 'category_id', 6); -NOTICE: function test.runtime_test_2_insert_trigger_func() does not exist, skipping -NOTICE: function test.runtime_test_2_update_trigger_func() does not exist, skipping NOTICE: Copying data to partitions... create_hash_partitions ------------------------ 6 (1 row) +create table test.vals as (select generate_series(1, 10000) as val); +create table test.runtime_test_3(val text, id serial not null); +insert into test.runtime_test_3(id, val) select * from generate_series(1, 10000) k, format('k = %s', k); +select pathman.create_hash_partitions('test.runtime_test_3', 'id', 4); +NOTICE: Copying data to partitions... 
+ create_hash_partitions +------------------------ + 4 +(1 row) + +create index on test.runtime_test_3 (id); +create index on test.runtime_test_3_0 (id); analyze test.run_values; analyze test.runtime_test_1; +analyze test.runtime_test_2; +analyze test.runtime_test_3; +analyze test.runtime_test_3_0; set enable_mergejoin = off; set enable_hashjoin = off; set pg_pathman.enable_runtimeappend = on; @@ -783,12 +842,18 @@ select test.pathman_test_4(); /* RuntimeMergeAppend (lateral) */ ok (1 row) +select test.pathman_test_5(); /* projection tests for RuntimeXXX nodes */ + pathman_test_5 +---------------- + ok +(1 row) + set pg_pathman.enable_runtimeappend = off; set pg_pathman.enable_runtimemergeappend = off; set enable_mergejoin = on; set enable_hashjoin = on; -drop table test.run_values, test.runtime_test_1, test.runtime_test_2 cascade; -NOTICE: drop cascades to 12 other objects +drop table test.run_values, test.runtime_test_1, test.runtime_test_2, test.runtime_test_3, test.vals cascade; +NOTICE: drop cascades to 16 other objects /* * Test split and merge */ @@ -1009,15 +1074,14 @@ SELECT * FROM test.hash_rel WHERE id = 123; /* * Clean up */ -SELECT pathman.drop_hash_partitions('test.hash_rel'); -NOTICE: drop cascades to trigger test_hash_rel_insert_trigger on table test.hash_rel +SELECT pathman.drop_partitions('test.hash_rel'); NOTICE: drop cascades to 3 other objects NOTICE: 2 rows copied from test.hash_rel_2 NOTICE: 3 rows copied from test.hash_rel_1 NOTICE: 2 rows copied from test.hash_rel_0 - drop_hash_partitions ----------------------- - 3 + drop_partitions +----------------- + 3 (1 row) SELECT COUNT(*) FROM ONLY test.hash_rel; @@ -1027,20 +1091,17 @@ SELECT COUNT(*) FROM ONLY test.hash_rel; (1 row) SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); -NOTICE: function test.hash_rel_insert_trigger_func() does not exist, skipping -NOTICE: function test.hash_rel_update_trigger_func() does not exist, skipping NOTICE: Copying data to partitions... 
create_hash_partitions ------------------------ 3 (1 row) -SELECT pathman.drop_hash_partitions('test.hash_rel', TRUE); -NOTICE: drop cascades to trigger test_hash_rel_insert_trigger on table test.hash_rel +SELECT pathman.drop_partitions('test.hash_rel', TRUE); NOTICE: function test.hash_rel_update_trigger_func() does not exist, skipping - drop_hash_partitions ----------------------- - 3 + drop_partitions +----------------- + 3 (1 row) SELECT COUNT(*) FROM ONLY test.hash_rel; @@ -1050,15 +1111,16 @@ SELECT COUNT(*) FROM ONLY test.hash_rel; (1 row) DROP TABLE test.hash_rel CASCADE; -SELECT pathman.drop_range_partitions('test.num_range_rel'); +SELECT pathman.drop_partitions('test.num_range_rel'); +NOTICE: drop cascades to 4 other objects NOTICE: 0 rows copied from test.num_range_rel_6 NOTICE: 2 rows copied from test.num_range_rel_4 NOTICE: 1000 rows copied from test.num_range_rel_3 NOTICE: 1000 rows copied from test.num_range_rel_2 NOTICE: 998 rows copied from test.num_range_rel_1 - drop_range_partitions ------------------------ - 5 + drop_partitions +----------------- + 5 (1 row) DROP TABLE test.num_range_rel CASCADE; @@ -1172,8 +1234,6 @@ CREATE TABLE test."TeSt" (a INT NOT NULL, b INT); SELECT pathman.create_hash_partitions('test.TeSt', 'a', 3); ERROR: relation "test.test" does not exist at character 39 SELECT pathman.create_hash_partitions('test."TeSt"', 'a', 3); -NOTICE: function test.TeSt_insert_trigger_func() does not exist, skipping -NOTICE: function test.TeSt_update_trigger_func() does not exist, skipping NOTICE: Copying data to partitions... 
create_hash_partitions ------------------------ @@ -1222,15 +1282,14 @@ EXPLAIN (COSTS OFF) SELECT * FROM test."TeSt" WHERE a = 1; Filter: (a = 1) (3 rows) -SELECT pathman.drop_hash_partitions('test."TeSt"'); -NOTICE: drop cascades to trigger test_TeSt_insert_trigger on table test."TeSt" +SELECT pathman.drop_partitions('test."TeSt"'); NOTICE: drop cascades to 3 other objects NOTICE: 0 rows copied from test."TeSt_2" NOTICE: 3 rows copied from test."TeSt_1" NOTICE: 0 rows copied from test."TeSt_0" - drop_hash_partitions ----------------------- - 3 + drop_partitions +----------------- + 3 (1 row) SELECT * FROM test."TeSt"; @@ -1291,15 +1350,16 @@ NOTICE: Done! {12-31-2014,01-02-2015} (1 row) -SELECT pathman.drop_range_partitions('test."RangeRel"'); +SELECT pathman.drop_partitions('test."RangeRel"'); +NOTICE: function test.RangeRel_update_trigger_func() does not exist, skipping NOTICE: 1 rows copied from test."RangeRel_6" NOTICE: 0 rows copied from test."RangeRel_4" NOTICE: 1 rows copied from test."RangeRel_3" NOTICE: 1 rows copied from test."RangeRel_2" NOTICE: 0 rows copied from test."RangeRel_1" - drop_range_partitions ------------------------ - 5 + drop_partitions +----------------- + 5 (1 row) SELECT pathman.create_partitions_from_range('test."RangeRel"', 'dt', '2015-01-01'::DATE, '2015-01-05'::DATE, '1 day'::INTERVAL); @@ -1314,7 +1374,7 @@ NOTICE: drop cascades to 5 other objects SELECT * FROM pathman.pathman_config; id | relname | attname | parttype | range_interval ----+--------------------+---------+----------+---------------- - 8 | test.num_range_rel | id | 2 | 1000 + 9 | test.num_range_rel | id | 2 | 1000 (1 row) CREATE TABLE test."RangeRel" ( @@ -1328,13 +1388,14 @@ NOTICE: Copying data to partitions... 
3 (1 row) -SELECT pathman.drop_range_partitions('test."RangeRel"'); +SELECT pathman.drop_partitions('test."RangeRel"'); +NOTICE: function test.RangeRel_update_trigger_func() does not exist, skipping NOTICE: 0 rows copied from test."RangeRel_3" NOTICE: 0 rows copied from test."RangeRel_2" NOTICE: 0 rows copied from test."RangeRel_1" - drop_range_partitions ------------------------ - 3 + drop_partitions +----------------- + 3 (1 row) SELECT pathman.create_partitions_from_range('test."RangeRel"', 'id', 1, 300, 100); @@ -1355,8 +1416,6 @@ CREATE TABLE hash_rel ( value INTEGER NOT NULL); INSERT INTO hash_rel (value) SELECT g FROM generate_series(1, 10000) as g; SELECT create_hash_partitions('hash_rel', 'value', 3); -NOTICE: function public.hash_rel_insert_trigger_func() does not exist, skipping -NOTICE: function public.hash_rel_update_trigger_func() does not exist, skipping NOTICE: Copying data to partitions... create_hash_partitions ------------------------ @@ -1503,7 +1562,8 @@ EXPLAIN (COSTS OFF) DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02 DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02' AND r.id = t.id; /* Create range partitions from whole range */ -SELECT drop_range_partitions('range_rel'); +SELECT drop_partitions('range_rel'); +NOTICE: function public.range_rel_update_trigger_func() does not exist, skipping NOTICE: 0 rows copied from range_rel_15 NOTICE: 0 rows copied from range_rel_14 NOTICE: 14 rows copied from range_rel_13 @@ -1518,9 +1578,9 @@ NOTICE: 31 rows copied from range_rel_5 NOTICE: 30 rows copied from range_rel_4 NOTICE: 31 rows copied from range_rel_3 NOTICE: 44 rows copied from range_rel_1 - drop_range_partitions ------------------------ - 14 + drop_partitions +----------------- + 14 (1 row) SELECT create_partitions_from_range('range_rel', 'id', 1, 1000, 100); @@ -1530,10 +1590,11 @@ NOTICE: Copying data to partitions... 
10 (1 row) -SELECT drop_range_partitions('range_rel', TRUE); - drop_range_partitions ------------------------ - 10 +SELECT drop_partitions('range_rel', TRUE); +NOTICE: function public.range_rel_update_trigger_func() does not exist, skipping + drop_partitions +----------------- + 10 (1 row) SELECT create_partitions_from_range('range_rel', 'dt', '2015-01-01'::date, '2015-12-01'::date, '1 month'::interval); diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 4b6376ee17..7c918bdb04 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -307,6 +307,57 @@ begin end; $$ language plpgsql; +create or replace function test.pathman_test_5() returns text as $$ +declare + res record; +begin + select + from test.runtime_test_3 + where id = (select * from test.vals order by val limit 1) + limit 1 + into res; /* test empty tlist */ + + + select id, generate_series(1, 2) gen, val + from test.runtime_test_3 + where id = any (select * from test.vals order by val limit 5) + order by id, gen, val + offset 1 limit 1 + into res; /* without IndexOnlyScan */ + + perform test.pathman_equal(res.id::text, '1', 'id is incorrect (t2)'); + perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t2)'); + perform test.pathman_equal(res.val::text, 'k = 1', 'val is incorrect (t2)'); + + + select id + from test.runtime_test_3 + where id = any (select * from test.vals order by val limit 5) + order by id + offset 3 limit 1 + into res; /* with IndexOnlyScan */ + + perform test.pathman_equal(res.id::text, '4', 'id is incorrect (t3)'); + + + select v.val v1, generate_series(2, 2) gen, t.val v2 + from test.runtime_test_3 t join test.vals v on id = v.val + order by v1, gen, v2 + limit 1 + into res; + + perform test.pathman_equal(res.v1::text, '1', 'v1 is incorrect (t4)'); + perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t4)'); + perform test.pathman_equal(res.v2::text, 'k = 1', 'v2 is incorrect (t4)'); + + return 'ok'; +end; +$$ language plpgsql +set 
pg_pathman.enable = true +set enable_hashjoin = off +set enable_mergejoin = off; + + create table test.run_values as select generate_series(1, 10000) val; create table test.runtime_test_1(id serial primary key, val real); @@ -319,8 +370,20 @@ insert into test.runtime_test_2 (select id, (id % 6) + 1 as category_id, 'good' create index on test.runtime_test_2 (category_id, rating); select pathman.create_hash_partitions('test.runtime_test_2', 'category_id', 6); +create table test.vals as (select generate_series(1, 10000) as val); +create table test.runtime_test_3(val text, id serial not null); +insert into test.runtime_test_3(id, val) select * from generate_series(1, 10000) k, format('k = %s', k); +select pathman.create_hash_partitions('test.runtime_test_3', 'id', 4); +create index on test.runtime_test_3 (id); +create index on test.runtime_test_3_0 (id); + + analyze test.run_values; analyze test.runtime_test_1; +analyze test.runtime_test_2; +analyze test.runtime_test_3; +analyze test.runtime_test_3_0; + set enable_mergejoin = off; set enable_hashjoin = off; @@ -330,13 +393,14 @@ select test.pathman_test_1(); /* RuntimeAppend (select ... where id = (subquery) select test.pathman_test_2(); /* RuntimeAppend (select ... 
where id = any(subquery)) */ select test.pathman_test_3(); /* RuntimeAppend (a join b on a.id = b.val) */ select test.pathman_test_4(); /* RuntimeMergeAppend (lateral) */ +select test.pathman_test_5(); /* projection tests for RuntimeXXX nodes */ set pg_pathman.enable_runtimeappend = off; set pg_pathman.enable_runtimemergeappend = off; set enable_mergejoin = on; set enable_hashjoin = on; -drop table test.run_values, test.runtime_test_1, test.runtime_test_2 cascade; +drop table test.run_values, test.runtime_test_1, test.runtime_test_2, test.runtime_test_3, test.vals cascade; /* * Test split and merge diff --git a/src/hooks.c b/src/hooks.c index c1ffd4ccb4..80330aa371 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -378,12 +378,14 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) case CMD_SELECT: disable_inheritance(parse); break; + case CMD_UPDATE: case CMD_DELETE: disable_inheritance_cte(parse); disable_inheritance_subselect(parse); handle_modification_query(parse); break; + case CMD_INSERT: { ListCell *lc; @@ -396,6 +398,7 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) return result; } + default: break; } diff --git a/src/init.c b/src/init.c index 5a390dc4b8..6969bd781d 100644 --- a/src/init.c +++ b/src/init.c @@ -171,7 +171,8 @@ load_relations_hashtable(bool reinitialize) ListCell *lc; char *schema; PartRelationInfo *prel; - char sql[] = "SELECT pg_class.oid, pg_attribute.attnum, cfg.parttype, pg_attribute.atttypid " + char sql[] = "SELECT pg_class.oid, pg_attribute.attnum," + "cfg.parttype, pg_attribute.atttypid, pg_attribute.atttypmod " "FROM %s.pathman_config as cfg " "JOIN pg_class ON pg_class.oid = cfg.relname::regclass::oid " "JOIN pg_attribute ON pg_attribute.attname = lower(cfg.attname) " @@ -212,6 +213,7 @@ load_relations_hashtable(bool reinitialize) prel->attnum = DatumGetInt32(SPI_getbinval(tuple, tupdesc, 2, &isnull)); prel->parttype = DatumGetInt32(SPI_getbinval(tuple, tupdesc, 3, 
&isnull)); prel->atttype = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 4, &isnull)); + prel->atttypmod = DatumGetInt32(SPI_getbinval(tuple, tupdesc, 5, &isnull)); part_oids = lappend_int(part_oids, oid); } diff --git a/src/nodes_common.c b/src/nodes_common.c index 7750b5f225..a0c790a42e 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -137,6 +137,42 @@ replace_tlist_varnos(List *child_tlist, RelOptInfo *parent) return result; } +/* Append partition attribute in case it's not present in target list */ +static List * +append_part_attr_to_tlist(List *tlist, Index relno, PartRelationInfo *prel) +{ + ListCell *lc; + bool part_attr_found = false; + + foreach (lc, tlist) + { + TargetEntry *te = (TargetEntry *) lfirst(lc); + Var *var = (Var *) te->expr; + + if (IsA(var, Var) && var->varattno == prel->attnum) + part_attr_found = true; + } + + if (!part_attr_found) + { + /* TODO: how about collation support? */ + Var *newvar = makeVar(relno, + prel->attnum, + prel->atttype, + prel->atttypmod, + InvalidOid, + 0); + + Index last_item = list_length(tlist) + 1; + + tlist = lappend(tlist, makeTargetEntry((Expr *) newvar, + last_item, + NULL, false)); + } + + return tlist; +} + static void pack_runtimeappend_private(CustomScan *cscan, RuntimeAppendPath *path) { @@ -313,9 +349,11 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, { RuntimeAppendPath *rpath = (RuntimeAppendPath *) best_path; CustomScan *cscan; + PartRelationInfo *prel = get_pathman_relation_info(rpath->relid, NULL); cscan = makeNode(CustomScan); cscan->custom_scan_tlist = NIL; /* initial value (empty list) */ + cscan->scan.plan.targetlist = NIL; if (custom_plans) { @@ -327,59 +365,25 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, Plan *child_plan = (Plan *) lfirst(lc2); RelOptInfo *child_rel = ((Path *) lfirst(lc1))->parent; - /* We enforce IndexOnlyScans to return all available columns */ - if (IsA(child_plan, IndexOnlyScan)) - { - IndexOptInfo *indexinfo = ((IndexPath 
*) lfirst(lc1))->indexinfo; - RangeTblEntry *rentry = root->simple_rte_array[child_rel->relid]; - Relation child_relation; - - child_relation = heap_open(rentry->relid, NoLock); - child_plan->targetlist = build_index_tlist(root, indexinfo, - child_relation); - heap_close(child_relation, NoLock); - - if (!cscan->custom_scan_tlist) - { - /* Set appropriate tlist for child scans */ - cscan->custom_scan_tlist = - replace_tlist_varnos(child_plan->targetlist, rel); - - /* Replace parent's tlist as well */ - tlist = cscan->custom_scan_tlist; - } - } - /* Don't generate useless physical tlists that will be replaced */ - else if (!cscan->custom_scan_tlist) - child_plan->targetlist = build_physical_tlist(root, child_rel); - } + /* Replace rel's tlist with a matching one */ + if (!cscan->scan.plan.targetlist) + tlist = replace_tlist_varnos(child_plan->targetlist, rel); - /* - * Go through the other (non-IOS) plans and replace their - * physical tlists with the new 'custom_scan_tlist'. - */ - if (cscan->custom_scan_tlist) - forboth (lc1, rpath->cpath.custom_paths, lc2, custom_plans) - { - Plan *child_plan = (Plan *) lfirst(lc2); - RelOptInfo *child_rel = ((Path *) lfirst(lc1))->parent; + /* Add partition attribute if necessary (for ExecQual()) */ + child_plan->targetlist = append_part_attr_to_tlist(child_plan->targetlist, + child_rel->relid, + prel); - if (!IsA(child_plan, IndexOnlyScan)) - child_plan->targetlist = - replace_tlist_varnos(cscan->custom_scan_tlist, child_rel); - } + /* Now make custom_scan_tlist match child plans' targetlists */ + if (!cscan->custom_scan_tlist) + cscan->custom_scan_tlist = replace_tlist_varnos(child_plan->targetlist, + rel); + } } cscan->scan.plan.qual = NIL; cscan->scan.plan.targetlist = tlist; - /* - * Initialize custom_scan_tlist if it's not - * ready yet (there are no IndexOnlyScans). 
- */ - if (!cscan->custom_scan_tlist) - cscan->custom_scan_tlist = tlist; - /* Since we're not scanning any real table directly */ cscan->scan.scanrelid = 0; diff --git a/src/pathman.h b/src/pathman.h index 4bb03efd74..515d204f8b 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -78,6 +78,7 @@ typedef struct PartRelationInfo PartType parttype; Index attnum; Oid atttype; + int32 atttypmod; } PartRelationInfo; /* From fa9d20900b30a0bf3fe522c3fbe064306723c7ad Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 17 Jun 2016 00:41:40 +0300 Subject: [PATCH 019/184] attempt to fix broken 'FOR UPDATE' feature (working prototype stage) --- src/hooks.c | 11 ++ src/partition_filter.c | 100 ++++++----------- src/pg_pathman.c | 62 +++++++++-- src/utils.c | 242 +++++++++++++++++++++++++++++++++++++++++ src/utils.h | 10 ++ 5 files changed, 348 insertions(+), 77 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 80330aa371..7d66de15a2 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -377,6 +377,7 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) { case CMD_SELECT: disable_inheritance(parse); + rowmark_add_tableoids(parse); /* add attributes for rowmarks */ break; case CMD_UPDATE: @@ -410,6 +411,16 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) else result = standard_planner(parse, cursorOptions, boundParams); + if (pg_pathman_enable) + { + ListCell *lc; + + /* Give rowmark-related attributes correct names */ + postprocess_lock_rows(result->rtable, result->planTree); + foreach (lc, result->subplans) + postprocess_lock_rows(result->rtable, (Plan *) lfirst(lc)); + } + return result; } diff --git a/src/partition_filter.c b/src/partition_filter.c index af51a58b7a..9e03c0db0b 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -42,7 +42,7 @@ init_partition_filter_static_data(void) Plan * make_partition_filter(Plan *subplan, Oid partitioned_table, - OnConflictAction conflict_action) + 
OnConflictAction conflict_action) { CustomScan *cscan = makeNode(CustomScan); @@ -327,78 +327,44 @@ pfilter_build_tlist(List *tlist) } /* - * Add PartitionFilter nodes to the plan tree + * Add partition filters to ModifyTable node's children + * + * 'context' should point to the PlannedStmt->rtable */ -void -add_partition_filters(List *rtable, Plan *plan) +static void +partition_filter_visitor(Plan *plan, void *context) { - ListCell *l; + List *rtable = (List *) context; + ModifyTable *modify_table = (ModifyTable *) plan; + ListCell *lc1, + *lc2; + + Assert(rtable && IsA(rtable, List)); - if (plan == NULL || !pg_pathman_enable_partition_filter) + /* Skip if not ModifyTable with 'INSERT' command */ + if (!IsA(modify_table, ModifyTable) || modify_table->operation != CMD_INSERT) return; - /* Plan-type-specific fixes*/ - switch (nodeTag(plan)) + forboth (lc1, modify_table->plans, lc2, modify_table->resultRelations) { - case T_SubqueryScan: - add_partition_filters(rtable, ((SubqueryScan *) plan)->subplan); - break; - - case T_CustomScan: - foreach(l, ((CustomScan *) plan)->custom_plans) - add_partition_filters(rtable, (Plan *) lfirst(l)); - break; - - /* - * Add proxy PartitionFilter nodes - * to subplans of ModifyTable node - */ - case T_ModifyTable: - { - ModifyTable *modify_table = ((ModifyTable *) plan); - ListCell *lc1, - *lc2; - - if (modify_table->operation != CMD_INSERT) - break; - - forboth (lc1, modify_table->plans, lc2, modify_table->resultRelations) - { - Index rindex = lfirst_int(lc2); - Oid relid = getrelid(rindex, rtable); - PartRelationInfo *prel = get_pathman_relation_info(relid, NULL); - - add_partition_filters(rtable, (Plan *) lfirst(lc1)); - - if (prel) - lfirst(lc1) = make_partition_filter((Plan *) lfirst(lc1), - relid, - modify_table->onConflictAction); - } - } - break; - - /* Since they look alike */ - case T_MergeAppend: - case T_Append: - foreach(l, ((Append *) plan)->appendplans) - add_partition_filters(rtable, (Plan *) lfirst(l)); - break; - - 
case T_BitmapAnd: - foreach(l, ((BitmapAnd *) plan)->bitmapplans) - add_partition_filters(rtable, (Plan *) lfirst(l)); - break; - - case T_BitmapOr: - foreach(l, ((BitmapOr *) plan)->bitmapplans) - add_partition_filters(rtable, (Plan *) lfirst(l)); - break; - - default: - break; + Index rindex = lfirst_int(lc2); + Oid relid = getrelid(rindex, rtable); + PartRelationInfo *prel = get_pathman_relation_info(relid, NULL); + + /* Check that table is partitioned */ + if (prel) + lfirst(lc1) = make_partition_filter((Plan *) lfirst(lc1), + relid, + modify_table->onConflictAction); } +} - add_partition_filters(rtable, plan->lefttree); - add_partition_filters(rtable, plan->righttree); +/* + * Add PartitionFilter nodes to the plan tree + */ +void +add_partition_filters(List *rtable, Plan *plan) +{ + if (pg_pathman_enable_partition_filter) + plan_tree_walker(plan, partition_filter_visitor, rtable); } diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 536a2ea40c..ef40861ff2 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -21,6 +21,7 @@ #include "optimizer/paths.h" #include "optimizer/pathnode.h" #include "optimizer/planner.h" +#include "optimizer/prep.h" #include "optimizer/restrictinfo.h" #include "optimizer/cost.h" #include "parser/analyze.h" @@ -33,6 +34,7 @@ #include "utils/selfuncs.h" #include "access/heapam.h" #include "access/nbtree.h" +#include "access/sysattr.h" #include "storage/ipc.h" #include "catalog/pg_type.h" #include "foreign/fdwapi.h" @@ -207,7 +209,7 @@ disable_inheritance(Query *parse) foreach(lc, parse->rtable) { - rte = (RangeTblEntry*) lfirst(lc); + rte = (RangeTblEntry *) lfirst(lc); switch(rte->rtekind) { case RTE_RELATION: @@ -380,13 +382,17 @@ int append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte, int index, Oid childOid, List *wrappers) { - RangeTblEntry *childrte; - RelOptInfo *childrel; - Index childRTindex; - AppendRelInfo *appinfo; - Node *node; - ListCell *lc, *lc2; - Relation newrelation; + 
RangeTblEntry *childrte; + RelOptInfo *childrel; + Index childRTindex; + AppendRelInfo *appinfo; + Node *node; + ListCell *lc, + *lc2; + Relation newrelation; + PlanRowMark *parent_rowmark; + PlanRowMark *child_rowmark; + AttrNumber i; newrelation = heap_open(childOid, NoLock); @@ -418,8 +424,18 @@ append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, childrel->reltargetlist = lappend(childrel->reltargetlist, new_target); } - /* Copy attr_needed (used in build_joinrel_tlist() function) */ - childrel->attr_needed = rel->attr_needed; + /* Copy attr_needed & attr_widths */ + childrel->attr_needed = (Relids *) + palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids)); + childrel->attr_widths = (int32 *) + palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32)); + + for (i = 0; i < rel->max_attr - rel->min_attr + 1; i++) + childrel->attr_needed[i] = bms_copy(rel->attr_needed[i]); + + memcpy(childrel->attr_widths, rel->attr_widths, + (rel->max_attr - rel->min_attr + 1) * sizeof(int32)); + /* Copy restrictions */ childrel->baserestrictinfo = NIL; @@ -502,6 +518,32 @@ append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, heap_close(newrelation, NoLock); + + /* Create rowmarks required for child rels */ + parent_rowmark = get_plan_rowmark(root->rowMarks, rti); + if (parent_rowmark) + { + child_rowmark = makeNode(PlanRowMark); + + child_rowmark->rti = childRTindex; + child_rowmark->prti = rti; + child_rowmark->rowmarkId = parent_rowmark->rowmarkId; + /* Reselect rowmark type, because relkind might not match parent */ + child_rowmark->markType = select_rowmark_type(childrte, + parent_rowmark->strength); + child_rowmark->allMarkTypes = (1 << child_rowmark->markType); + child_rowmark->strength = parent_rowmark->strength; + child_rowmark->waitPolicy = parent_rowmark->waitPolicy; + child_rowmark->isParent = false; + + /* Include child's rowmark type in parent's allMarkTypes */ + parent_rowmark->allMarkTypes |= child_rowmark->allMarkTypes; 
+ + root->rowMarks = lappend(root->rowMarks, child_rowmark); + + parent_rowmark->isParent = true; + } + return childRTindex; } diff --git a/src/utils.c b/src/utils.c index fd41b7a640..d0d9e8f8c3 100644 --- a/src/utils.c +++ b/src/utils.c @@ -9,18 +9,28 @@ */ #include "utils.h" #include "access/nbtree.h" +#include "access/sysattr.h" #include "executor/spi.h" +#include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "nodes/makefuncs.h" #include "optimizer/var.h" #include "optimizer/restrictinfo.h" +#include "utils/builtins.h" #include "utils/snapmgr.h" #include "utils/lsyscache.h" #include "catalog/heap.h" +#include "catalog/pg_type.h" + static bool clause_contains_params_walker(Node *node, void *context); +static List *get_tableoids_list(List *tlist); +static void lock_rows_visitor(Plan *plan, void *context); + +#define TABLEOID_STR(subst) ( "pathman_tableoid" subst ) +#define TABLEOID_STR_BASE_LEN ( sizeof(TABLEOID_STR("")) - 1 ) /* * Check whether clause contains PARAMs or not @@ -45,6 +55,93 @@ clause_contains_params_walker(Node *node, void *context) context); } +/* + * Extract target entries with resnames beginning with TABLEOID_STR + * and var->varoattno == TableOidAttributeNumber + */ +static List * +get_tableoids_list(List *tlist) +{ + List *result = NIL; + ListCell *lc; + + foreach (lc, tlist) + { + TargetEntry *te = (TargetEntry *) lfirst(lc); + Var *var = (Var *) te->expr; + + if (!IsA(var, Var)) + continue; + + if (strlen(te->resname) > TABLEOID_STR_BASE_LEN && + 0 == strncmp(te->resname, TABLEOID_STR(""), TABLEOID_STR_BASE_LEN) && + var->varoattno == TableOidAttributeNumber) + { + result = lappend(result, te); + } + } + + return result; +} + +/* + * Find 'TABLEOID_STR%u' attributes that were manually + * created for partitioned tables and replace Oids + * (used for '%u') with expected rc->rowmarkIds + */ +static void +lock_rows_visitor(Plan *plan, void *context) +{ + List *rtable = (List *) context; + LockRows *lock_rows = (LockRows *) plan; + Plan 
*lock_child = outerPlan(plan); + List *tableoids; + ListCell *lc; + + if (!IsA(lock_rows, LockRows)) + return; + + Assert(rtable && IsA(rtable, List) && lock_child); + + /* Select tableoid attributes that must be renamed */ + tableoids = get_tableoids_list(lock_child->targetlist); + Assert(tableoids); + + foreach (lc, lock_rows->rowMarks) + { + PlanRowMark *rc = (PlanRowMark *) lfirst(lc); + Oid parent_oid = getrelid(rc->rti, rtable); + ListCell *mark_lc; + List *finished_tes = NIL; /* postprocessed target entries */ + + foreach (mark_lc, tableoids) + { + TargetEntry *te = (TargetEntry *) lfirst(mark_lc); + Oid cur_oid; + + cur_oid = str_to_oid(&(te->resname[TABLEOID_STR_BASE_LEN])); + + if (cur_oid == parent_oid) + { + char resname[64]; + + /* Replace 'TABLEOID_STR:Oid' with 'tableoid:rowmarkId' */ + snprintf(resname, sizeof(resname), "tableoid%u", rc->rowmarkId); + te->resname = pstrdup(resname); + + finished_tes = lappend(finished_tes, te); + } + } + + /* Remove target entries that have been processed in this step */ + foreach (mark_lc, finished_tes) + tableoids = list_delete_ptr(tableoids, lfirst(mark_lc)); + + if (list_length(tableoids) == 0) + break; /* nothing to do */ + } +} + /* NOTE: Used for debug */ #ifdef __GNUC__ __attribute__((unused)) @@ -259,3 +356,148 @@ list_reverse(List *l) } return result; } + +Oid +str_to_oid(const char *cstr) +{ + Datum result = DirectFunctionCall1(oidin, CStringGetDatum(cstr)); + + return DatumGetObjectId(result); +} + +/* + * Basic plan tree walker + * + * 'visitor' is applied right before return + */ +void +plan_tree_walker(Plan *plan, + void (*visitor) (Plan *plan, void *context), + void *context) +{ + ListCell *l; + + if (plan == NULL) + return; + + check_stack_depth(); + + /* Plan-type-specific fixes */ + switch (nodeTag(plan)) + { + case T_SubqueryScan: + plan_tree_walker(((SubqueryScan *) plan)->subplan, visitor, context); + break; + + case T_CustomScan: + foreach(l, ((CustomScan *) plan)->custom_plans) + 
plan_tree_walker((Plan *) lfirst(l), visitor, context); + break; + + /* + * Add proxy PartitionFilter nodes + * to subplans of ModifyTable node + */ + case T_ModifyTable: + foreach (l, ((ModifyTable *) plan)->plans) + plan_tree_walker((Plan *) lfirst(l), visitor, context); + break; + + /* Since they look alike */ + case T_MergeAppend: + case T_Append: + foreach(l, ((Append *) plan)->appendplans) + plan_tree_walker((Plan *) lfirst(l), visitor, context); + break; + + case T_BitmapAnd: + foreach(l, ((BitmapAnd *) plan)->bitmapplans) + plan_tree_walker((Plan *) lfirst(l), visitor, context); + break; + + case T_BitmapOr: + foreach(l, ((BitmapOr *) plan)->bitmapplans) + plan_tree_walker((Plan *) lfirst(l), visitor, context); + break; + + default: + break; + } + + plan_tree_walker(plan->lefttree, visitor, context); + plan_tree_walker(plan->righttree, visitor, context); + + visitor(plan, context); +} + +/* + * Add missing 'TABLEOID_STR%u' junk attributes for inherited partitions + * + * This is necessary since preprocess_targetlist() heavily + * depends on the 'inh' flag which we have to unset. + * + * postprocess_lock_rows() will later transform 'TABLEOID_STR:Oid' + * resnames into 'tableoid:rowmarkId'. 
+ */ +void +rowmark_add_tableoids(Query *parse) +{ + ListCell *lc; + + check_stack_depth(); + + foreach(lc, parse->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + switch(rte->rtekind) + { + case RTE_SUBQUERY: + rowmark_add_tableoids(rte->subquery); + break; + + default: + break; + } + } + + /* Generate 'tableoid' for partitioned table rowmark */ + foreach (lc, parse->rowMarks) + { + RowMarkClause *rc = (RowMarkClause *) lfirst(lc); + Oid parent = getrelid(rc->rti, parse->rtable); + Var *var; + TargetEntry *tle; + char resname[64]; + + if (!get_pathman_relation_info(parent, NULL)) + continue; + + var = makeVar(rc->rti, + TableOidAttributeNumber, + OIDOID, + -1, + InvalidOid, + 0); + + /* Use parent's Oid as TABLEOID_STR's key (%u) */ + snprintf(resname, sizeof(resname), TABLEOID_STR("%u"), parent); + + tle = makeTargetEntry((Expr *) var, + list_length(parse->targetList) + 1, + pstrdup(resname), + true); + + /* There's no problem here since new attribute is junk */ + parse->targetList = lappend(parse->targetList, tle); + } +} + +/* + * Final rowmark processing for partitioned tables + */ +void +postprocess_lock_rows(List *rtable, Plan *plan) +{ + plan_tree_walker(plan, lock_rows_visitor, rtable); +} diff --git a/src/utils.h b/src/utils.h index cf150c20bb..7d0f2dab8e 100644 --- a/src/utils.h +++ b/src/utils.h @@ -37,4 +37,14 @@ void fill_type_cmp_fmgr_info(FmgrInfo *finfo, List * list_reverse(List *l); +Oid str_to_oid(const char * cstr); + +void plan_tree_walker(Plan *plan, + void (*visitor)(Plan *, void *), + void *context); + +void rowmark_add_tableoids(Query *parse); + +void postprocess_lock_rows(List *rtable, Plan *plan); + #endif From 1f9d1763cfb32d3cff7b1c4bf63bc9000178cc48 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 17 Jun 2016 03:03:57 +0300 Subject: [PATCH 020/184] use var->varoattno instead of varattno --- src/nodes_common.c | 2 +- src/pg_pathman.c | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff 
--git a/src/nodes_common.c b/src/nodes_common.c index a0c790a42e..7b09c243df 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -149,7 +149,7 @@ append_part_attr_to_tlist(List *tlist, Index relno, PartRelationInfo *prel) TargetEntry *te = (TargetEntry *) lfirst(lc); Var *var = (Var *) te->expr; - if (IsA(var, Var) && var->varattno == prel->attnum) + if (IsA(var, Var) && var->varoattno == prel->attnum) part_attr_found = true; } diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 91a285c8ed..95e6a4d3a2 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -1117,12 +1117,15 @@ handle_opexpr(const OpExpr *expr, WalkerContext *context) /* * Checks if expression is a KEY OP PARAM or PARAM OP KEY, * where KEY is partition key (it could be Var or RelableType) and PARAM is - * whatever. Function returns variable (or RelableType) and param via var_ptr + * whatever. Function returns variable (or RelableType) and param via var_ptr * and param_ptr pointers. If partition key isn't in expression then function * returns false. */ static bool -pull_var_param(const WalkerContext *ctx, const OpExpr *expr, Node **var_ptr, Node **param_ptr) +pull_var_param(const WalkerContext *ctx, + const OpExpr *expr, + Node **var_ptr, + Node **param_ptr) { Node *left = linitial(expr->args), *right = lsecond(expr->args); @@ -1135,14 +1138,14 @@ pull_var_param(const WalkerContext *ctx, const OpExpr *expr, Node **var_ptr, Nod (Var *) left : (Var *) ((RelabelType *) left)->arg; - if (v->varattno == ctx->prel->attnum) + if (v->varoattno == ctx->prel->attnum) { *var_ptr = left; *param_ptr = right; return true; } } - + /* ... 
variable is on the right side */ if (IsA(right, Var) || IsA(right, RelabelType)) { @@ -1150,7 +1153,7 @@ pull_var_param(const WalkerContext *ctx, const OpExpr *expr, Node **var_ptr, Nod (Var *) right : (Var *) ((RelabelType *) right)->arg; - if (v->varattno == ctx->prel->attnum) + if (v->varoattno == ctx->prel->attnum) { *var_ptr = right; *param_ptr = left; @@ -1245,7 +1248,7 @@ handle_arrexpr(const ScalarArrayOpExpr *expr, WalkerContext *context) var = !IsA(varnode, RelabelType) ? (Var *) varnode : (Var *) ((RelabelType *) varnode)->arg; - if (var->varattno != prel->attnum) + if (var->varoattno != prel->attnum) goto handle_arrexpr_return; } else From 2b16fcd4558ea81518b15fca1241a75965cee196 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sat, 18 Jun 2016 19:44:16 +0300 Subject: [PATCH 021/184] code cleanup --- src/utils.c | 83 ++++++++++++++++++++++++++--------------------------- src/utils.h | 15 +++------- 2 files changed, 44 insertions(+), 54 deletions(-) diff --git a/src/utils.c b/src/utils.c index 3716f8da0f..608088e271 100644 --- a/src/utils.c +++ b/src/utils.c @@ -25,7 +25,8 @@ static bool clause_contains_params_walker(Node *node, void *context); -static void change_varnos_in_restrinct_info(RestrictInfo *rinfo, change_varno_context *context); +static void change_varnos_in_restrinct_info(RestrictInfo *rinfo, + change_varno_context *context); static bool change_varno_walker(Node *node, change_varno_context *context); static List *get_tableoids_list(List *tlist); static void lock_rows_visitor(Plan *plan, void *context); @@ -440,6 +441,43 @@ change_varno_walker(Node *node, change_varno_context *context) return expression_tree_walker(node, change_varno_walker, (void *) context); } +static void +change_varnos_in_restrinct_info(RestrictInfo *rinfo, change_varno_context *context) +{ + ListCell *lc; + + change_varno_walker((Node *) rinfo->clause, context); + if (rinfo->left_em) + change_varno_walker((Node *) rinfo->left_em->em_expr, context); + + if 
(rinfo->right_em) + change_varno_walker((Node *) rinfo->right_em->em_expr, context); + + if (rinfo->orclause) + foreach(lc, ((BoolExpr *) rinfo->orclause)->args) + { + Node *node = (Node *) lfirst(lc); + change_varno_walker(node, context); + } + + /* TODO: find some elegant way to do this */ + if (bms_is_member(context->old_varno, rinfo->clause_relids)) + { + rinfo->clause_relids = bms_del_member(rinfo->clause_relids, context->old_varno); + rinfo->clause_relids = bms_add_member(rinfo->clause_relids, context->new_varno); + } + if (bms_is_member(context->old_varno, rinfo->left_relids)) + { + rinfo->left_relids = bms_del_member(rinfo->left_relids, context->old_varno); + rinfo->left_relids = bms_add_member(rinfo->left_relids, context->new_varno); + } + if (bms_is_member(context->old_varno, rinfo->right_relids)) + { + rinfo->right_relids = bms_del_member(rinfo->right_relids, context->old_varno); + rinfo->right_relids = bms_add_member(rinfo->right_relids, context->new_varno); + } +} + Oid str_to_oid(const char *cstr) { @@ -477,10 +515,6 @@ plan_tree_walker(Plan *plan, plan_tree_walker((Plan *) lfirst(l), visitor, context); break; - /* - * Add proxy PartitionFilter nodes - * to subplans of ModifyTable node - */ case T_ModifyTable: foreach (l, ((ModifyTable *) plan)->plans) plan_tree_walker((Plan *) lfirst(l), visitor, context); @@ -510,47 +544,10 @@ plan_tree_walker(Plan *plan, plan_tree_walker(plan->lefttree, visitor, context); plan_tree_walker(plan->righttree, visitor, context); + /* Apply visitor to the current node */ visitor(plan, context); } - -static void -change_varnos_in_restrinct_info(RestrictInfo *rinfo, change_varno_context *context) -{ - ListCell *lc; - - change_varno_walker((Node *) rinfo->clause, context); - if (rinfo->left_em) - change_varno_walker((Node *) rinfo->left_em->em_expr, context); - - if (rinfo->right_em) - change_varno_walker((Node *) rinfo->right_em->em_expr, context); - - if (rinfo->orclause) - foreach(lc, ((BoolExpr *) 
rinfo->orclause)->args) - { - Node *node = (Node *) lfirst(lc); - change_varno_walker(node, context); - } - - /* TODO: find some elegant way to do this */ - if (bms_is_member(context->old_varno, rinfo->clause_relids)) - { - rinfo->clause_relids = bms_del_member(rinfo->clause_relids, context->old_varno); - rinfo->clause_relids = bms_add_member(rinfo->clause_relids, context->new_varno); - } - if (bms_is_member(context->old_varno, rinfo->left_relids)) - { - rinfo->left_relids = bms_del_member(rinfo->left_relids, context->old_varno); - rinfo->left_relids = bms_add_member(rinfo->left_relids, context->new_varno); - } - if (bms_is_member(context->old_varno, rinfo->right_relids)) - { - rinfo->right_relids = bms_del_member(rinfo->right_relids, context->old_varno); - rinfo->right_relids = bms_add_member(rinfo->right_relids, context->new_varno); - } -} - /* * Add missing 'TABLEOID_STR%u' junk attributes for inherited partitions * diff --git a/src/utils.h b/src/utils.h index 7acdfb6c9a..c67671f7dd 100644 --- a/src/utils.h +++ b/src/utils.h @@ -19,15 +19,8 @@ typedef struct { - RelOptInfo *child; - RelOptInfo *parent; - int sublevels_up; -} ReplaceVarsContext; - -typedef struct -{ - Oid old_varno; - Oid new_varno; + Oid old_varno; + Oid new_varno; } change_varno_context; @@ -53,10 +46,10 @@ List * list_reverse(List *l); void change_varnos(Node *node, Oid old_varno, Oid new_varno); -Oid str_to_oid(const char * cstr); +Oid str_to_oid(const char *cstr); void plan_tree_walker(Plan *plan, - void (*visitor)(Plan *, void *), + void (*visitor) (Plan *plan, void *context), void *context); void rowmark_add_tableoids(Query *parse); From 720629cef07e2037fa5e67964ea0111642b027ba Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 22 Jun 2016 14:33:56 +0300 Subject: [PATCH 022/184] new isolation test for 'FOR UPDATE' feature, rename 'insert_trigger.spec' --- .gitignore | 1 + Makefile | 2 +- expected/for_update.out | 38 +++++++++++++++++++ .../{insert_trigger.out => insert_nodes.out} | 0 
specs/for_update.spec | 32 ++++++++++++++++ ...{insert_trigger.spec => insert_nodes.spec} | 2 +- 6 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 expected/for_update.out rename expected/{insert_trigger.out => insert_nodes.out} (100%) create mode 100644 specs/for_update.spec rename specs/{insert_trigger.spec => insert_nodes.spec} (97%) diff --git a/.gitignore b/.gitignore index 7671c4d178..b2853d5b29 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .deps +isolation_output results/pg_pathman.out regression.diffs regression.out diff --git a/Makefile b/Makefile index d403f63321..4c9c6052c4 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ endif $(EXTENSION)--$(EXTVERSION).sql: init.sql hash.sql range.sql cat $^ > $@ -ISOLATIONCHECKS=insert_trigger +ISOLATIONCHECKS=insert_nodes for_update submake-isolation: $(MAKE) -C $(top_builddir)/src/test/isolation all diff --git a/expected/for_update.out b/expected/for_update.out new file mode 100644 index 0000000000..3e41031ee3 --- /dev/null +++ b/expected/for_update.out @@ -0,0 +1,38 @@ +Parsed test spec with 2 sessions + +starting permutation: s1_b s1_update s2_select s1_r +create_range_partitions + +10 +step s1_b: begin; +step s1_update: update test_tbl set id = 2 where id = 1; +step s2_select: select * from test_tbl where id = 1; +id val + +1 1 +step s1_r: rollback; + +starting permutation: s1_b s1_update s2_select_locked s1_r +create_range_partitions + +10 +step s1_b: begin; +step s1_update: update test_tbl set id = 2 where id = 1; +step s2_select_locked: select * from test_tbl where id = 1 for share; +step s1_r: rollback; +step s2_select_locked: <... completed> +id val + +1 1 + +starting permutation: s1_b s1_update s2_select_locked s1_c +create_range_partitions + +10 +step s1_b: begin; +step s1_update: update test_tbl set id = 2 where id = 1; +step s2_select_locked: select * from test_tbl where id = 1 for share; +step s1_c: commit; +step s2_select_locked: <... 
completed> +id val + diff --git a/expected/insert_trigger.out b/expected/insert_nodes.out similarity index 100% rename from expected/insert_trigger.out rename to expected/insert_nodes.out diff --git a/specs/for_update.spec b/specs/for_update.spec new file mode 100644 index 0000000000..55ea24af3a --- /dev/null +++ b/specs/for_update.spec @@ -0,0 +1,32 @@ +setup +{ + create extension pg_pathman; + create table test_tbl(id int not null, val real); + insert into test_tbl select i, i from generate_series(1, 1000) as i; + select create_range_partitions('test_tbl', 'id', 1, 100, 10); +} + +teardown +{ + drop table test_tbl cascade; + drop extension pg_pathman; +} + +session "s1" +step "s1_b" { begin; } +step "s1_c" { commit; } +step "s1_r" { rollback; } +step "s1_update" { update test_tbl set id = 2 where id = 1; } + +session "s2" +step "s2_b" { begin; } +step "s2_c" { commit; } +step "s2_select_locked" { select * from test_tbl where id = 1 for share; } +step "s2_select" { select * from test_tbl where id = 1; } + + +permutation "s1_b" "s1_update" "s2_select" "s1_r" + +permutation "s1_b" "s1_update" "s2_select_locked" "s1_r" + +permutation "s1_b" "s1_update" "s2_select_locked" "s1_c" diff --git a/specs/insert_trigger.spec b/specs/insert_nodes.spec similarity index 97% rename from specs/insert_trigger.spec rename to specs/insert_nodes.spec index 126c900afd..93df4102f6 100644 --- a/specs/insert_trigger.spec +++ b/specs/insert_nodes.spec @@ -7,7 +7,7 @@ setup teardown { - SELECT drop_range_partitions('range_rel'); + SELECT drop_partitions('range_rel'); DROP TABLE range_rel CASCADE; DROP EXTENSION pg_pathman; } From 9757fb88a54c77e2b4d84aa789b80a8effb1f6ed Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 23 Jun 2016 15:52:24 +0300 Subject: [PATCH 023/184] fix lock_rows_visitor() for non-partitioned tables --- src/utils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utils.c b/src/utils.c index 608088e271..e6f5aa2438 100644 --- a/src/utils.c +++ 
b/src/utils.c @@ -108,7 +108,8 @@ lock_rows_visitor(Plan *plan, void *context) /* Select tableoid attributes that must be renamed */ tableoids = get_tableoids_list(lock_child->targetlist); - Assert(tableoids); + if (!tableoids) + return; /* this LockRows has nothing to do with partitioned table */ foreach (lc, lock_rows->rowMarks) { From a301663ea86c26f6b72ff770f5c587f7a30c1983 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 24 Jun 2016 07:48:16 +0300 Subject: [PATCH 024/184] refactoring, improve memory management inside PartitionFilter --- range.sql | 2 -- src/hooks.c | 14 ++++++++------ src/nodes_common.c | 3 ++- src/partition_filter.c | 15 ++++++++++++++- src/pathman.h | 14 ++++++++++++-- src/pg_pathman.c | 19 +++++++++++++++---- 6 files changed, 51 insertions(+), 16 deletions(-) diff --git a/range.sql b/range.sql index b4ded8e491..0a405101a3 100644 --- a/range.sql +++ b/range.sql @@ -184,8 +184,6 @@ BEGIN p_start_value := p_start_value + p_interval; END LOOP; - /* Create triggers */ - -- PERFORM create_hash_update_trigger(relation, attribute, partitions_count); /* Notify backend about changes */ PERFORM @extschema@.on_create_partitions(p_relation::regclass::oid); diff --git a/src/hooks.c b/src/hooks.c index 77f3a79036..89e0cff378 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -48,6 +48,7 @@ pathman_join_pathlist_hook(PlannerInfo *root, ListCell *lc; double paramsel; WalkerContext context; + bool context_initialized; bool innerrel_rinfo_contains_part_attr; if (set_join_pathlist_next) @@ -83,15 +84,16 @@ pathman_join_pathlist_hook(PlannerInfo *root, otherclauses = NIL; } + context_initialized = false; paramsel = 1.0; foreach (lc, joinclauses) { - WrapperNode *wrap; + WrapperNode *wrap; - context.prel = inner_prel; - context.econtext = NULL; - context.hasLeast = false; - context.hasGreatest = false; + /* We aim to persist cached context->ranges */ + InitWalkerContextCustomNode(&context, inner_prel, + NULL, CurrentMemoryContext, + &context_initialized); wrap 
= walk_expr_tree((Expr *) lfirst(lc), &context); paramsel *= wrap->paramsel; @@ -223,7 +225,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); /* Make wrappers over restrictions and collect final rangeset */ - InitWalkerContext(&context, prel, NULL); + InitWalkerContext(&context, prel, NULL, CurrentMemoryContext); wrappers = NIL; foreach(lc, rel->baserestrictinfo) { diff --git a/src/nodes_common.c b/src/nodes_common.c index 7b09c243df..e16b5d5750 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -504,7 +504,8 @@ rescan_append_common(CustomScanState *node) ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); InitWalkerContextCustomNode(&scan_state->wcxt, scan_state->prel, - econtext, &scan_state->wcxt_cached); + econtext, CurrentMemoryContext, + &scan_state->wcxt_cached); foreach (lc, scan_state->custom_exprs) { diff --git a/src/partition_filter.c b/src/partition_filter.c index 9e03c0db0b..88f1777490 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -1,6 +1,7 @@ #include "partition_filter.h" #include "utils.h" #include "utils/guc.h" +#include "utils/memutils.h" #include "nodes/nodeFuncs.h" @@ -139,6 +140,8 @@ partition_filter_exec(CustomScanState *node) if (!TupIsNull(slot)) { + MemoryContext old_cxt; + List *ranges; int nparts; Oid *parts; @@ -160,11 +163,16 @@ partition_filter_exec(CustomScanState *node) CopyToTempConst(constbyval, attbyval); InitWalkerContextCustomNode(&state->wcxt, state->prel, - econtext, &state->wcxt_cached); + econtext, CurrentMemoryContext, + &state->wcxt_cached); + + /* Switch to per-tuple context */ + old_cxt = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); ranges = walk_expr_tree((Expr *) &state->temp_const, &state->wcxt)->rangeset; parts = get_partition_oids(ranges, &nparts, state->prel); + if (nparts > 1) elog(ERROR, "PartitionFilter selected more than one 
partition"); else if (nparts == 0) @@ -173,11 +181,16 @@ partition_filter_exec(CustomScanState *node) state->temp_const.constvalue, state->temp_const.consttype); + /* Now we have to refresh state->wcxt->ranges manually */ refresh_walker_context_ranges(&state->wcxt); } else selected_partid = parts[0]; + /* Switch back and clean up per-tuple context */ + MemoryContextSwitchTo(old_cxt); + ResetExprContext(econtext); + /* Replace main table with suitable partition */ estate->es_result_relation_info = getResultRelInfo(selected_partid, state); diff --git a/src/pathman.h b/src/pathman.h index 37b3af6209..231a48d8b4 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -276,6 +276,9 @@ typedef struct /* Main partitioning structure */ const PartRelationInfo *prel; + /* Long-living context for cached values */ + MemoryContext persistent_mcxt; + /* Cached values */ const RangeEntry *ranges; /* cached RangeEntry array (copy) */ size_t nranges; /* number of RangeEntries */ @@ -291,13 +294,15 @@ typedef struct /* * Usual initialization procedure for WalkerContext */ -#define InitWalkerContext(context, prel_info, ecxt) \ +#define InitWalkerContext(context, prel_info, ecxt, mcxt) \ do { \ (context)->prel = (prel_info); \ (context)->econtext = (ecxt); \ (context)->ranges = NULL; \ + (context)->nranges = 0; \ (context)->hasLeast = false; \ (context)->hasGreatest = false; \ + (context)->persistent_mcxt = (mcxt); \ } while (0) /* @@ -305,19 +310,24 @@ typedef struct * in case of range partitioning, so 'wcxt' is stored * inside of Custom Node */ -#define InitWalkerContextCustomNode(context, prel_info, ecxt, isCached) \ +#define InitWalkerContextCustomNode(context, prel_info, ecxt, mcxt, isCached) \ do { \ if (!*isCached) \ { \ (context)->prel = prel_info; \ (context)->econtext = ecxt; \ (context)->ranges = NULL; \ + (context)->nranges = 0; \ + (context)->persistent_mcxt = (mcxt); \ *isCached = true; \ } \ (context)->hasLeast = false; \ (context)->hasGreatest = false; \ } while (0) +/* 
Check that WalkerContext contains ExprContext (plan execution stage) */ +#define WcxtHasExprContext(wcxt) ( (wcxt)->econtext ) + void select_range_partitions(const Datum value, const bool byVal, FmgrInfo *cmp_func, diff --git a/src/pg_pathman.c b/src/pg_pathman.c index ff12aa634f..8fe9b5f756 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -100,8 +100,6 @@ static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo #define check_gt(flinfo, arg1, arg2) \ ((int) FunctionCall2(cmp_func, arg1, arg2) > 0) -#define WcxtHasExprContext(wcxt) ( (wcxt)->econtext ) - /* We can transform Param into Const provided that 'econtext' is available */ #define IsConstValue(wcxt, node) \ ( IsA((node), Const) || (WcxtHasExprContext(wcxt) ? IsA((node), Param) : false) ) @@ -331,7 +329,7 @@ handle_modification_query(Query *parse) return; /* Parse syntax tree and extract partition ranges */ - InitWalkerContext(&context, prel, NULL); + InitWalkerContext(&context, prel, NULL, CurrentMemoryContext); wrap = walk_expr_tree(expr, &context); finish_least_greatest(wrap, &context); clear_walker_context(&context); @@ -663,12 +661,22 @@ wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysTrue) void refresh_walker_context_ranges(WalkerContext *context) { - RangeRelation *rangerel; + RangeRelation *rangerel; + MemoryContext old_mcxt; rangerel = get_pathman_range_relation(context->prel->key.relid, NULL); + /* Clear old cached data */ + clear_walker_context(context); + + /* Switch to long-living context which should store data */ + old_mcxt = MemoryContextSwitchTo(context->persistent_mcxt); + context->ranges = dsm_array_get_pointer(&rangerel->ranges, true); context->nranges = rangerel->ranges.elem_count; + + /* Switch back */ + MemoryContextSwitchTo(old_mcxt); } /* @@ -799,6 +807,7 @@ select_range_partitions(const Datum value, } else { + Assert(ranges); Assert(cmp_func); /* Corner cases */ @@ -978,6 +987,7 @@ handle_binary_opexpr(WalkerContext *context, 
WrapperNode *result, case PT_RANGE: if (get_pathman_range_relation(context->prel->key.relid, NULL)) { + /* Refresh 'ranges' cache if necessary */ if (!context->ranges) refresh_walker_context_ranges(context); @@ -1129,6 +1139,7 @@ handle_const(const Const *c, WalkerContext *context) tce = lookup_type_cache(c->consttype, TYPECACHE_CMP_PROC_FINFO); + /* Refresh 'ranges' cache if necessary */ if (!context->ranges) refresh_walker_context_ranges(context); From 039ade149b38d557d2ef5d3ac75fa992224c92bb Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 28 Jun 2016 14:38:25 +0300 Subject: [PATCH 025/184] add PartitionFilter info to README.md --- README.md | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 12222de9fe..d6430c3b69 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,6 @@ Based on the partitioning type and condition's operator, `pg_pathman` searches f More interesting features are yet to come. Stay tuned! ## Roadmap - * Replace INSERT triggers with a custom node (aka **PartitionFilter**) * Implement concurrent partitioning (much more responsive) * Implement HASH-patitioning for non-integer attributes * Optimize hash join (both tables are partitioned by join key) @@ -162,6 +161,22 @@ Permanently disable `pg_pathman` partitioning mechanism for the specified parent - `RuntimeAppend` (overrides `Append` plan node) - `RuntimeMergeAppend` (overrides `MergeAppend` plan node) +- `PartitionFilter` (drop-in replacement for INSERT triggers) + +`PartitionFilter` acts as a *proxy node* for INSERT's child scan, which means it can redirect output tuples to the corresponding partition: + +``` +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_table +SELECT generate_series(1, 10), random(); + QUERY PLAN +----------------------------------------- + Insert on partitioned_table + -> Custom Scan (PartitionFilter) + -> Subquery Scan on "*SELECT*" + -> Result +(4 rows) +``` `RuntimeAppend` and `RuntimeMergeAppend` 
have much in common: they come in handy in a case when WHERE condition takes form of: ``` @@ -417,6 +432,7 @@ There are several user-accessible [GUC](https://www.postgresql.org/docs/9.5/stat - `pg_pathman.enable` --- disable (or enable) `pg_pathman` completely - `pg_pathman.enable_runtimeappend` --- toggle `RuntimeAppend` custom node on\off - `pg_pathman.enable_runtimemergeappend` --- toggle `RuntimeMergeAppend` custom node on\off + - `pg_pathman.enable_partitionfilter` --- toggle `PartitionFilter` custom node on\off To **permanently** disable `pg_pathman` for some previously partitioned table, use the `disable_partitioning()` function: ``` @@ -428,6 +444,6 @@ All sections and data will remain unchanged and will be handled by the standard Do not hesitate to post your issues, questions and new ideas at the [issues](https://github.com/postgrespro/pg_pathman/issues) page. ## Authors -Ildar Musin Postgres Professional Ltd., Russia -Alexander Korotkov Postgres Professional Ltd., Russia -Dmitry Ivanov Postgres Professional Ltd., Russia +Ildar Musin Postgres Professional Ltd., Russia +Alexander Korotkov Postgres Professional Ltd., Russia +Dmitry Ivanov Postgres Professional Ltd., Russia From 143fb2542b3b52fd1bfc38a23187d76bba683898 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sun, 17 Jul 2016 01:45:42 +0300 Subject: [PATCH 026/184] fix isolation tests for branch 'picky_nodes' --- expected/rollback_on_create_partitions.out | 48 +++++++++++----------- specs/rollback_on_create_partitions.spec | 3 +- 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/expected/rollback_on_create_partitions.out b/expected/rollback_on_create_partitions.out index 3c3e2d933a..8d314634fc 100644 --- a/expected/rollback_on_create_partitions.out +++ b/expected/rollback_on_create_partitions.out @@ -74,8 +74,8 @@ create_range_partitions 10 step savepoint_b: SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: 
SELECT drop_partitions('range_rel'); +drop_partitions 10 step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; @@ -100,8 +100,8 @@ create_range_partitions 10 step savepoint_b: SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; @@ -124,8 +124,8 @@ create_range_partitions 10 step savepoint_b: SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 step savepoint_c: SAVEPOINT c; @@ -161,8 +161,8 @@ create_range_partitions 10 step savepoint_b: SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 step savepoint_c: SAVEPOINT c; @@ -207,8 +207,8 @@ create_range_partitions 10 step savepoint_b: SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; @@ -238,8 +238,8 @@ create_range_partitions 10 step savepoint_b: SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; @@ -269,8 +269,8 @@ create_range_partitions 10 step savepoint_b: SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; @@ -280,8 +280,8 @@ Seq Scan on range_rel step savepoint_c: SAVEPOINT c; WARNING: All changes in 
partitioned table 'range_rel' will be discarded step rollback_b: ROLLBACK TO SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; @@ -305,8 +305,8 @@ create_range_partitions 10 step savepoint_b: SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; @@ -316,8 +316,8 @@ Seq Scan on range_rel step savepoint_c: SAVEPOINT c; WARNING: All changes in partitioned table 'range_rel' will be discarded step rollback_b: ROLLBACK TO SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; @@ -339,8 +339,8 @@ create_range_partitions 10 step savepoint_b: SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 WARNING: All changes in partitioned table 'range_rel' will be discarded @@ -380,8 +380,8 @@ create_range_partitions 10 step savepoint_b: SAVEPOINT b; -step drop_partitions: SELECT drop_range_partitions('range_rel'); -drop_range_partitions +step drop_partitions: SELECT drop_partitions('range_rel'); +drop_partitions 10 WARNING: All changes in partitioned table 'range_rel' will be discarded diff --git a/specs/rollback_on_create_partitions.spec b/specs/rollback_on_create_partitions.spec index 83e634e632..41fc48d114 100644 --- a/specs/rollback_on_create_partitions.spec +++ b/specs/rollback_on_create_partitions.spec @@ -6,7 +6,6 @@ setup teardown { - SELECT drop_range_partitions('range_rel'); DROP TABLE 
range_rel CASCADE; DROP EXTENSION pg_pathman; } @@ -17,7 +16,7 @@ step "rollback" { ROLLBACK; } step "commit" { COMMIT; } step "insert_data" { INSERT INTO range_rel SELECT generate_series(1, 10000); } step "create_partitions" { SELECT create_range_partitions('range_rel', 'id', 1, 1000); } -step "drop_partitions" { SELECT drop_range_partitions('range_rel'); } +step "drop_partitions" { SELECT drop_partitions('range_rel'); } step "savepoint_a" { SAVEPOINT a; } step "rollback_a" { ROLLBACK TO SAVEPOINT a; } step "savepoint_b" { SAVEPOINT b; } From 5f6fed0e6cd73b7ce441337a062ca9fc208e9217 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 18 Jul 2016 17:47:27 +0300 Subject: [PATCH 027/184] make handle_const() check for 'for_insert' flag --- src/hooks.c | 6 +++--- src/nodes_common.c | 2 +- src/partition_filter.c | 4 ++-- src/pathman.h | 8 ++++++-- src/pg_pathman.c | 19 ++++++++++++++++--- 5 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 357b13d8c5..4ef66675e8 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -91,8 +91,8 @@ pathman_join_pathlist_hook(PlannerInfo *root, WrapperNode *wrap; /* We aim to persist cached context->ranges */ - InitWalkerContextCustomNode(&context, inner_prel, - NULL, CurrentMemoryContext, + InitWalkerContextCustomNode(&context, inner_prel, NULL, + CurrentMemoryContext, false, &context_initialized); wrap = walk_expr_tree((Expr *) lfirst(lc), &context); @@ -225,7 +225,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); /* Make wrappers over restrictions and collect final rangeset */ - InitWalkerContext(&context, prel, NULL, CurrentMemoryContext); + InitWalkerContext(&context, prel, NULL, CurrentMemoryContext, false); wrappers = NIL; foreach(lc, rel->baserestrictinfo) { diff --git a/src/nodes_common.c b/src/nodes_common.c index b47b1a5e2a..cacd21205d 100644 --- a/src/nodes_common.c +++ 
b/src/nodes_common.c @@ -503,7 +503,7 @@ rescan_append_common(CustomScanState *node) ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); InitWalkerContextCustomNode(&scan_state->wcxt, scan_state->prel, - econtext, CurrentMemoryContext, + econtext, CurrentMemoryContext, false, &scan_state->wcxt_cached); foreach (lc, scan_state->custom_exprs) diff --git a/src/partition_filter.c b/src/partition_filter.c index 88f1777490..5f13dc96ff 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -162,8 +162,8 @@ partition_filter_exec(CustomScanState *node) CopyToTempConst(constlen, attlen); CopyToTempConst(constbyval, attbyval); - InitWalkerContextCustomNode(&state->wcxt, state->prel, - econtext, CurrentMemoryContext, + InitWalkerContextCustomNode(&state->wcxt, state->prel, econtext, + CurrentMemoryContext, true, &state->wcxt_cached); /* Switch to per-tuple context */ diff --git a/src/pathman.h b/src/pathman.h index d5c6f4a1c2..6e3a203974 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -289,12 +289,14 @@ typedef struct hasGreatest; Datum least, greatest; + + bool for_insert; /* are we in PartitionFilter now? 
*/ } WalkerContext; /* * Usual initialization procedure for WalkerContext */ -#define InitWalkerContext(context, prel_info, ecxt, mcxt) \ +#define InitWalkerContext(context, prel_info, ecxt, mcxt, for_ins) \ do { \ (context)->prel = (prel_info); \ (context)->econtext = (ecxt); \ @@ -303,6 +305,7 @@ typedef struct (context)->hasLeast = false; \ (context)->hasGreatest = false; \ (context)->persistent_mcxt = (mcxt); \ + (context)->for_insert = (for_ins); \ } while (0) /* @@ -310,7 +313,7 @@ typedef struct * in case of range partitioning, so 'wcxt' is stored * inside of Custom Node */ -#define InitWalkerContextCustomNode(context, prel_info, ecxt, mcxt, isCached) \ +#define InitWalkerContextCustomNode(context, prel_info, ecxt, mcxt, for_ins, isCached) \ do { \ if (!*isCached) \ { \ @@ -319,6 +322,7 @@ typedef struct (context)->ranges = NULL; \ (context)->nranges = 0; \ (context)->persistent_mcxt = (mcxt); \ + (context)->for_insert = (for_ins); \ *isCached = true; \ } \ (context)->hasLeast = false; \ diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 5f1aadc97f..3bdd9b8a55 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -331,7 +331,7 @@ handle_modification_query(Query *parse) return; /* Parse syntax tree and extract partition ranges */ - InitWalkerContext(&context, prel, NULL, CurrentMemoryContext); + InitWalkerContext(&context, prel, NULL, CurrentMemoryContext, false); wrap = walk_expr_tree(expr, &context); finish_least_greatest(wrap, &context); clear_walker_context(&context); @@ -1125,6 +1125,20 @@ handle_const(const Const *c, WalkerContext *context) const PartRelationInfo *prel = context->prel; WrapperNode *result = (WrapperNode *) palloc(sizeof(WrapperNode)); + /* + * Had to add this check for queries like: + * select * from test.hash_rel where txt = NULL; + */ + if (!context->for_insert) + { + result->rangeset = list_make1_irange(make_irange(0, + prel->children_count - 1, + true)); + result->paramsel = 1.0; + + return result; + } + switch 
(prel->parttype) { case PT_HASH: @@ -1156,8 +1170,7 @@ handle_const(const Const *c, WalkerContext *context) break; default: - result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, true)); - result->paramsel = 1.0; + elog(ERROR, "Unknown partitioning type %u", prel->parttype); break; } From 08b0125bab6219458e4c3eba00fcfe11f1f20489 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Mon, 18 Jul 2016 18:17:53 +0300 Subject: [PATCH 028/184] set result->orig in handle_const() --- src/pg_pathman.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 3bdd9b8a55..6e4dacc7dd 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -1125,6 +1125,8 @@ handle_const(const Const *c, WalkerContext *context) const PartRelationInfo *prel = context->prel; WrapperNode *result = (WrapperNode *) palloc(sizeof(WrapperNode)); + result->orig = (const Node *) c; + /* * Had to add this check for queries like: * select * from test.hash_rel where txt = NULL; From 57305e210e2eef17c583436f20b3a408fb356117 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 3 Aug 2016 18:16:05 +0300 Subject: [PATCH 029/184] [WIP] huge refactoring, use local process cache instead of shared memory --- Makefile | 11 +- hash.sql | 71 +-- init.sql | 52 +- range.sql | 366 +++++++------ sql/pg_pathman.sql | 16 +- src/dsm_array.c | 19 +- src/dsm_array.h | 45 ++ src/hooks.c | 212 +++++--- src/hooks.h | 3 + src/init.c | 1051 +++++++++++++++++------------------- src/init.h | 47 ++ src/nodes_common.c | 11 +- src/nodes_common.h | 6 +- src/partition_filter.c | 16 +- src/partition_filter.h | 18 +- src/pathman.h | 209 ++----- src/pg_pathman.c | 143 +++-- src/pl_funcs.c | 386 +++---------- src/rangeset.c | 5 +- src/rangeset.h | 75 +++ src/relation_info.c | 388 +++++++++++++ src/relation_info.h | 122 +++++ src/runtime_merge_append.c | 5 +- src/runtime_merge_append.h | 4 +- src/runtimeappend.c | 5 +- src/runtimeappend.h | 7 +- src/utils.c | 63 ++- src/utils.h | 6 +- 
src/worker.c | 261 ++++++--- 29 files changed, 2093 insertions(+), 1530 deletions(-) create mode 100644 src/dsm_array.h create mode 100644 src/init.h create mode 100644 src/rangeset.h create mode 100644 src/relation_info.c create mode 100644 src/relation_info.h diff --git a/Makefile b/Makefile index 542c8fe213..b9d0226275 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,9 @@ # contrib/pg_pathman/Makefile MODULE_big = pg_pathman -OBJS = src/init.o src/utils.o src/partition_filter.o src/runtimeappend.o src/runtime_merge_append.o src/pg_pathman.o src/dsm_array.o \ - src/rangeset.o src/pl_funcs.o src/worker.o src/hooks.o src/nodes_common.o $(WIN32RES) +OBJS = src/init.o src/relation_info.o src/utils.o src/partition_filter.o src/runtimeappend.o \ + src/runtime_merge_append.o src/pg_pathman.o src/dsm_array.o src/rangeset.o src/pl_funcs.o \ + src/worker.o src/hooks.o src/nodes_common.o $(WIN32RES) EXTENSION = pg_pathman EXTVERSION = 0.1 @@ -35,6 +36,6 @@ submake-isolation: isolationcheck: | submake-isolation $(MKDIR_P) isolation_output $(pg_isolation_regress_check) \ - --temp-config=$(top_srcdir)/$(subdir)/conf.add \ - --outputdir=./isolation_output \ - $(ISOLATIONCHECKS) + --temp-config=$(top_srcdir)/$(subdir)/conf.add \ + --outputdir=./isolation_output \ + $(ISOLATIONCHECKS) diff --git a/hash.sql b/hash.sql index 280f5ee11b..e15fcc505a 100644 --- a/hash.sql +++ b/hash.sql @@ -36,6 +36,10 @@ BEGIN v_hashfunc := @extschema@.get_type_hash_func(v_type::regtype::oid)::regproc; + /* Insert new entry to pathman config */ + INSERT INTO @extschema@.pathman_config (partrel, attname, parttype) + VALUES (relation, attribute, 1); + /* Create partitions and update pg_pathman configuration */ FOR partnum IN 0..partitions_count-1 LOOP @@ -43,27 +47,18 @@ BEGIN v_plain_schema, quote_ident(v_plain_relname || '_' || partnum)); - EXECUTE format('CREATE TABLE %s (LIKE %s INCLUDING ALL)' - , v_child_relname - , v_relname); - - EXECUTE format('ALTER TABLE %s INHERIT %s' + EXECUTE 
format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)' , v_child_relname , v_relname); - EXECUTE format('ALTER TABLE %s ADD CHECK (@extschema@.get_hash(%s(%s), %s) = %s)' + EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (@extschema@.get_hash(%s(%s), %s) = %s)' , v_child_relname + , @extschema@.build_check_constraint_name(v_child_relname::regclass, attribute) , v_hashfunc , attribute , partitions_count , partnum); END LOOP; - INSERT INTO @extschema@.pathman_config (relname, attname, parttype) - VALUES (v_relname, attribute, 1); - - /* Create triggers */ - /* Do not create update trigger by default */ - -- PERFORM @extschema@.create_hash_update_trigger(relation, attribute, partitions_count); /* Notify backend about changes */ PERFORM @extschema@.on_create_partitions(relation::oid); @@ -83,25 +78,36 @@ CREATE OR REPLACE FUNCTION @extschema@.create_hash_update_trigger( RETURNS VOID AS $$ DECLARE - func TEXT := ' - CREATE OR REPLACE FUNCTION %s() - RETURNS TRIGGER AS - $body$ - DECLARE old_hash INTEGER; new_hash INTEGER; q TEXT; - BEGIN - old_hash := @extschema@.get_hash(%9$s(OLD.%2$s), %3$s); - new_hash := @extschema@.get_hash(%9$s(NEW.%2$s), %3$s); - IF old_hash = new_hash THEN RETURN NEW; END IF; - q := format(''DELETE FROM %8$s WHERE %4$s'', old_hash); - EXECUTE q USING %5$s; - q := format(''INSERT INTO %8$s VALUES (%6$s)'', new_hash); - EXECUTE q USING %7$s; - RETURN NULL; - END $body$ LANGUAGE plpgsql'; - trigger TEXT := ' - CREATE TRIGGER %s - BEFORE UPDATE ON %s - FOR EACH ROW EXECUTE PROCEDURE %s()'; + func TEXT := 'CREATE OR REPLACE FUNCTION %s() + RETURNS TRIGGER AS + $body$ + DECLARE + old_hash INTEGER; + new_hash INTEGER; + q TEXT; + + BEGIN + old_hash := @extschema@.get_hash(%9$s(OLD.%2$s), %3$s); + new_hash := @extschema@.get_hash(%9$s(NEW.%2$s), %3$s); + + IF old_hash = new_hash THEN + RETURN NEW; + END IF; + + q := format(''DELETE FROM %8$s WHERE %4$s'', old_hash); + EXECUTE q USING %5$s; + + q := format(''INSERT INTO %8$s VALUES 
(%6$s)'', new_hash); + EXECUTE q USING %7$s; + + RETURN NULL; + END $body$ + LANGUAGE plpgsql'; + + trigger TEXT := 'CREATE TRIGGER %s + BEFORE UPDATE ON %s + FOR EACH ROW EXECUTE PROCEDURE %s()'; + att_names TEXT; old_fields TEXT; new_fields TEXT; @@ -117,6 +123,7 @@ DECLARE child_relname_format TEXT; atttype TEXT; hashfunc TEXT; + BEGIN SELECT * INTO plain_schema, plain_relname FROM @extschema@.get_plain_schema_and_relname(relation); @@ -136,7 +143,7 @@ BEGIN att_val_fmt, att_fmt; - attr := attname FROM @extschema@.pathman_config WHERE relname::regclass = relation; + attr := attname FROM @extschema@.pathman_config WHERE partrel = relation; IF attr IS NULL THEN RAISE EXCEPTION 'Table % is not partitioned', quote_ident(relation::TEXT); diff --git a/init.sql b/init.sql index 170d127e5f..22d0707253 100644 --- a/init.sql +++ b/init.sql @@ -10,20 +10,23 @@ /* * Pathman config - * relname - schema qualified relation name - * attname - partitioning key - * parttype - partitioning type: - * 1 - HASH - * 2 - RANGE - * range_interval - base interval for RANGE partitioning in string representation + * partrel - regclass (relation type, stored as Oid) + * attname - partitioning key + * parttype - partitioning type: + * 1 - HASH + * 2 - RANGE + * range_interval - base interval for RANGE partitioning as string */ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config ( id SERIAL PRIMARY KEY, - relname VARCHAR(127), - attname VARCHAR(127), - parttype INTEGER, - range_interval TEXT + partrel REGCLASS NOT NULL, + attname TEXT NOT NULL, + parttype INTEGER NOT NULL, + range_interval TEXT, + + CHECK (parttype >= 1 OR parttype <= 2) /* check for allowed part types */ ); + SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config', ''); CREATE OR REPLACE FUNCTION @extschema@.on_create_partitions(relid OID) @@ -112,13 +115,13 @@ LANGUAGE plpgsql; /* * Disable pathman partitioning for specified relation */ -CREATE OR REPLACE FUNCTION @extschema@.disable_partitioning(IN 
relation TEXT) +CREATE OR REPLACE FUNCTION @extschema@.disable_partitioning(relation regclass) RETURNS VOID AS $$ BEGIN relation := @extschema@.validate_relname(relation); - DELETE FROM @extschema@.pathman_config WHERE relname = relation; + DELETE FROM @extschema@.pathman_config WHERE partrel = relation; PERFORM @extschema@.drop_triggers(relation); /* Notify backend about changes */ @@ -176,7 +179,7 @@ DECLARE v_rec RECORD; is_referenced BOOLEAN; BEGIN - IF EXISTS (SELECT * FROM @extschema@.pathman_config WHERE relname::regclass = p_relation) THEN + IF EXISTS (SELECT * FROM @extschema@.pathman_config WHERE partrel = p_relation) THEN RAISE EXCEPTION 'Relation "%" has already been partitioned', p_relation; END IF; @@ -298,7 +301,7 @@ $$ LANGUAGE plpgsql; /* - * DDL trigger that deletes entry from pathman_config + * DDL trigger that deletes entry from pathman_config table */ CREATE OR REPLACE FUNCTION @extschema@.pathman_ddl_trigger_func() RETURNS event_trigger AS @@ -307,11 +310,12 @@ DECLARE obj record; BEGIN FOR obj IN SELECT * FROM pg_event_trigger_dropped_objects() as events - JOIN @extschema@.pathman_config as cfg ON cfg.relname = events.object_identity + JOIN @extschema@.pathman_config as cfg + ON partrel::oid = events.objid LOOP IF obj.object_type = 'table' THEN - EXECUTE 'DELETE FROM @extschema@.pathman_config WHERE relname = $1' - USING obj.object_identity; + EXECUTE 'DELETE FROM @extschema@.pathman_config WHERE partrel = $1' + USING obj.objid; END IF; END LOOP; END @@ -375,7 +379,7 @@ BEGIN PERFORM @extschema@.drop_triggers(relation); WITH config_num_deleted AS (DELETE FROM @extschema@.pathman_config - WHERE relname::regclass = relation + WHERE partrel = relation RETURNING *) SELECT count(*) from config_num_deleted INTO conf_num_del; @@ -417,3 +421,15 @@ RETURNS OID AS 'pg_pathman', 'get_type_hash_func' LANGUAGE C STRICT; */ CREATE OR REPLACE FUNCTION @extschema@.get_hash(INTEGER, INTEGER) RETURNS INTEGER AS 'pg_pathman', 'get_hash' LANGUAGE C STRICT; + + 
+/* + * Build check constraint name for a specified relation's column + */ +CREATE OR REPLACE FUNCTION @extschema@.build_check_constraint_name(REGCLASS, INT2) +RETURNS TEXT AS 'pg_pathman', 'build_check_constraint_name_attnum' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION @extschema@.build_check_constraint_name(REGCLASS, TEXT) +RETURNS TEXT AS 'pg_pathman', 'build_check_constraint_name_attname' +LANGUAGE C STRICT; diff --git a/range.sql b/range.sql index 4d71893533..4b450363a0 100644 --- a/range.sql +++ b/range.sql @@ -19,7 +19,6 @@ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION @extschema@.create_or_replace_sequence(plain_schema TEXT, plain_relname TEXT, OUT seq_name TEXT) AS $$ -DECLARE BEGIN seq_name := @extschema@.get_sequence_name(plain_schema, plain_relname); EXECUTE format('DROP SEQUENCE IF EXISTS %s', seq_name); @@ -40,13 +39,14 @@ CREATE OR REPLACE FUNCTION @extschema@.create_range_partitions( RETURNS INTEGER AS $$ DECLARE - v_relname TEXT; - v_rows_count INTEGER; - v_max p_start_value%TYPE; - v_cur_value p_start_value%TYPE := p_start_value; - v_plain_relname TEXT; - v_plain_schema TEXT; - i INTEGER; + v_relname TEXT; + v_rows_count INTEGER; + v_max p_start_value%TYPE; + v_cur_value p_start_value%TYPE := p_start_value; + v_plain_relname TEXT; + v_plain_schema TEXT; + i INTEGER; + BEGIN v_relname := @extschema@.validate_relname(p_relation); p_attribute := lower(p_attribute); @@ -83,8 +83,8 @@ BEGIN PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); /* Insert new entry to pathman config */ - INSERT INTO @extschema@.pathman_config (relname, attname, parttype, range_interval) - VALUES (v_relname, p_attribute, 2, p_interval::text); + INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) + VALUES (p_relation, p_attribute, 2, p_interval::text); /* create first partition */ FOR i IN 1..p_count @@ -95,8 +95,6 @@ BEGIN p_start_value := p_start_value + p_interval; END LOOP; - /* Create triggers */ - -- 
PERFORM create_hash_update_trigger(relation, attribute, partitions_count); /* Notify backend about changes */ PERFORM @extschema@.on_create_partitions(p_relation::oid); @@ -122,15 +120,15 @@ CREATE OR REPLACE FUNCTION @extschema@.create_range_partitions( RETURNS INTEGER AS $$ DECLARE - v_relname TEXT; - v_rows_count INTEGER; - v_max p_start_value%TYPE; - v_cur_value p_start_value%TYPE := p_start_value; - i INTEGER; - v_plain_schema TEXT; - v_plain_relname TEXT; + v_rows_count INTEGER; + v_max p_start_value%TYPE; + v_cur_value p_start_value%TYPE := p_start_value; + v_plain_schema TEXT; + v_plain_relname TEXT; + i INTEGER; + BEGIN - v_relname := @extschema@.validate_relname(p_relation); + PERFORM @extschema@.validate_relname(p_relation); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(p_relation, p_attribute); @@ -171,8 +169,8 @@ BEGIN PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); /* Insert new entry to pathman config */ - INSERT INTO @extschema@.pathman_config (relname, attname, parttype, range_interval) - VALUES (v_relname, p_attribute, 2, p_interval::text); + INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) + VALUES (p_relation, p_attribute, 2, p_interval::text); /* create first partition */ FOR i IN 1..p_count @@ -208,12 +206,12 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( RETURNS INTEGER AS $$ DECLARE - v_relname TEXT; - v_plain_schema TEXT; - v_plain_relname TEXT; - i INTEGER := 0; + v_plain_schema TEXT; + v_plain_relname TEXT; + i INTEGER := 0; + BEGIN - v_relname := @extschema@.validate_relname(p_relation); + PERFORM @extschema@.validate_relname(p_relation); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(p_relation, p_attribute); @@ -232,8 +230,8 @@ BEGIN , p_end_value); /* Insert new entry to pathman config */ - INSERT INTO @extschema@.pathman_config (relname, attname, parttype, range_interval) - VALUES 
(v_relname, p_attribute, 2, p_interval::text); + INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) + VALUES (p_relation, p_attribute, 2, p_interval::text); WHILE p_start_value <= p_end_value LOOP @@ -271,12 +269,12 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( RETURNS INTEGER AS $$ DECLARE - v_relname TEXT; - v_plain_schema TEXT; - v_plain_relname TEXT; - i INTEGER := 0; + v_plain_schema TEXT; + v_plain_relname TEXT; + i INTEGER := 0; + BEGIN - v_relname := @extschema@.validate_relname(p_relation); + PERFORM @extschema@.validate_relname(p_relation); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(p_relation, p_attribute); @@ -284,15 +282,15 @@ BEGIN SELECT * INTO v_plain_schema, v_plain_relname FROM @extschema@.get_plain_schema_and_relname(p_relation); PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); - /* check boundaries */ + /* Check boundaries */ PERFORM @extschema@.check_boundaries(p_relation , p_attribute , p_start_value , p_end_value); /* Insert new entry to pathman config */ - INSERT INTO @extschema@.pathman_config (relname, attname, parttype, range_interval) - VALUES (v_relname, p_attribute, 2, p_interval::text); + INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) + VALUES (p_relation, p_attribute, 2, p_interval::text); WHILE p_start_value <= p_end_value LOOP @@ -308,7 +306,7 @@ BEGIN /* Copy data */ PERFORM @extschema@.partition_data(p_relation); - RETURN i; + RETURN i; /* number of created partitions */ EXCEPTION WHEN others THEN RAISE EXCEPTION '%', SQLERRM; @@ -326,9 +324,10 @@ CREATE OR REPLACE FUNCTION @extschema@.check_boundaries( RETURNS VOID AS $$ DECLARE - v_min p_start_value%TYPE; - v_max p_start_value%TYPE; - v_count INTEGER; + v_min p_start_value%TYPE; + v_max p_start_value%TYPE; + v_count INTEGER; + BEGIN /* Get min and max values */ EXECUTE format('SELECT count(*), min(%s), max(%s) FROM %s 
WHERE NOT %s IS NULL', @@ -364,8 +363,9 @@ CREATE OR REPLACE FUNCTION @extschema@.get_range_condition( RETURNS TEXT AS $$ DECLARE - v_type REGTYPE; - v_sql TEXT; + v_type REGTYPE; + v_sql TEXT; + BEGIN /* determine the type of values */ v_type := pg_typeof(p_start_value); @@ -397,19 +397,20 @@ CREATE OR REPLACE FUNCTION @extschema@.create_single_range_partition( RETURNS TEXT AS $$ DECLARE - v_part_num INT; - v_child_relname TEXT; - v_plain_child_relname TEXT; - v_attname TEXT; - v_sql TEXT; - v_cond TEXT; - v_plain_schema TEXT; - v_plain_relname TEXT; - v_child_relname_exists INTEGER := 1; - v_seq_name TEXT; + v_part_num INT; + v_child_relname TEXT; + v_plain_child_relname TEXT; + v_attname TEXT; + v_sql TEXT; + v_cond TEXT; + v_plain_schema TEXT; + v_plain_relname TEXT; + v_child_relname_exists BOOL; + v_seq_name TEXT; + BEGIN v_attname := attname FROM @extschema@.pathman_config - WHERE relname::regclass = p_parent; + WHERE partrel = p_parent; IF v_attname IS NULL THEN RAISE EXCEPTION 'Table % is not partitioned', quote_ident(p_parent::TEXT); @@ -425,27 +426,24 @@ BEGIN v_part_num := nextval(v_seq_name); v_plain_child_relname := format('%s_%s', v_plain_relname, v_part_num); v_child_relname := format('%s.%s', - v_plain_schema, + quote_ident(v_plain_schema), quote_ident(v_plain_child_relname)); - v_child_relname_exists := count(*) + v_child_relname_exists := count(*) > 0 FROM pg_class - WHERE relnamespace::regnamespace || '.' || relname = v_child_relname + WHERE v_child_relname = quote_ident(relnamespace::regnamespace::text) || + '.' 
|| quote_ident(relname) LIMIT 1; - EXIT WHEN v_child_relname_exists = 0; + EXIT WHEN v_child_relname_exists = false; END LOOP; - EXECUTE format('CREATE TABLE %s (LIKE %s INCLUDING ALL)' - , v_child_relname - , p_parent); - - EXECUTE format('ALTER TABLE %s INHERIT %s' + EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)' , v_child_relname - , p_parent); + , @extschema@.get_schema_qualified_name(p_parent, '.')); v_cond := @extschema@.get_range_condition(v_attname, p_start_value, p_end_value); v_sql := format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)' , v_child_relname - , quote_ident(format('%s_%s_check', v_plain_schema, v_plain_child_relname)) + , @extschema@.build_check_constraint_name(v_child_relname::regclass, v_attname) , v_cond); EXECUTE v_sql; @@ -463,16 +461,17 @@ CREATE OR REPLACE FUNCTION @extschema@.split_range_partition( RETURNS ANYARRAY AS $$ DECLARE - v_parent_relid OID; - v_child_relid OID := p_partition::oid; - v_attname TEXT; - v_cond TEXT; - v_new_partition TEXT; - v_part_type INTEGER; - v_part_relname TEXT; - v_plain_schema TEXT; - v_plain_relname TEXT; - v_check_name TEXT; + v_parent_relid OID; + v_child_relid OID := p_partition::oid; + v_attname TEXT; + v_cond TEXT; + v_new_partition TEXT; + v_part_type INTEGER; + v_part_relname TEXT; + v_plain_schema TEXT; + v_plain_relname TEXT; + v_check_name TEXT; + BEGIN v_part_relname := @extschema@.validate_relname(p_partition); @@ -482,7 +481,7 @@ BEGIN SELECT attname, parttype FROM @extschema@.pathman_config - WHERE relname::regclass = v_parent_relid::regclass + WHERE partrel = v_parent_relid INTO v_attname, v_part_type; IF v_attname IS NULL THEN @@ -532,12 +531,12 @@ BEGIN /* Alter original partition */ RAISE NOTICE 'Altering original partition...'; v_cond := @extschema@.get_range_condition(v_attname, p_range[1], p_value); - v_check_name := quote_ident(format('%s_%s_check', v_plain_schema, v_plain_relname)); + v_check_name := 
@extschema@.build_check_constraint_name(p_partition, v_attname); EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s' , p_partition::text , v_check_name); EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)' - , p_partition + , p_partition::text , v_check_name , v_cond); @@ -559,19 +558,15 @@ CREATE OR REPLACE FUNCTION @extschema@.merge_range_partitions( RETURNS VOID AS $$ DECLARE - v_parent_relid1 OID; - v_parent_relid2 OID; - v_part1_relid OID := p_partition1::oid; - v_part2_relid OID := p_partition2::oid; - v_part1_relname TEXT; - v_part2_relname TEXT; - v_attname TEXT; - v_part_type INTEGER; - v_atttype TEXT; -BEGIN - v_part1_relname := @extschema@.validate_relname(p_partition1); - v_part2_relname := @extschema@.validate_relname(p_partition2); + v_parent_relid1 OID; + v_parent_relid2 OID; + v_part1_relid OID := p_partition1::oid; + v_part2_relid OID := p_partition2::oid; + v_attname TEXT; + v_part_type INTEGER; + v_atttype TEXT; +BEGIN IF v_part1_relid = v_part2_relid THEN RAISE EXCEPTION 'Cannot merge partition to itself'; END IF; @@ -585,7 +580,7 @@ BEGIN SELECT attname, parttype FROM @extschema@.pathman_config - WHERE relname::regclass = v_parent_relid1::regclass + WHERE partrel = v_parent_relid1 INTO v_attname, v_part_type; IF v_attname IS NULL THEN @@ -628,15 +623,13 @@ CREATE OR REPLACE FUNCTION @extschema@.merge_range_partitions_internal( RETURNS ANYARRAY AS $$ DECLARE - v_attname TEXT; - v_cond TEXT; - v_plain_schema TEXT; - v_plain_relname TEXT; - v_child_relname TEXT; - v_check_name TEXT; + v_attname TEXT; + v_cond TEXT; + v_check_name TEXT; + BEGIN SELECT attname FROM @extschema@.pathman_config - WHERE relname::regclass = p_parent_relid::regclass + WHERE partrel = p_parent_relid INTO v_attname; IF v_attname IS NULL THEN @@ -644,9 +637,6 @@ BEGIN quote_ident(p_parent_relid::regclass::text); END IF; - SELECT * INTO v_plain_schema, v_plain_relname - FROM @extschema@.get_plain_schema_and_relname(p_part1); - /* * Get ranges * first and second 
elements of array are MIN and MAX of partition1 @@ -667,7 +657,7 @@ BEGIN /* Alter first partition */ RAISE NOTICE 'Altering first partition...'; - v_check_name := quote_ident(v_plain_schema || '_' || v_plain_relname || '_check'); + v_check_name := @extschema@.build_check_constraint_name(p_part1, v_attname); EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s' , p_part1::text , v_check_name); @@ -698,17 +688,18 @@ CREATE OR REPLACE FUNCTION @extschema@.append_range_partition( RETURNS TEXT AS $$ DECLARE - v_attname TEXT; - v_atttype TEXT; - v_part_name TEXT; - v_interval TEXT; + v_attname TEXT; + v_atttype TEXT; + v_part_name TEXT; + v_interval TEXT; + BEGIN /* Prevent concurrent partition creation */ PERFORM @extschema@.acquire_partitions_lock(); SELECT attname, range_interval FROM @extschema@.pathman_config - WHERE relname::regclass = p_relation + WHERE partrel = p_relation INTO v_attname, v_interval; IF v_attname IS NULL THEN @@ -745,7 +736,8 @@ CREATE OR REPLACE FUNCTION @extschema@.append_partition_internal( RETURNS TEXT AS $$ DECLARE - v_part_name TEXT; + v_part_name TEXT; + BEGIN p_range := @extschema@.get_range_by_idx(p_relation::oid, -1, 0); RAISE NOTICE 'Appending new partition...'; @@ -772,17 +764,18 @@ CREATE OR REPLACE FUNCTION @extschema@.prepend_range_partition(p_relation REGCLA RETURNS TEXT AS $$ DECLARE - v_attname TEXT; - v_atttype TEXT; - v_part_name TEXT; - v_interval TEXT; + v_attname TEXT; + v_atttype TEXT; + v_part_name TEXT; + v_interval TEXT; + BEGIN /* Prevent concurrent partition creation */ PERFORM @extschema@.acquire_partitions_lock(); SELECT attname, range_interval FROM @extschema@.pathman_config - WHERE relname::regclass = p_relation + WHERE partrel = p_relation INTO v_attname, v_interval; IF v_attname IS NULL THEN @@ -819,7 +812,8 @@ CREATE OR REPLACE FUNCTION @extschema@.prepend_partition_internal( RETURNS TEXT AS $$ DECLARE - v_part_name TEXT; + v_part_name TEXT; + BEGIN p_range := @extschema@.get_range_by_idx(p_relation::oid, 0, 0); 
RAISE NOTICE 'Prepending new partition...'; @@ -850,7 +844,8 @@ CREATE OR REPLACE FUNCTION @extschema@.add_range_partition( RETURNS TEXT AS $$ DECLARE - v_part_name TEXT; + v_part_name TEXT; + BEGIN /* Prevent concurrent partition creation */ PERFORM @extschema@.acquire_partitions_lock(); @@ -889,9 +884,10 @@ CREATE OR REPLACE FUNCTION @extschema@.drop_range_partition( RETURNS TEXT AS $$ DECLARE - v_part_name TEXT := p_partition::TEXT; - v_parent TEXT; - v_count INTEGER; + v_part_name TEXT := p_partition::TEXT; + v_parent TEXT; + v_count INTEGER; + BEGIN /* Prevent concurrent partition management */ PERFORM @extschema@.acquire_partitions_lock(); @@ -931,11 +927,12 @@ CREATE OR REPLACE FUNCTION @extschema@.attach_range_partition( RETURNS TEXT AS $$ DECLARE - v_attname TEXT; - v_cond TEXT; - v_plain_partname TEXT; - v_plain_schema TEXT; - rel_persistence CHAR; + v_attname TEXT; + v_cond TEXT; + v_plain_partname TEXT; + v_plain_schema TEXT; + rel_persistence CHAR; + BEGIN /* Ignore temporary tables */ SELECT relpersistence FROM pg_catalog.pg_class WHERE oid = p_partition INTO rel_persistence; @@ -963,7 +960,7 @@ BEGIN /* Set check constraint */ v_attname := attname FROM @extschema@.pathman_config - WHERE relname::regclass = p_relation; + WHERE partrel = p_relation; IF v_attname IS NULL THEN RAISE EXCEPTION 'Table % is not partitioned', quote_ident(p_relation::TEXT); @@ -976,7 +973,7 @@ BEGIN EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)' , p_partition - , v_plain_schema || '_' || quote_ident(v_plain_partname || '_check') + , @extschema@.build_check_constraint_name(p_partition, v_attname) , v_cond); /* Invalidate cache */ @@ -1001,7 +998,9 @@ CREATE OR REPLACE FUNCTION @extschema@.detach_range_partition( RETURNS TEXT AS $$ DECLARE - v_parent TEXT; + v_attname text; + v_parent regclass; + BEGIN /* Prevent concurrent partition management */ PERFORM @extschema@.acquire_partitions_lock(); @@ -1010,15 +1009,23 @@ BEGIN SELECT inhparent::regclass INTO v_parent 
FROM pg_inherits WHERE inhrelid = p_partition::regclass::oid; + v_attname := attname + FROM @extschema@.pathman_config + WHERE partrel = v_parent; + + IF v_attname IS NULL THEN + RAISE EXCEPTION 'Table % is not partitioned', quote_ident(v_parent::TEXT); + END IF; + /* Remove inheritance */ EXECUTE format('ALTER TABLE %s NO INHERIT %s' , p_partition , v_parent); /* Remove check constraint */ - EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s_check' + EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s' , p_partition - , @extschema@.get_schema_qualified_name(p_partition::regclass)); + , @extschema@.build_check_constraint_name(p_partition, v_attname)); /* Invalidate cache */ PERFORM @extschema@.on_update_partitions(v_parent::regclass::oid); @@ -1042,38 +1049,48 @@ CREATE OR REPLACE FUNCTION @extschema@.create_range_update_trigger( RETURNS TEXT AS $$ DECLARE - func TEXT := ' - CREATE OR REPLACE FUNCTION %s_update_trigger_func() - RETURNS TRIGGER AS - $body$ - DECLARE - old_oid INTEGER; - new_oid INTEGER; - q TEXT; - BEGIN - old_oid := TG_RELID; - new_oid := @extschema@.find_or_create_range_partition(''%1$s''::regclass::oid, NEW.%2$s); - IF old_oid = new_oid THEN RETURN NEW; END IF; - q := format(''DELETE FROM %%s WHERE %4$s'', old_oid::regclass::text); - EXECUTE q USING %5$s; - q := format(''INSERT INTO %%s VALUES (%6$s)'', new_oid::regclass::text); - EXECUTE q USING %7$s; - RETURN NULL; - END $body$ LANGUAGE plpgsql'; - trigger TEXT := 'CREATE TRIGGER %s_update_trigger ' || - 'BEFORE UPDATE ON %s ' || - 'FOR EACH ROW EXECUTE PROCEDURE %s_update_trigger_func()'; - att_names TEXT; - old_fields TEXT; - new_fields TEXT; - att_val_fmt TEXT; - att_fmt TEXT; - relid INTEGER; - rec RECORD; - num INTEGER := 0; - attr TEXT; + func TEXT := 'CREATE OR REPLACE FUNCTION %s_update_trigger_func() + RETURNS TRIGGER AS + $body$ + DECLARE + old_oid INTEGER; + new_oid INTEGER; + q TEXT; + + BEGIN + old_oid := TG_RELID; + new_oid := @extschema@.find_or_create_range_partition( + 
''%1$s''::regclass::oid, NEW.%2$s); + + IF old_oid = new_oid THEN + RETURN NEW; + END IF; + + q := format(''DELETE FROM %%s WHERE %4$s'', + old_oid::regclass::text); + EXECUTE q USING %5$s; + + q := format(''INSERT INTO %%s VALUES (%6$s)'', + new_oid::regclass::text); + EXECUTE q USING %7$s; + + RETURN NULL; + END $body$ + LANGUAGE plpgsql'; + + trigger TEXT := 'CREATE TRIGGER %s_update_trigger ' || + 'BEFORE UPDATE ON %s ' || + 'FOR EACH ROW EXECUTE PROCEDURE %s_update_trigger_func()'; + + att_names TEXT; + old_fields TEXT; + new_fields TEXT; + att_val_fmt TEXT; + att_fmt TEXT; + attr TEXT; + rec RECORD; + BEGIN - relid := relation::oid; SELECT string_agg(attname, ', '), string_agg('OLD.' || attname, ', '), string_agg('NEW.' || attname, ', '), @@ -1081,7 +1098,7 @@ BEGIN ' ELSE ' || attname || ' IS NULL END', ' AND '), string_agg('$' || attnum, ', ') FROM pg_attribute - WHERE attrelid=relid AND attnum>0 + WHERE attrelid::regclass = relation AND attnum > 0 INTO att_names, old_fields, new_fields, @@ -1090,7 +1107,7 @@ BEGIN attr := attname FROM @extschema@.pathman_config - WHERE relname::regclass = relation; + WHERE partrel = relation; IF attr IS NULL THEN RAISE EXCEPTION 'Table % is not partitioned', quote_ident(relation::TEXT); @@ -1098,13 +1115,12 @@ BEGIN EXECUTE format(func, relation, attr, 0, att_val_fmt, old_fields, att_fmt, new_fields); - FOR rec in (SELECT * FROM pg_inherits WHERE inhparent = relation::regclass::oid) + FOR rec in (SELECT * FROM pg_inherits WHERE inhparent = relation::oid) LOOP EXECUTE format(trigger - , @extschema@.get_schema_qualified_name(relation::regclass) + , @extschema@.get_schema_qualified_name(relation) , rec.inhrelid::regclass , relation); - num := num + 1; END LOOP; RETURN format('%s_update_trigger_func()', relation); @@ -1121,21 +1137,21 @@ CREATE OR REPLACE FUNCTION @extschema@.append_partitions_on_demand_internal( RETURNS OID AS $$ DECLARE - v_cnt INTEGER := 0; - i INTEGER := 0; - v_part TEXT; - v_interval TEXT; - v_attname 
TEXT; - v_min p_new_value%TYPE; - v_max p_new_value%TYPE; - v_cur_value p_new_value%TYPE; - v_next_value p_new_value%TYPE; - v_is_date BOOLEAN; + i INTEGER := 0; + v_part TEXT; + v_interval TEXT; + v_attname TEXT; + v_min p_new_value%TYPE; + v_max p_new_value%TYPE; + v_cur_value p_new_value%TYPE; + v_next_value p_new_value%TYPE; + v_is_date BOOLEAN; + BEGIN /* get attribute name and interval */ SELECT attname, range_interval FROM @extschema@.pathman_config - WHERE relname::regclass = p_relid::regclass + WHERE partrel = p_relid INTO v_attname, v_interval; IF v_attname IS NULL THEN diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 1c6b6e3f8b..30d6187908 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -147,11 +147,11 @@ WHERE j1.dt < '2015-03-01' AND j2.dt >= '2015-02-01' ORDER BY j2.dt; * Test CTE query */ EXPLAIN (COSTS OFF) - WITH ttt AS (SELECT * FROM test.range_rel WHERE dt >= '2015-02-01' AND dt < '2015-03-15') + WITH ttt AS (SELECT * FROM test.range_rel WHERE dt >= '2015-02-01' AND dt < '2015-03-15') SELECT * FROM ttt; EXPLAIN (COSTS OFF) - WITH ttt AS (SELECT * FROM test.hash_rel WHERE value = 2) + WITH ttt AS (SELECT * FROM test.hash_rel WHERE value = 2) SELECT * FROM ttt; @@ -451,14 +451,14 @@ EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-11-15' A SELECT pathman.detach_range_partition('test.range_rel_archive'); EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-11-15' AND '2015-01-15'; CREATE TABLE test.range_rel_test1 ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP, - txt TEXT, - abc INTEGER); + id SERIAL PRIMARY KEY, + dt TIMESTAMP, + txt TEXT, + abc INTEGER); SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test1', '2013-01-01'::DATE, '2014-01-01'::DATE); CREATE TABLE test.range_rel_test2 ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP); + id SERIAL PRIMARY KEY, + dt TIMESTAMP); SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test2', '2013-01-01'::DATE, 
'2014-01-01'::DATE); /* diff --git a/src/dsm_array.c b/src/dsm_array.c index 0419fc5a95..d55c94c778 100644 --- a/src/dsm_array.c +++ b/src/dsm_array.c @@ -1,15 +1,16 @@ /* ------------------------------------------------------------------------ * - * init.c - * This module allocates large DSM segment to store arrays, - * initializes it with block structure and provides functions to - * allocate and free arrays + * dsm_array.c + * Allocate data in shared memory * * Copyright (c) 2015-2016, Postgres Professional * * ------------------------------------------------------------------------ */ + #include "pathman.h" +#include "dsm_array.h" + #include "storage/shmem.h" #include "storage/dsm.h" @@ -49,11 +50,11 @@ typedef BlockHeader* BlockHeaderPtr; ((length) | ((*header) & FREE_BIT)) /* - * Amount of memory that need to be requested in shared memory to store dsm - * config + * Amount of memory that need to be requested + * for shared memory to store dsm config */ Size -get_dsm_shared_size() +estimate_dsm_config_size() { return (Size) MAXALIGN(sizeof(DsmConfig)); } @@ -300,8 +301,8 @@ void * dsm_array_get_pointer(const DsmArray *arr, bool copy) { uint8 *segment_address, - *dsm_array, - *result; + *dsm_array, + *result; size_t size; segment_address = (uint8 *) dsm_segment_address(segment); diff --git a/src/dsm_array.h b/src/dsm_array.h new file mode 100644 index 0000000000..3da4794867 --- /dev/null +++ b/src/dsm_array.h @@ -0,0 +1,45 @@ +/* ------------------------------------------------------------------------ + * + * dsm_array.h + * Allocate data in shared memory + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + +#ifndef DSM_ARRAY_H +#define DSM_ARRAY_H + +#include "postgres.h" +#include "storage/dsm.h" + + +/* + * Dynamic shared memory array + */ +typedef struct +{ + dsm_handle segment; + size_t offset; + size_t elem_count; + size_t entry_size; +} DsmArray; + + +#define 
InvalidDsmArray { 0, 0, 0, 0 } + + +/* Dynamic shared memory functions */ +Size estimate_dsm_config_size(void); +void init_dsm_config(void); +bool init_dsm_segment(size_t blocks_count, size_t block_size); +void init_dsm_table(size_t block_size, size_t start, size_t end); +void alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count); +void free_dsm_array(DsmArray *arr); +void resize_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count); +void *dsm_array_get_pointer(const DsmArray *arr, bool copy); +dsm_handle get_dsm_array_segment(void); +void attach_dsm_array_segment(void); + +#endif diff --git a/src/hooks.c b/src/hooks.c index 4ef66675e8..a62ce2185c 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -7,16 +7,18 @@ * * ------------------------------------------------------------------------ */ -#include "postgres.h" -#include "optimizer/cost.h" -#include "optimizer/restrictinfo.h" -#include "pathman.h" + #include "hooks.h" -#include "partition_filter.h" +#include "init.h" #include "runtimeappend.h" #include "runtime_merge_append.h" #include "utils.h" +#include "miscadmin.h" +#include "optimizer/cost.h" +#include "optimizer/restrictinfo.h" +#include "partition_filter.h" + set_join_pathlist_hook_type set_join_pathlist_next = NULL; set_rel_pathlist_hook_type set_rel_pathlist_hook_next = NULL; @@ -35,52 +37,53 @@ pathman_join_pathlist_hook(PlannerInfo *root, JoinPathExtraData *extra) { JoinCostWorkspace workspace; - Path *outer, - *inner; - Relids inner_required; - RangeTblEntry *inner_entry = root->simple_rte_array[innerrel->relid]; + RangeTblEntry *inner_rte = root->simple_rte_array[innerrel->relid]; PartRelationInfo *inner_prel; - NestPath *nest_path; - List *pathkeys = NIL; - List *joinrestrictclauses = extra->restrictlist; - List *joinclauses, + List *pathkeys = NIL, + *joinclauses, *otherclauses; ListCell *lc; - double paramsel; WalkerContext context; + double paramsel; bool context_initialized; bool innerrel_rinfo_contains_part_attr; + /* Call 
hooks set by other extensions */ if (set_join_pathlist_next) set_join_pathlist_next(root, joinrel, outerrel, innerrel, jointype, extra); - if (jointype == JOIN_UNIQUE_OUTER || - jointype == JOIN_UNIQUE_INNER) - { - jointype = JOIN_INNER; - } - - if (jointype == JOIN_FULL || !pg_pathman_enable_runtimeappend) + /* Check that both pg_pathman & RuntimeAppend nodes are enabled */ + if (!IsPathmanReady() || !pg_pathman_enable_runtimeappend) return; - if (innerrel->reloptkind != RELOPT_BASEREL || - !inner_entry->inh || - !(inner_prel = get_pathman_relation_info(inner_entry->relid, NULL))) + if (jointype == JOIN_FULL) + return; /* handling full joins is meaningless */ + + /* Check that innerrel is a BASEREL with inheritors & PartRelationInfo */ + if (innerrel->reloptkind != RELOPT_BASEREL || !inner_rte->inh || + !(inner_prel = get_pathman_relation_info(inner_rte->relid, NULL))) { return; /* Obviously not our case */ } + /* + * These codes are used internally in the planner, but are not supported + * by the executor (nor, indeed, by most of the planner). 
+ */ + if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER) + jointype = JOIN_INNER; /* replace with a proper value */ + /* Extract join clauses which will separate partitions */ if (IS_OUTER_JOIN(extra->sjinfo->jointype)) { - extract_actual_join_clauses(joinrestrictclauses, + extract_actual_join_clauses(extra->restrictlist, &joinclauses, &otherclauses); } else { /* We can treat all clauses alike for an inner join */ - joinclauses = extract_actual_clauses(joinrestrictclauses, false); + joinclauses = extract_actual_clauses(extra->restrictlist, false); otherclauses = NIL; } @@ -99,6 +102,7 @@ pathman_join_pathlist_hook(PlannerInfo *root, paramsel *= wrap->paramsel; } + /* Check that innerrel's RestrictInfo contains partitioned column */ innerrel_rinfo_contains_part_attr = check_rinfo_for_partitioned_attr(innerrel->baserestrictinfo, innerrel->relid, @@ -106,17 +110,24 @@ pathman_join_pathlist_hook(PlannerInfo *root, foreach (lc, innerrel->pathlist) { + Path *outer, + *inner; + NestPath *nest_path; /* NestLoop we're creating */ + ParamPathInfo *ppi; /* parameterization info */ + Relids inner_required; /* required parameterization relids */ AppendPath *cur_inner_path = (AppendPath *) lfirst(lc); - ParamPathInfo *ppi; if (!IsA(cur_inner_path, AppendPath)) continue; + /* Select cheapest path for outerrel */ outer = outerrel->cheapest_total_path; + /* Make innerrel path depend on outerrel's column */ inner_required = bms_union(PATH_REQ_OUTER((Path *) cur_inner_path), bms_make_singleton(outerrel->relid)); + /* Get the ParamPathInfo for a parameterized path */ ppi = get_baserel_parampathinfo(root, innerrel, inner_required); /* @@ -130,11 +141,10 @@ pathman_join_pathlist_hook(PlannerInfo *root, continue; inner = create_runtimeappend_path(root, cur_inner_path, - ppi, - paramsel); + ppi, paramsel); initial_cost_nestloop(root, &workspace, jointype, - outer, inner, + outer, inner, /* built paths */ extra->sjinfo, &extra->semifactors); pathkeys = 
build_join_pathkeys(root, joinrel, jointype, outer->pathkeys); @@ -145,6 +155,7 @@ pathman_join_pathlist_hook(PlannerInfo *root, pathkeys, calc_nestloop_required_outer(outer, inner)); + /* Finally we can add new NestLoop path */ add_path(joinrel, (Path *) nest_path); } } @@ -156,28 +167,25 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb PartRelationInfo *prel = NULL; RangeTblEntry **new_rte_array; RelOptInfo **new_rel_array; - bool found; int len; /* Invoke original hook if needed */ if (set_rel_pathlist_hook_next != NULL) set_rel_pathlist_hook_next(root, rel, rti, rte); - if (!pg_pathman_enable) - return; + if (!IsPathmanReady()) + return; /* pg_pathman is not ready */ /* This works only for SELECT queries (at least for now) */ if (root->parse->commandType != CMD_SELECT || !list_member_oid(inheritance_enabled_relids, rte->relid)) return; - /* Lookup partitioning information for parent relation */ - prel = get_pathman_relation_info(rte->relid, &found); - - if (prel != NULL && found) + /* Proceed iff relation 'rel' is partitioned */ + if ((prel = get_pathman_relation_info(rte->relid, NULL)) != NULL) { ListCell *lc; - Oid *dsm_arr; + Oid *children; List *ranges, *wrappers; PathKey *pathkeyAsc = NULL, @@ -220,9 +228,10 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb pathkeyDesc = (PathKey *) linitial(pathkeys); } - rte->inh = true; - dsm_arr = (Oid *) dsm_array_get_pointer(&prel->children, true); - ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); + rte->inh = true; /* we must restore 'inh' flag! 
*/ + + children = PrelGetChildrenArray(prel, true); + ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); /* Make wrappers over restrictions and collect final rangeset */ InitWalkerContext(&context, prel, NULL, CurrentMemoryContext, false); @@ -282,7 +291,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb IndexRange irange = lfirst_irange(lc); for (i = irange.ir_lower; i <= irange.ir_upper; i++) - append_child_relation(root, rel, rti, rte, i, dsm_arr[i], wrappers); + append_child_relation(root, rel, rti, rte, i, children[i], wrappers); } /* Clear old path list */ @@ -292,12 +301,12 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb set_append_rel_pathlist(root, rel, rti, rte, pathkeyAsc, pathkeyDesc); set_append_rel_size(root, rel, rti, rte); - /* No need to go further, return */ + /* No need to go further (both nodes are disabled), return */ if (!(pg_pathman_enable_runtimeappend || pg_pathman_enable_runtime_merge_append)) return; - /* RuntimeAppend is pointless if there are no params in clauses */ + /* Runtime[Merge]Append is pointless if there are no params in clauses */ if (!clause_contains_params((Node *) get_actual_clauses(rel->baserestrictinfo))) return; @@ -315,8 +324,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb /* Skip if rel contains some join-related stuff or path type mismatched */ if (!(IsA(cur_path, AppendPath) || IsA(cur_path, MergeAppendPath)) || - rel->has_eclass_joins || - rel->joininfo) + rel->has_eclass_joins || rel->joininfo) { continue; } @@ -345,9 +353,22 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb } } +/* + * Intercept 'pg_pathman.enable' GUC assignments. + */ void pg_pathman_enable_assign_hook(bool newval, void *extra) { + elog(DEBUG2, "pg_pathman_enable_assign_hook() [newval = %s] triggered", + newval ? 
"true" : "false"); + + if (initialization_needed) + { + elog(DEBUG2, "pg_pathman is not yet initialized, " + "pg_pathman.enable is set to false"); + return; + } + /* Return quickly if nothing has changed */ if (newval == (pg_pathman_enable && pg_pathman_enable_runtimeappend && @@ -372,9 +393,18 @@ pg_pathman_enable_assign_hook(bool newval, void *extra) PlannedStmt * pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) { +#define ExecuteForPlanTree(planned_stmt, proc) \ + do { \ + ListCell *lc; \ + proc((planned_stmt)->rtable, (planned_stmt)->planTree); \ + foreach (lc, (planned_stmt)->subplans) \ + proc((planned_stmt)->rtable, (Plan *) lfirst(lc)); \ + } while (0) + PlannedStmt *result; - if (pg_pathman_enable) + /* TODO: fix these commands (traverse whole query tree) */ + if (IsPathmanReady()) { switch(parse->commandType) { @@ -390,38 +420,24 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) handle_modification_query(parse); break; - case CMD_INSERT: - { - ListCell *lc; - - result = standard_planner(parse, cursorOptions, boundParams); - - add_partition_filters(result->rtable, result->planTree); - foreach (lc, result->subplans) - add_partition_filters(result->rtable, (Plan *) lfirst(lc)); - - return result; - } - default: break; } } - /* Invoke original hook */ + /* Invoke original hook if needed */ if (planner_hook_next) result = planner_hook_next(parse, cursorOptions, boundParams); else result = standard_planner(parse, cursorOptions, boundParams); - if (pg_pathman_enable) + if (IsPathmanReady()) { - ListCell *lc; - /* Give rowmark-related attributes correct names */ - postprocess_lock_rows(result->rtable, result->planTree); - foreach (lc, result->subplans) - postprocess_lock_rows(result->rtable, (Plan *) lfirst(lc)); + ExecuteForPlanTree(result, postprocess_lock_rows); + + /* Add PartitionFilter node for INSERT queries */ + ExecuteForPlanTree(result, add_partition_filters); } 
list_free(inheritance_disabled_relids); @@ -439,26 +455,76 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) void pathman_post_parse_analysis_hook(ParseState *pstate, Query *query) { - if (initialization_needed) - load_config(); - + /* Invoke original hook if needed */ if (post_parse_analyze_hook_next) post_parse_analyze_hook_next(pstate, query); + /* Load config if pg_pathman exists & it's still necessary */ + if (IsPathmanEnabled() && + initialization_needed && + get_pathman_schema() != InvalidOid) + { + load_config(); + } + inheritance_disabled_relids = NIL; inheritance_enabled_relids = NIL; } +/* + * Initialize dsm_config & shmem_config. + */ void pathman_shmem_startup_hook(void) { + /* Invoke original hook if needed */ + if (shmem_startup_hook_next != NULL) + shmem_startup_hook_next(); + /* Allocate shared memory objects */ LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); init_dsm_config(); init_shmem_config(); LWLockRelease(AddinShmemInitLock); +} - /* Invoke original hook if needed */ - if (shmem_startup_hook_next != NULL) - shmem_startup_hook_next(); +/* + * Invalidate PartRelationInfo cache entry if needed. 
+ */ +void +pathman_relcache_hook(Datum arg, Oid relid) +{ + PartParentSearch search; + Oid partitioned_table; + + /* Invalidate PartParentInfo cache if needed */ + partitioned_table = forget_parent_of_partition(relid, &search); + + /* It is (or was) a valid partition */ + if (partitioned_table != InvalidOid) + { + elog(DEBUG2, "Invalidation message for partition %u [%u]", + relid, MyProcPid); + + /* Invalidate PartRelationInfo cache */ + invalidate_pathman_relation_info(partitioned_table, NULL); + + /* TODO: add table to 'invalidated_rel' list */ + } + + /* Both syscache and pathman's cache say it isn't a partition */ + else if (search == PPS_ENTRY_NOT_FOUND) + { + elog(DEBUG2, "Invalidation message for relation %u [%u]", + relid, MyProcPid); + } + + /* We can't say anything (state is not transactional) */ + else if (search == PPS_NOT_SURE) + { + elog(DEBUG2, "Invalidation message for vague relation %u [%u]", + relid, MyProcPid); + + /* TODO: add table to 'PPS_NOT_SURE' list */ + } } diff --git a/src/hooks.h b/src/hooks.h index c744e300b0..022387b130 100644 --- a/src/hooks.h +++ b/src/hooks.h @@ -7,6 +7,7 @@ * * ------------------------------------------------------------------------ */ + #ifndef JOIN_HOOK_H #define JOIN_HOOK_H @@ -47,4 +48,6 @@ void pathman_post_parse_analysis_hook(ParseState *pstate, void pathman_shmem_startup_hook(void); +void pathman_relcache_hook(Datum arg, Oid relid); + #endif diff --git a/src/init.c b/src/init.c index a3bcceb66d..3d10ee88d5 100644 --- a/src/init.c +++ b/src/init.c @@ -5,66 +5,140 @@ * * Copyright (c) 2015-2016, Postgres Professional * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * * ------------------------------------------------------------------------ */ + +#include "hooks.h" +#include "init.h" #include "pathman.h" -#include "miscadmin.h" -#include "executor/spi.h" +#include "relation_info.h" +#include "utils.h" + 
+#include "access/htup_details.h" +#include "catalog/indexing.h" +#include "catalog/pg_inherits_fn.h" #include "catalog/pg_type.h" +#include "catalog/pg_inherits.h" #include "catalog/pg_constraint.h" -#include "access/htup_details.h" +#include "executor/spi.h" +#include "miscadmin.h" +#include "optimizer/clauses.h" +#include "utils/inval.h" +#include "utils/fmgroids.h" #include "utils/syscache.h" #include "utils/builtins.h" #include "utils/typcache.h" +#include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/snapmgr.h" -#include "optimizer/clauses.h" -HTAB *relations = NULL; -HTAB *range_restrictions = NULL; +/* Initial size of 'partitioned_rels' table */ +#define PART_RELS_SIZE 10 +#define CHILD_FACTOR 500 + + +/* Storage for PartRelationInfos */ +HTAB *partitioned_rels = NULL; + +/* Storage for PartParentInfos */ +HTAB *parent_cache = NULL; + bool initialization_needed = true; -static bool validate_partition_constraints(const Oid *children_oids, - const uint32 children_count, - Snapshot snapshot, - PartRelationInfo *prel, - RangeRelation *rangerel); -static bool validate_range_constraint(Expr *, PartRelationInfo *, Datum *, Datum *); -static bool validate_hash_constraint(Expr *expr, PartRelationInfo *prel, int *hash); -static bool read_opexpr_const(OpExpr *opexpr, int varattno, Datum *val); +static void read_pathman_config(void); + +static Expr *get_partition_constraint_expr(Oid partition, AttrNumber part_attno); + static int cmp_range_entries(const void *p1, const void *p2, void *arg); +static bool validate_range_constraint(const Expr *expr, + const PartRelationInfo *prel, + Datum *min, + Datum *max); + +static bool validate_hash_constraint(const Expr *expr, + const PartRelationInfo *prel, + uint32 *part_hash); + +static bool read_opexpr_const(const OpExpr *opexpr, AttrNumber varattno, Datum *val); + +static int oid_cmp(const void *p1, const void *p2); + /* - * This argument contains type compare function - * and 'by_val' flag (absent in 
RangeEntry) which - * are required for the cmp_range_entries() function. + * Create local PartRelationInfo cache & load pg_pathman's config. */ -typedef struct +void +load_config() { - FmgrInfo *cmp_proc; - bool by_val; -} cmp_range_entries_arg; + init_local_config(); /* create 'relations' hash table */ + read_pathman_config(); /* read PATHMAN_CONFIG table & fill cache */ + initialization_needed = false; + elog(DEBUG2, "pg_pathman's config has been loaded successfully"); +} + +/* + * Estimate shmem amount needed for pg_pathman to run. + */ Size -pathman_memsize() +estimate_pathman_shmem_size(void) { - Size size; + return estimate_dsm_config_size() + MAXALIGN(sizeof(PathmanState)); +} - size = get_dsm_shared_size() + MAXALIGN(sizeof(PathmanState)); - return size; +/* + * Initialize per-process resources. + */ +void +init_local_config(void) +{ + HASHCTL ctl; + + if (partitioned_rels) + { + elog(DEBUG2, "pg_pathman's partitioned relations table already exists"); + return; + } + + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(PartRelationInfo); + ctl.hcxt = TopMemoryContext; /* place data to persistent mcxt */ + + partitioned_rels = hash_create("pg_pathman's partitioned relations cache", + PART_RELS_SIZE, &ctl, HASH_ELEM | HASH_BLOBS); + + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(PartParentInfo); + ctl.hcxt = TopMemoryContext; /* place data to persistent mcxt */ + + parent_cache = hash_create("pg_pathman's partition parents cache", + PART_RELS_SIZE * CHILD_FACTOR, + &ctl, HASH_ELEM | HASH_BLOBS); + + CacheRegisterRelcacheCallback(pathman_relcache_hook, + PointerGetDatum(NULL)); } +/* + * Initializes pg_pathman's global state (PathmanState) & locks. 
+ */ void -init_shmem_config() +init_shmem_config(void) { bool found; /* Check if module was initialized in postmaster */ - pmstate = ShmemInitStruct("pathman state", sizeof(PathmanState), &found); + pmstate = ShmemInitStruct("pg_pathman's global state", + sizeof(PathmanState), &found); if (!found) { /* @@ -78,520 +152,406 @@ init_shmem_config() pmstate->edit_partitions_lock = LWLockAssign(); } } - - create_relations_hashtable(); - create_range_restrictions_hashtable(); } /* - * Initialize hashtables + * Fill PartRelationInfo with partition-related info. */ void -load_config(void) +fill_prel_with_partitions(const Oid *partitions, + const uint32 parts_count, + PartRelationInfo *prel) { - bool new_segment_created; - Oid *databases; - - initialization_needed = false; + uint32 i; + Expr *con_expr; + MemoryContext mcxt = TopMemoryContext; - LWLockAcquire(pmstate->dsm_init_lock, LW_EXCLUSIVE); - new_segment_created = init_dsm_segment(INITIAL_BLOCKS_COUNT, 32); + /* Allocate memory for 'prel->children' & 'prel->ranges' (if needed) */ + prel->children = MemoryContextAllocZero(mcxt, parts_count * sizeof(Oid)); + if (prel->parttype == PT_RANGE) + prel->ranges = MemoryContextAllocZero(mcxt, parts_count * sizeof(RangeEntry)); + prel->children_count = parts_count; - /* If dsm segment just created */ - if (new_segment_created) + for (i = 0; i < PrelChildrenCount(prel); i++) { - /* - * Allocate databases array and put current database - * oid into it. 
This array contains databases oids - * that have already been cached (to prevent repeat caching) - */ - if (&pmstate->databases.elem_count > 0) - free_dsm_array(&pmstate->databases); - alloc_dsm_array(&pmstate->databases, sizeof(Oid), 1); - databases = (Oid *) dsm_array_get_pointer(&pmstate->databases, false); - databases[0] = MyDatabaseId; - } - else - { - int databases_count = pmstate->databases.elem_count; - int i; + con_expr = get_partition_constraint_expr(partitions[i], prel->attnum); - /* Check if we already cached config for current database */ - databases = (Oid *) dsm_array_get_pointer(&pmstate->databases, false); - for(i = 0; i < databases_count; i++) - if (databases[i] == MyDatabaseId) - { - LWLockRelease(pmstate->dsm_init_lock); - return; - } + /* Perform a partitioning_type-dependent task */ + switch (prel->parttype) + { + case PT_HASH: + { + uint32 hash; /* hash value < parts_count */ - /* Put current database oid to databases list */ - resize_dsm_array(&pmstate->databases, sizeof(Oid), databases_count + 1); - databases = (Oid *) dsm_array_get_pointer(&pmstate->databases, false); - databases[databases_count] = MyDatabaseId; - } + if (validate_hash_constraint(con_expr, prel, &hash)) + prel->children[hash] = partitions[i]; + else + elog(ERROR, + "Wrong constraint format for HASH partition %u", + partitions[i]); + } + break; - /* Load cache */ - LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); - load_relations(new_segment_created); - LWLockRelease(pmstate->load_config_lock); - LWLockRelease(pmstate->dsm_init_lock); -} + case PT_RANGE: + { + Datum range_min, range_max; -/* - * Returns extension schema name or NULL. Caller is responsible for freeing - * the memory. 
- */ -char * -get_extension_schema() -{ - int ret; - bool isnull; + if (validate_range_constraint(con_expr, prel, + &range_min, &range_max)) + { + prel->ranges[i].child_oid = partitions[i]; + prel->ranges[i].min = range_min; + prel->ranges[i].max = range_max; + } + else + elog(ERROR, + "Wrong constraint format for RANGE partition %u", + partitions[i]); + } + break; - ret = SPI_exec("SELECT extnamespace::regnamespace::text FROM pg_extension WHERE extname = 'pg_pathman'", 0); - if (ret > 0 && SPI_tuptable != NULL && SPI_processed > 0) + default: + elog(ERROR, "Unknown partitioning type for relation %u", prel->key); + } + } + + /* Finalize 'prel' for a RANGE-partitioned table */ + if (prel->parttype == PT_RANGE) { - TupleDesc tupdesc = SPI_tuptable->tupdesc; - SPITupleTable *tuptable = SPI_tuptable; - HeapTuple tuple = tuptable->vals[0]; - Datum datum = SPI_getbinval(tuple, tupdesc, 1, &isnull); + TypeCacheEntry *tce = lookup_type_cache(prel->atttype, + TYPECACHE_CMP_PROC_FINFO); - if (isnull) - return NULL; + /* Sort partitions by RangeEntry->min asc */ + qsort_arg((void *) prel->ranges, PrelChildrenCount(prel), + sizeof(RangeEntry), cmp_range_entries, + (void *) &tce->cmp_proc_finfo); - return TextDatumGetCString(datum); + /* Initialize 'prel->children' array */ + for (i = 0; i < PrelChildrenCount(prel); i++) + prel->children[i] = prel->ranges[i].child_oid; } - return NULL; + +#ifdef USE_ASSERT_CHECKING + /* Check that each partition Oid has been assigned properly */ + if (prel->parttype == PT_HASH) + for (i = 0; i < PrelChildrenCount(prel); i++) + { + if (prel->children[i] == InvalidOid) + elog(ERROR, "pg_pathman's cache for relation %u " + "has not been properly initialized", prel->key); + } +#endif } /* - * Loads partitioned tables structure to hashtable. + * find_inheritance_children + * + * Returns an array containing the OIDs of all relations which + * inherit *directly* from the relation with OID 'parentrelId'. 
+ * + * The specified lock type is acquired on each child relation (but not on the + * given rel; caller should already have locked it). If lockmode is NoLock + * then no locks are acquired, but caller must beware of race conditions + * against possible DROPs of child relations. * - * TODO: reload just the specified relation + * borrowed from pg_inherits.c */ -void -load_relations(bool reinitialize) +Oid * +find_inheritance_children_array(Oid parentrelId, LOCKMODE lockmode, uint32 *size) { - int ret, - i, - proc; - bool isnull; - List *part_oids = NIL; - ListCell *lc; - char *schema; - TypeCacheEntry *tce; - PartRelationInfo *prel; - char sql[] = "SELECT pg_class.oid, pg_attribute.attnum," - "cfg.parttype, pg_attribute.atttypid, pg_attribute.atttypmod " - "FROM %s.pathman_config as cfg " - "JOIN pg_class ON pg_class.oid = cfg.relname::regclass::oid " - "JOIN pg_attribute ON pg_attribute.attname = lower(cfg.attname) " - "AND attrelid = pg_class.oid"; - char *query; - - SPI_connect(); - schema = get_extension_schema(); - - /* If extension isn't exist then just quit */ - if (!schema) - { - SPI_finish(); - return; - } + Relation relation; + SysScanDesc scan; + ScanKeyData key[1]; + HeapTuple inheritsTuple; + Oid inhrelid; + Oid *oidarr; + uint32 maxoids, + numoids, + i; - /* Put schema name to the query */ - query = psprintf(sql, schema); - ret = SPI_exec(query, 0); - proc = SPI_processed; - - if (ret > 0 && SPI_tuptable != NULL) + /* + * Can skip the scan if pg_class shows the relation has never had a + * subclass. 
+ */ + if (!has_subclass(parentrelId)) { - TupleDesc tupdesc = SPI_tuptable->tupdesc; - SPITupleTable *tuptable = SPI_tuptable; - - for (i = 0; i < proc; i++) - { - RelationKey key; - HeapTuple tuple = tuptable->vals[i]; - Oid oid = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 1, &isnull)); + *size = 0; + return NULL; + } - key.dbid = MyDatabaseId; - key.relid = oid; - prel = (PartRelationInfo*) - hash_search(relations, (const void *) &key, HASH_ENTER, NULL); + /* + * Scan pg_inherits and build a working array of subclass OIDs. + */ + maxoids = 32; + oidarr = (Oid *) palloc(maxoids * sizeof(Oid)); + numoids = 0; - prel->attnum = DatumGetInt32(SPI_getbinval(tuple, tupdesc, 2, &isnull)); - prel->parttype = DatumGetInt32(SPI_getbinval(tuple, tupdesc, 3, &isnull)); - prel->atttype = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 4, &isnull)); - prel->atttypmod = DatumGetInt32(SPI_getbinval(tuple, tupdesc, 5, &isnull)); + relation = heap_open(InheritsRelationId, AccessShareLock); - tce = lookup_type_cache(prel->atttype, TYPECACHE_CMP_PROC | TYPECACHE_HASH_PROC); - prel->cmp_proc = tce->cmp_proc; - prel->hash_proc = tce->hash_proc; + ScanKeyInit(&key[0], + Anum_pg_inherits_inhparent, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(parentrelId)); - part_oids = lappend_int(part_oids, oid); - } - } - pfree(query); + scan = systable_beginscan(relation, InheritsParentIndexId, true, + NULL, 1, key); - /* Load children information */ - foreach(lc, part_oids) + while ((inheritsTuple = systable_getnext(scan)) != NULL) { - Oid oid = (int) lfirst_int(lc); - - prel = get_pathman_relation_info(oid, NULL); - switch(prel->parttype) + inhrelid = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhrelid; + if (numoids >= maxoids) { - case PT_RANGE: - if (reinitialize && prel->children.elem_count > 0) - { - RangeRelation *rangerel = get_pathman_range_relation(oid, NULL); - free_dsm_array(&prel->children); - free_dsm_array(&rangerel->ranges); - prel->children_count = 0; - } - 
load_partitions(oid, GetCatalogSnapshot(oid)); - break; - case PT_HASH: - if (reinitialize && prel->children.elem_count > 0) - { - free_dsm_array(&prel->children); - prel->children_count = 0; - } - load_partitions(oid, GetCatalogSnapshot(oid)); - break; + maxoids *= 2; + oidarr = (Oid *) repalloc(oidarr, maxoids * sizeof(Oid)); } + oidarr[numoids++] = inhrelid; } - SPI_finish(); -} - -void -create_relations_hashtable() -{ - HASHCTL ctl; - - memset(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(RelationKey); - ctl.entrysize = sizeof(PartRelationInfo); - - /* Already exists, recreate */ - if (relations != NULL) - hash_destroy(relations); - - relations = ShmemInitHash("Partitioning relation info", 1024, 1024, &ctl, - HASH_ELEM | HASH_BLOBS); -} - -/* - * Load and validate CHECK constraints - */ -void -load_partitions(Oid parent_oid, Snapshot snapshot) -{ - PartRelationInfo *prel = NULL; - RangeRelation *rangerel = NULL; - SPIPlanPtr plan = NULL; - bool found; - int ret, - i, - children_count = 0; - Datum vals[1]; - Oid types[1] = { INT4OID }; - const bool nulls[1] = { false }; - Oid *children_oids; - - vals[0] = Int32GetDatum(parent_oid); - prel = get_pathman_relation_info(parent_oid, NULL); - - /* Skip if already loaded */ - if (prel->children.elem_count > 0) - return; - - /* Load children oids */ - plan = SPI_prepare("SELECT inhrelid FROM pg_inherits " - "WHERE inhparent = $1", - 1, types); - - ret = SPI_execute_snapshot(plan, vals, nulls, snapshot, - InvalidSnapshot, true, false, 0); - if (ret == SPI_OK_SELECT) - { - /* Initialize children data for validate_partition_constraints() */ - children_count = SPI_processed; - children_oids = palloc(sizeof(Oid) * children_count); + systable_endscan(scan); - for(i = 0; i < children_count; i++) - { - TupleDesc tupdesc = SPI_tuptable->tupdesc; - HeapTuple tuple = SPI_tuptable->vals[i]; - bool isnull; + heap_close(relation, AccessShareLock); - children_oids[i] = DatumGetObjectId(SPI_getbinval(tuple, - tupdesc, - 1, &isnull)); - 
} - } - else return; /* there are no children */ + /* + * If we found more than one child, sort them by OID. This ensures + * reasonably consistent behavior regardless of the vagaries of an + * indexscan. This is important since we need to be sure all backends + * lock children in the same order to avoid needless deadlocks. + */ + if (numoids > 1) + qsort(oidarr, numoids, sizeof(Oid), oid_cmp); - if (children_count > 0) + /* + * Acquire locks and build the result list. + */ + for (i = 0; i < numoids; i++) { - alloc_dsm_array(&prel->children, sizeof(Oid), children_count); + inhrelid = oidarr[i]; - /* allocate ranges array is dsm */ - if (prel->parttype == PT_RANGE) + if (lockmode != NoLock) { - TypeCacheEntry *tce = lookup_type_cache(prel->atttype, 0); - RelationKey key; - - key.dbid = MyDatabaseId; - key.relid = parent_oid; - rangerel = (RangeRelation *) hash_search(range_restrictions, - (const void *) &key, - HASH_ENTER, &found); - rangerel->by_val = tce->typbyval; - alloc_dsm_array(&rangerel->ranges, sizeof(RangeEntry), children_count); - } - - /* Validate partitions constraints */ - if (!validate_partition_constraints(children_oids, - children_count, - snapshot, - prel, - rangerel)) - { - RelationKey key; + /* Get the lock to synchronize against concurrent drop */ + LockRelationOid(inhrelid, lockmode); /* - * If validation failed then pg_pathman cannot handle this relation. - * Remove it from the cache + * Now that we have the lock, double-check to see if the relation + * really exists or not. If not, assume it was dropped while we + * waited to acquire lock, and ignore it. */ - key.dbid = MyDatabaseId; - key.relid = parent_oid; - - free_dsm_array(&prel->children); - free_dsm_array(&rangerel->ranges); - hash_search(relations, (const void *) &key, HASH_REMOVE, &found); - if (prel->parttype == PT_RANGE) - hash_search(range_restrictions, - (const void *) &key, - HASH_REMOVE, &found); - - elog(WARNING, "Validation failed for relation '%s'. 
" - "It will not be handled by pg_pathman", - get_rel_name(parent_oid)); + if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(inhrelid))) + { + /* Release useless lock */ + UnlockRelationOid(inhrelid, lockmode); + /* And ignore this relation */ + continue; + } } - else - prel->children_count = children_count; - - pfree(children_oids); } + + *size = numoids; + return oidarr; } -static bool -validate_partition_constraints(const Oid *children_oids, - const uint32 children_count, - Snapshot snapshot, - PartRelationInfo *prel, - RangeRelation *rangerel) +/* + * Generate check constraint name for a partition. + * + * This function does not perform sanity checks at all. + */ +char * +build_check_constraint_name_internal(Oid relid, AttrNumber attno) { - Expr *expr = NULL; /* constraint object tree */ - RangeEntry *ranges = NULL; - Oid *dsm_children = NULL; /* points to prel->children */ - int i; - - - if (children_count == 0) - return false; /* nothing to do here */ - - dsm_children = dsm_array_get_pointer(&prel->children, false); - if (prel->parttype == PT_RANGE) - ranges = (RangeEntry *) dsm_array_get_pointer(&rangerel->ranges, false); + return psprintf("pathman_%u_%u_check", relid, attno); +} - /* Iterate through children */ - for (i = 0; i < children_count; i++) +/* + * Check that relation 'relid' is partitioned by pg_pathman. + * + * Extract tuple into 'values' and 'isnull' if they're provided. 
+ */ +bool +pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull) +{ + Oid pathman_config; + Relation rel; + HeapScanDesc scan; + ScanKeyData key[1]; + Snapshot snapshot; + HeapTuple htup; + bool contains_rel = false; + + /* Get PATHMAN_CONFIG table Oid */ + pathman_config = get_relname_relid(PATHMAN_CONFIG, get_pathman_schema()); + + ScanKeyInit(&key[0], + Anum_pathman_config_partrel, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + /* Open relation with latest snapshot available */ + rel = heap_open(pathman_config, AccessShareLock); + + /* Check that 'partrel' column is if regclass type */ + Assert(RelationGetDescr(rel)-> + attrs[Anum_pathman_config_partrel - 1]-> + atttypid == REGCLASSOID); + + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = heap_beginscan(rel, snapshot, 1, key); + + while((htup = heap_getnext(scan, ForwardScanDirection)) != NULL) { - Form_pg_constraint constraint = NULL; - uint32 cur_processed; - bool found_valid_constraint = false; - - /* SPI args */ - Datum oids[1] = { ObjectIdGetDatum(children_oids[i]) }; - const bool nulls[1] = { false }; - Oid types[1] = { INT4OID }; - - SPIPlanPtr plan = NULL; - int ret; /* SPI result code */ - - - /* Select constraints for this partition */ - plan = SPI_prepare("SELECT * FROM pg_constraint " - "WHERE conrelid = $1 AND contype = 'c'", - 1, types); - - ret = SPI_execute_snapshot(plan, oids, nulls, snapshot, - InvalidSnapshot, true, false, 0); - - if (ret != SPI_OK_SELECT) - { - elog(WARNING, - "No constraints found for partition %s", - get_rel_name(children_oids[i])); - - return false; /* keep compiler happy */ - } - - /* Iterate through all available check constraints to find a valid one */ - for (cur_processed = 0; - cur_processed < SPI_processed && !found_valid_constraint; - cur_processed++) - { - int hash; /* temp hash value for HASH partitioning */ - RangeEntry re; /* temporary RangeEntry */ - Datum min, /* RangeEntry's min value */ - max; /* RangeEntry's 
max value */ - - Datum conbin_datum; /* nodeToString representation of constraint */ - bool conbin_isnull; + contains_rel = true; /* found partitioned table */ - HeapTuple tuple = SPI_tuptable->vals[cur_processed]; - - - /* Get 'conbin' from the 'pg_constraint' */ - constraint = (Form_pg_constraint) GETSTRUCT(tuple); - conbin_datum = SysCacheGetAttr(CONSTROID, tuple, - Anum_pg_constraint_conbin, - &conbin_isnull); + /* Extract data if necessary */ + if (values && isnull) + heap_deformtuple(htup, RelationGetDescr(rel), values, isnull); + } - /* Handle unexpected null value */ - if (conbin_isnull) - continue; /* not a valid value anyway, skipping */ + /* Clean resources */ + heap_endscan(scan); + UnregisterSnapshot(snapshot); + heap_close(rel, AccessShareLock); - /* Convert constraint datum to object tree */ - expr = (Expr *) stringToNode(TextDatumGetCString(conbin_datum)); + elog(DEBUG2, "PATHMAN_CONFIG table %s relation %u", + (contains_rel ? "contains" : "doesn't contain"), relid); - switch(prel->parttype) - { - case PT_RANGE: - if (!validate_range_constraint(expr, prel, &min, &max)) - continue; + return contains_rel; +} - /* If datum is referenced by val then just assign */ - if (rangerel->by_val) - { - re.min = min; - re.max = max; - } - /* else copy data by pointer */ - else - { - memcpy(&re.min, DatumGetPointer(min), sizeof(re.min)); - memcpy(&re.max, DatumGetPointer(max), sizeof(re.max)); - } - re.child_oid = constraint->conrelid; - ranges[i] = re; /* copy struct 're' */ +/* + * Go through the PATHMAN_CONFIG table and create PartRelationInfo entries. 
+ */ +static void +read_pathman_config(void) +{ + Oid pathman_config; + Relation rel; + HeapScanDesc scan; + Snapshot snapshot; + HeapTuple htup; - /* Found valid range constraint */ - found_valid_constraint = true; - break; + /* Get PATHMAN_CONFIG table Oid */ + pathman_config = get_relname_relid(PATHMAN_CONFIG, get_pathman_schema()); - case PT_HASH: - if (!validate_hash_constraint(expr, prel, &hash)) - continue; + /* Open relation with latest snapshot available */ + rel = heap_open(pathman_config, AccessShareLock); - /* Copy oid to the prel's 'children' array */ - dsm_children[hash] = constraint->conrelid; + /* Check that 'partrel' column is if regclass type */ + Assert(RelationGetDescr(rel)-> + attrs[Anum_pathman_config_partrel - 1]-> + atttypid == REGCLASSOID); - /* Found valid hash constraint */ - found_valid_constraint = true; - break; - } - } + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = heap_beginscan(rel, snapshot, 0, NULL); - /* No constraint matches pattern, aborting */ - if (!found_valid_constraint) + /* Examine each row and create a PartRelationInfo in local cache */ + while((htup = heap_getnext(scan, ForwardScanDirection)) != NULL) + { + Datum values[Natts_pathman_config]; + bool isnull[Natts_pathman_config]; + Oid relid; /* partitioned table */ + PartType parttype; /* partitioning type */ + text *attname; /* partitioned column name */ + + /* Extract Datums from tuple 'htup' */ + heap_deform_tuple(htup, RelationGetDescr(rel), values, isnull); + + /* These attributes are marked as NOT NULL, check anyway */ + Assert(!isnull[Anum_pathman_config_partrel - 1]); + Assert(!isnull[Anum_pathman_config_parttype - 1]); + Assert(!isnull[Anum_pathman_config_attname - 1]); + + /* Extract values from Datums */ + relid = DatumGetObjectId(values[Anum_pathman_config_partrel - 1]); + parttype = DatumGetPartType(values[Anum_pathman_config_parttype - 1]); + attname = DatumGetTextP(values[Anum_pathman_config_attname - 1]); + + /* Check that relation 'relid' 
exists */ + if (get_rel_type_id(relid) == InvalidOid) { - elog(ERROR, - "Cannot find valid check constraint for partition %s", - get_rel_name(children_oids[i])); + DisablePathman(); - return false; /* keep compiler happy */ + ereport(ERROR, + (errmsg("Table \"%s\" contains nonexistent relation %u", + PATHMAN_CONFIG, relid), + errdetail("pg_pathman will be disabled"))); } - /* Don't forget to free plan */ - SPI_freeplan(plan); + /* Create or update PartRelationInfo for this partitioned table */ + refresh_pathman_relation_info(relid, parttype, text_to_cstring(attname)); } - /* - * Sort range partitions and check if they overlap - */ - if (prel->parttype == PT_RANGE) - { - TypeCacheEntry *tce; - bool byVal = rangerel->by_val; - cmp_range_entries_arg sort_arg; /* for qsort_arg() */ - - /* Sort ascending */ - tce = lookup_type_cache(prel->atttype, - TYPECACHE_CMP_PROC | TYPECACHE_CMP_PROC_FINFO); - - /* Initialize qsort_arg comparator()'s argument */ - sort_arg.cmp_proc = &tce->cmp_proc_finfo; - sort_arg.by_val = byVal; - - qsort_arg(ranges, children_count, sizeof(RangeEntry), - cmp_range_entries, (void *) &sort_arg); - - /* Copy sorted oids to the prel's 'children' array */ - for(i = 0; i < children_count; i++) - dsm_children[i] = ranges[i].child_oid; + /* Clean resources */ + heap_endscan(scan); + UnregisterSnapshot(snapshot); + heap_close(rel, AccessShareLock); +} - /* Check if some ranges overlap */ - for(i = 0; i < children_count - 1; i++) - { - Datum cur_upper = PATHMAN_GET_DATUM(ranges[i].max, byVal), - next_lower = PATHMAN_GET_DATUM(ranges[i + 1].min, byVal); +/* + * Get constraint expression tree for a partition. + * + * build_check_constraint_name_internal() is used to build conname. 
+ */ +static Expr * +get_partition_constraint_expr(Oid partition, AttrNumber part_attno) +{ + Oid conid; /* constraint Oid */ + char *conname; /* constraint name */ + HeapTuple con_tuple; + Datum conbin_datum; + bool conbin_isnull; + Expr *expr; /* expression tree for constraint */ + + conname = build_check_constraint_name_internal(partition, part_attno); + conid = get_relation_constraint_oid(partition, conname, false); + + con_tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(conid)); + conbin_datum = SysCacheGetAttr(CONSTROID, con_tuple, + Anum_pg_constraint_conbin, + &conbin_isnull); + if (conbin_isnull) + { + elog(DEBUG2, "conbin is null for constraint %s", conname); + pfree(conname); + return NULL; + } + pfree(conname); - bool overlap = DatumGetInt32(FunctionCall2(&tce->cmp_proc_finfo, - next_lower, - cur_upper)) < 0; - if (overlap) - { - elog(WARNING, "Partitions %s and %s overlap", - get_rel_name(ranges[i].child_oid), - get_rel_name(ranges[i + 1].child_oid)); + /* Finally we get a constraint expression tree */ + expr = (Expr *) stringToNode(TextDatumGetCString(conbin_datum)); - return false; /* keep compiler happy */ - } - } - } + /* Don't foreget to release syscache tuple */ + ReleaseSysCache(con_tuple); - return true; /* everything seems to be fine */ + return expr; } /* qsort comparison function for RangeEntries */ static int cmp_range_entries(const void *p1, const void *p2, void *arg) { - const RangeEntry *v1 = (const RangeEntry *) p1; - const RangeEntry *v2 = (const RangeEntry *) p2; - const cmp_range_entries_arg *sort_arg = (const cmp_range_entries_arg *) arg; + const RangeEntry *v1 = (const RangeEntry *) p1; + const RangeEntry *v2 = (const RangeEntry *) p2; + FmgrInfo *cmp_proc = (FmgrInfo *) arg; - return FunctionCall2(sort_arg->cmp_proc, - PATHMAN_GET_DATUM(v1->min, sort_arg->by_val), - PATHMAN_GET_DATUM(v2->min, sort_arg->by_val)); + return FunctionCall2(cmp_proc, v1->min, v2->min); } /* - * Validates range constraint. 
It MUST have the exact format: - * VARIABLE >= CONST AND VARIABLE < CONST + * Validates range constraint. It MUST have this exact format: + * + * VARIABLE >= CONST AND VARIABLE < CONST + * + * Writes 'min' & 'max' values on success. */ static bool -validate_range_constraint(Expr *expr, PartRelationInfo *prel, Datum *min, Datum *max) +validate_range_constraint(const Expr *expr, + const PartRelationInfo *prel, + Datum *min, + Datum *max) { - TypeCacheEntry *tce; - BoolExpr *boolexpr = (BoolExpr *) expr; - OpExpr *opexpr; + const TypeCacheEntry *tce; + const BoolExpr *boolexpr = (const BoolExpr *) expr; + const OpExpr *opexpr; /* it should be an AND operator on top */ if (!and_clause((Node *) expr)) @@ -601,7 +561,8 @@ validate_range_constraint(Expr *expr, PartRelationInfo *prel, Datum *min, Datum /* check that left operand is >= operator */ opexpr = (OpExpr *) linitial(boolexpr->args); - if (get_op_opfamily_strategy(opexpr->opno, tce->btree_opf) == BTGreaterEqualStrategyNumber) + if (BTGreaterEqualStrategyNumber == get_op_opfamily_strategy(opexpr->opno, + tce->btree_opf)) { if (!read_opexpr_const(opexpr, prel->attnum, min)) return false; @@ -611,7 +572,8 @@ validate_range_constraint(Expr *expr, PartRelationInfo *prel, Datum *min, Datum /* check that right operand is < operator */ opexpr = (OpExpr *) lsecond(boolexpr->args); - if (get_op_opfamily_strategy(opexpr->opno, tce->btree_opf) == BTLessStrategyNumber) + if (BTLessStrategyNumber == get_op_opfamily_strategy(opexpr->opno, + tce->btree_opf)) { if (!read_opexpr_const(opexpr, prel->attnum, max)) return false; @@ -626,141 +588,120 @@ validate_range_constraint(Expr *expr, PartRelationInfo *prel, Datum *min, Datum * Reads const value from expressions of kind: VAR >= CONST or VAR < CONST */ static bool -read_opexpr_const(OpExpr *opexpr, int varattno, Datum *val) +read_opexpr_const(const OpExpr *opexpr, AttrNumber varattno, Datum *val) { - Node *left = linitial(opexpr->args); - Node *right = lsecond(opexpr->args); + 
const Node *left = linitial(opexpr->args); + const Node *right = lsecond(opexpr->args); - if ( !IsA(left, Var) || !IsA(right, Const) ) + if (!IsA(left, Var) || !IsA(right, Const)) + return false; + if (((Var *) left)->varoattno != varattno) return false; - if ( ((Var*) left)->varattno != varattno ) + if (((Const *) right)->constisnull) return false; - *val = ((Const*) right)->constvalue; + + *val = ((Const *) right)->constvalue; return true; } /* - * Validate hash constraint. It MUST have the exact format - * VARIABLE % CONST = CONST + * Validate hash constraint. It MUST have this exact format: + * + * get_hash(TYPE_HASH_PROC(VALUE), PARTITIONS_COUNT) = CUR_PARTITION_HASH + * + * Writes 'part_hash' hash value for this partition on success. */ static bool -validate_hash_constraint(Expr *expr, PartRelationInfo *prel, int *hash) +validate_hash_constraint(const Expr *expr, + const PartRelationInfo *prel, + uint32 *part_hash) { - OpExpr *eqexpr; - TypeCacheEntry *tce; - FuncExpr *gethashfunc; - FuncExpr *funcexpr; - Var *var; + const TypeCacheEntry *tce; + const OpExpr *eq_expr; + const FuncExpr *get_hash_expr, + *type_hash_proc_expr; + const Var *var; /* partitioned column */ if (!IsA(expr, OpExpr)) return false; - eqexpr = (OpExpr *) expr; + eq_expr = (const OpExpr *) expr; - /* - * We expect get_hash() function on the left - * TODO: check that it is really the 'get_hash' function - */ - if (!IsA(linitial(eqexpr->args), FuncExpr)) + /* Check that left expression is a function call */ + if (!IsA(linitial(eq_expr->args), FuncExpr)) return false; - gethashfunc = (FuncExpr *) linitial(eqexpr->args); - /* Is this an equality operator? */ - tce = lookup_type_cache(gethashfunc->funcresulttype, TYPECACHE_BTREE_OPFAMILY); - if (get_op_opfamily_strategy(eqexpr->opno, tce->btree_opf) != BTEqualStrategyNumber) + get_hash_expr = (FuncExpr *) linitial(eq_expr->args); /* arg #1: get_hash(...) */ + + /* Is 'eqexpr' an equality operator? 
*/ + tce = lookup_type_cache(get_hash_expr->funcresulttype, TYPECACHE_BTREE_OPFAMILY); + if (BTEqualStrategyNumber != get_op_opfamily_strategy(eq_expr->opno, + tce->btree_opf)) return false; - if (list_length(gethashfunc->args) == 2) + if (list_length(get_hash_expr->args) == 2) { - Node *first = linitial(gethashfunc->args); - Node *second = lsecond(gethashfunc->args); - Const *mod_result; + Node *first = linitial(get_hash_expr->args); /* arg #1: TYPE_HASH_PROC(VALUE) */ + Node *second = lsecond(get_hash_expr->args); /* arg #2: PARTITIONS_COUNT */ + Const *cur_partition_hash; /* hash value for this partition */ if (!IsA(first, FuncExpr) || !IsA(second, Const)) return false; - /* Check that function is the base hash function for the type */ - funcexpr = (FuncExpr *) first; - if (funcexpr->funcid != prel->hash_proc || - (!IsA(linitial(funcexpr->args), Var) && !IsA(linitial(funcexpr->args), - RelabelType))) + type_hash_proc_expr = (FuncExpr *) first; + + /* Check that function is indeed TYPE_HASH_PROC */ + if (type_hash_proc_expr->funcid != prel->hash_proc || + !(IsA(linitial(type_hash_proc_expr->args), Var) || + IsA(linitial(type_hash_proc_expr->args), RelabelType))) + { return false; + } - /* Check that argument is partitioning key attribute */ - if (IsA(linitial(funcexpr->args), RelabelType)) - var = (Var *) ((RelabelType *) linitial(funcexpr->args))->arg; + /* Extract argument into 'var' */ + if (IsA(linitial(type_hash_proc_expr->args), RelabelType)) + var = (Var *) ((RelabelType *) linitial(type_hash_proc_expr->args))->arg; else - var = (Var *) linitial(funcexpr->args); - if (var->varattno != prel->attnum) - return false; + var = (Var *) linitial(type_hash_proc_expr->args); - /* Check that const value less than partitions count */ - if (DatumGetInt32(((Const*) second)->constvalue) != prel->children.elem_count) + /* Check that 'var' is the partitioning key attribute */ + if (var->varoattno != prel->attnum) return false; - if (!IsA(lsecond(eqexpr->args), Const)) + 
/* Check that PARTITIONS_COUNT is equal to total amount of partitions */ + if (DatumGetUInt32(((Const*) second)->constvalue) != PrelChildrenCount(prel)) return false; - mod_result = lsecond(eqexpr->args); - *hash = DatumGetInt32(mod_result->constvalue); - return true; - } - - return false; -} - -/* - * Create range restrictions table - */ -void -create_range_restrictions_hashtable() -{ - HASHCTL ctl; - - memset(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(RelationKey); - ctl.entrysize = sizeof(RangeRelation); - range_restrictions = ShmemInitHash("pg_pathman range restrictions", - 1024, 1024, &ctl, HASH_ELEM | HASH_BLOBS); -} + /* Check that CUR_PARTITION_HASH is Const */ + if (!IsA(lsecond(eq_expr->args), Const)) + return false; -/* - * Remove partitions from pathman's cache - */ -void -remove_relation_info(Oid relid) -{ - PartRelationInfo *prel; - RangeRelation *rangerel; - RelationKey key; + cur_partition_hash = lsecond(eq_expr->args); - key.dbid = MyDatabaseId; - key.relid = relid; + /* Check that CUR_PARTITION_HASH is NOT NULL */ + if (cur_partition_hash->constisnull) + return false; - prel = get_pathman_relation_info(relid, NULL); + *part_hash = DatumGetUInt32(cur_partition_hash->constvalue); + if (*part_hash >= PrelChildrenCount(prel)) + return false; - /* If there is nothing to remove then just return */ - if (!prel) - { - elog(DEBUG2, "pg_pathman's cache does not contain relation %u", relid); - return; + return true; /* everything seems to be ok */ } - /* Remove children relations */ - switch (prel->parttype) - { - case PT_HASH: - free_dsm_array(&prel->children); - break; - - case PT_RANGE: - rangerel = get_pathman_range_relation(relid, NULL); - free_dsm_array(&rangerel->ranges); - free_dsm_array(&prel->children); - hash_search(range_restrictions, (const void *) &key, HASH_REMOVE, NULL); - break; - } + return false; +} - prel->children_count = 0; - hash_search(relations, (const void *) &key, HASH_REMOVE, 0); +/* needed for find_inheritance_children_array() 
function */ +static int +oid_cmp(const void *p1, const void *p2) +{ + Oid v1 = *((const Oid *) p1); + Oid v2 = *((const Oid *) p2); + + if (v1 < v2) + return -1; + if (v1 > v2) + return 1; + return 0; } diff --git a/src/init.h b/src/init.h new file mode 100644 index 0000000000..cc07aa0d46 --- /dev/null +++ b/src/init.h @@ -0,0 +1,47 @@ +/* ------------------------------------------------------------------------ + * + * init.h + * Initialization functions + * + * Copyright (c) 2015-2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + +#ifndef PATHMAN_INIT_H +#define PATHMAN_INIT_H + +#include "relation_info.h" + +#include "postgres.h" +#include "storage/lmgr.h" +#include "utils/snapshot.h" +#include "utils/hsearch.h" + + +extern HTAB *partitioned_rels; +extern HTAB *parent_cache; +extern bool initialization_needed; + + +Size estimate_pathman_shmem_size(void); +void init_local_config(void); +void init_shmem_config(void); +void load_config(void); + +void fill_prel_with_partitions(const Oid *partitions, + const uint32 parts_count, + PartRelationInfo *prel); + +Oid *find_inheritance_children_array(Oid parentrelId, + LOCKMODE lockmode, + uint32 *size); + +char *build_check_constraint_name_internal(Oid relid, + AttrNumber attno); + +bool pathman_config_contains_relation(Oid relid, + Datum *values, + bool *isnull); + +#endif diff --git a/src/nodes_common.c b/src/nodes_common.c index cacd21205d..849a778501 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -7,12 +7,13 @@ * * ------------------------------------------------------------------------ */ -#include "postgres.h" + #include "nodes_common.h" #include "runtimeappend.h" +#include "utils.h" + #include "optimizer/restrictinfo.h" #include "utils/memutils.h" -#include "utils.h" /* Allocation settings */ @@ -246,7 +247,7 @@ get_partition_oids(List *ranges, int *n, PartRelationInfo *prel) int allocated = INITIAL_ALLOC_NUM; int used = 0; Oid *result = 
(Oid *) palloc(allocated * sizeof(Oid)); - Oid *children = dsm_array_get_pointer(&prel->children, true); + Oid *children = PrelGetChildrenArray(prel, true); foreach (range_cell, ranges) { @@ -262,7 +263,7 @@ get_partition_oids(List *ranges, int *n, PartRelationInfo *prel) result = repalloc(result, allocated * sizeof(Oid)); } - Assert(i < prel->children_count); + Assert(i < PrelChildrenCount(prel)); result[used++] = children[i]; } } @@ -500,7 +501,7 @@ rescan_append_common(CustomScanState *node) Oid *parts; int nparts; - ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); + ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); InitWalkerContextCustomNode(&scan_state->wcxt, scan_state->prel, econtext, CurrentMemoryContext, false, diff --git a/src/nodes_common.h b/src/nodes_common.h index a456baca12..07aaa6e0dc 100644 --- a/src/nodes_common.h +++ b/src/nodes_common.h @@ -7,11 +7,15 @@ * * ------------------------------------------------------------------------ */ + #ifndef NODES_COMMON_H #define NODES_COMMON_H +#include "relation_info.h" + +#include "postgres.h" #include "commands/explain.h" -#include "pathman.h" +#include "optimizer/planner.h" /* diff --git a/src/partition_filter.c b/src/partition_filter.c index 5f13dc96ff..bfa2d7d1a1 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -1,5 +1,17 @@ +/* ------------------------------------------------------------------------ + * + * partition_filter.c + * Select partition for INSERT operation + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + #include "partition_filter.h" +#include "nodes_common.h" #include "utils.h" + #include "utils/guc.h" #include "utils/memutils.h" #include "nodes/nodeFuncs.h" @@ -352,12 +364,12 @@ partition_filter_visitor(Plan *plan, void *context) ListCell *lc1, *lc2; - Assert(rtable && IsA(rtable, List)); - /* Skip if not ModifyTable with 
'INSERT' command */ if (!IsA(modify_table, ModifyTable) || modify_table->operation != CMD_INSERT) return; + Assert(rtable && IsA(rtable, List)); + forboth (lc1, modify_table->plans, lc2, modify_table->resultRelations) { Index rindex = lfirst_int(lc2); diff --git a/src/partition_filter.h b/src/partition_filter.h index 71b3a89f0e..dfcadcac34 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -1,10 +1,22 @@ +/* ------------------------------------------------------------------------ + * + * partition_filter.h + * Select partition for INSERT operation + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + #ifndef RUNTIME_INSERT_H #define RUNTIME_INSERT_H -#include "postgres.h" - +#include "relation_info.h" #include "pathman.h" -#include "nodes_common.h" + +#include "postgres.h" +#include "commands/explain.h" +#include "optimizer/planner.h" typedef struct diff --git a/src/pathman.h b/src/pathman.h index 6e3a203974..f7cecdac9f 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -7,23 +7,27 @@ * * ------------------------------------------------------------------------ */ + #ifndef PATHMAN_H #define PATHMAN_H +#include "dsm_array.h" +#include "init.h" +#include "relation_info.h" +#include "rangeset.h" + #include "postgres.h" #include "utils/date.h" -#include "utils/hsearch.h" #include "utils/snapshot.h" #include "utils/typcache.h" -#include "nodes/pg_list.h" #include "nodes/makefuncs.h" #include "nodes/primnodes.h" #include "nodes/execnodes.h" #include "optimizer/planner.h" #include "parser/parsetree.h" -#include "storage/dsm.h" #include "storage/lwlock.h" + /* Check PostgreSQL version */ #if PG_VERSION_NUM < 90500 #error "You are trying to build pg_pathman with PostgreSQL version lower than 9.5. Please, check your environment." 
@@ -32,104 +36,29 @@ #define ALL NIL #define INITIAL_BLOCKS_COUNT 8192 -/* - * Partitioning type - */ -typedef enum PartType -{ - PT_HASH = 1, - PT_RANGE -} PartType; /* - * Dynamic shared memory array + * Definitions for the "pathman_config" table. */ -typedef struct DsmArray -{ - dsm_handle segment; - size_t offset; - size_t elem_count; - size_t entry_size; -} DsmArray; +#define PATHMAN_CONFIG "pathman_config" +#define Natts_pathman_config 5 +#define Anum_pathman_config_id 1 +#define Anum_pathman_config_partrel 2 +#define Anum_pathman_config_attname 3 +#define Anum_pathman_config_parttype 4 +#define Anum_pathman_config_range_interval 5 -/* - * Hashtable key for relations - */ -typedef struct RelationKey -{ - Oid dbid; - Oid relid; -} RelationKey; +#define PATHMAN_CONFIG_partrel_idx "pathman_config_partrel_idx" -/* - * PartRelationInfo - * Per-relation partitioning information - * - * oid - parent table's Oid - * children - list of children's Oids - * parttype - partitioning type (HASH, LIST or RANGE) - * attnum - attribute number of parent relation's column - * atttype - attribute type - * atttypmod - attrubute type modifier - * cmp_proc - compare fuction for a type of the attribute - * hash_proc - hash function for a type of the attribute - */ -typedef struct PartRelationInfo -{ - RelationKey key; - DsmArray children; - int children_count; - PartType parttype; - Index attnum; - Oid atttype; - int32 atttypmod; - Oid cmp_proc; - Oid hash_proc; -} PartRelationInfo; /* - * Child relation info for HASH partitioning + * pg_pathman's global state. 
*/ -typedef struct HashRelationKey -{ - uint32 hash; - Oid parent_oid; -} HashRelationKey; - -typedef struct HashRelation -{ - HashRelationKey key; - Oid child_oid; -} HashRelation; - -/* - * Child relation info for RANGE partitioning - */ -typedef struct RangeEntry -{ - Oid child_oid; - -#ifdef HAVE_INT64_TIMESTAMP - int64 min; - int64 max; -#else - double min; - double max; -#endif -} RangeEntry; - -typedef struct RangeRelation -{ - RelationKey key; - bool by_val; - DsmArray ranges; -} RangeRelation; - typedef struct PathmanState { - LWLock *load_config_lock; - LWLock *dsm_init_lock; - LWLock *edit_partitions_lock; + LWLock *dsm_init_lock, + *load_config_lock, + *edit_partitions_lock; DsmArray databases; } PathmanState; @@ -155,100 +84,33 @@ extern List *inheritance_disabled_relids; extern bool pg_pathman_enable; extern PathmanState *pmstate; -#define PATHMAN_GET_DATUM(value, by_val) ( (by_val) ? (Datum) (value) : PointerGetDatum(&value) ) - -typedef struct { - bool ir_valid : 1; - bool ir_lossy : 1; - uint32 ir_lower : 31; - uint32 ir_upper : 31; -} IndexRange; - -#define RANGE_MASK 0xEFFFFFFF -#define InvalidIndexRange { false, false, 0, 0 } +#define PATHMAN_GET_DATUM(value, by_val) \ + ( (by_val) ? (Datum) (value) : PointerGetDatum(&value) ) -inline static IndexRange -make_irange(uint32 lower, uint32 upper, bool lossy) -{ - IndexRange result; +/* + * Check if pg_pathman is initialized & enabled. 
+ */ +#define IsPathmanReady() ( !initialization_needed && pg_pathman_enable ) - result.ir_valid = true; - result.ir_lossy = lossy; - result.ir_lower = (lower & RANGE_MASK); - result.ir_upper = (upper & RANGE_MASK); +#define IsPathmanEnabled() ( pg_pathman_enable ) - return result; -} +#define DisablePathman() \ + do { \ + pg_pathman_enable = false; \ + } while (0) -inline static IndexRange * -alloc_irange(IndexRange irange) -{ - IndexRange *result = (IndexRange *) palloc(sizeof(IndexRange)); - - memcpy((void *) result, (void *) &irange, sizeof(IndexRange)); - - return result; -} - -#define lfirst_irange(lc) ( *(IndexRange *) lfirst(lc) ) -#define lappend_irange(list, irange) ( lappend((list), alloc_irange(irange)) ) -#define lcons_irange(irange, list) ( lcons(alloc_irange(irange), (list)) ) -#define list_make1_irange(irange) ( lcons(alloc_irange(irange), NIL) ) -#define llast_irange(list) ( lfirst_irange(list_tail(list)) ) -#define linitial_irange(list) ( lfirst_irange(list_head(list)) ) - - -extern HTAB *relations; -extern HTAB *range_restrictions; -extern bool initialization_needed; - - -/* rangeset.c */ -bool irange_intersects(IndexRange a, IndexRange b); -bool irange_conjuncted(IndexRange a, IndexRange b); -IndexRange irange_union(IndexRange a, IndexRange b); -IndexRange irange_intersect(IndexRange a, IndexRange b); -List *irange_list_union(List *a, List *b); -List *irange_list_intersect(List *a, List *b); -int irange_list_length(List *rangeset); -bool irange_list_find(List *rangeset, int index, bool *lossy); - -/* Dynamic shared memory functions */ -Size get_dsm_shared_size(void); -void init_dsm_config(void); -bool init_dsm_segment(size_t blocks_count, size_t block_size); -void init_dsm_table(size_t block_size, size_t start, size_t end); -void alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count); -void free_dsm_array(DsmArray *arr); -void resize_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count); -void *dsm_array_get_pointer(const 
DsmArray *arr, bool copy); -dsm_handle get_dsm_array_segment(void); -void attach_dsm_array_segment(void); - -/* initialization functions */ -Size pathman_memsize(void); -void init_shmem_config(void); -void load_config(void); -void create_relations_hashtable(void); -void create_hash_restrictions_hashtable(void); -void create_range_restrictions_hashtable(void); -void load_relations(bool reinitialize); -void load_partitions(Oid parent_oid, Snapshot snapshot); -void remove_relation_info(Oid relid); /* utility functions */ int append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte, int index, Oid childOID, List *wrappers); -PartRelationInfo *get_pathman_relation_info(Oid relid, bool *found); -RangeRelation *get_pathman_range_relation(Oid relid, bool *found); + search_rangerel_result search_range_partition_eq(const Datum value, FmgrInfo *cmp_func, - const RangeRelation *rangerel, - RangeEntry *out_rentry); -char *get_extension_schema(void); + const PartRelationInfo *prel, + RangeEntry *out_re); + Oid create_partitions_bg_worker(Oid relid, Datum value, Oid value_type); -Oid create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed); uint32 make_hash(uint32 value, uint32 partitions); void handle_modification_query(Query *parse); @@ -333,7 +195,6 @@ typedef struct #define WcxtHasExprContext(wcxt) ( (wcxt)->econtext ) void select_range_partitions(const Datum value, - const bool byVal, FmgrInfo *cmp_func, const RangeEntry *ranges, const size_t nranges, diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 6e4dacc7dd..6920d43a93 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -8,7 +8,15 @@ * * ------------------------------------------------------------------------ */ + #include "pathman.h" +#include "init.h" +#include "hooks.h" +#include "utils.h" +#include "partition_filter.h" +#include "runtimeappend.h" +#include "runtime_merge_append.h" + #include "postgres.h" #include "fmgr.h" #include "miscadmin.h" @@ -37,18 +45,14 
@@ #include "storage/ipc.h" #include "catalog/pg_type.h" #include "foreign/fdwapi.h" -#include "hooks.h" -#include "utils.h" -#include "partition_filter.h" -#include "runtimeappend.h" -#include "runtime_merge_append.h" + PG_MODULE_MAGIC; List *inheritance_disabled_relids = NIL; List *inheritance_enabled_relids = NIL; -bool pg_pathman_enable; +bool pg_pathman_enable = true; PathmanState *pmstate; @@ -106,6 +110,7 @@ static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo #define ExtractConst(wcxt, node) \ ( IsA((node), Param) ? extract_const((wcxt), (Param *) (node)) : ((Const *) (node)) ) + /* * Entry point */ @@ -117,11 +122,12 @@ _PG_init(void) elog(ERROR, "Pathman module must be initialized in postmaster. " "Put the following line to configuration file: " "shared_preload_libraries='pg_pathman'"); + initialization_needed = false; } /* Request additional shared resources */ - RequestAddinShmemSpace(pathman_memsize()); + RequestAddinShmemSpace(estimate_pathman_shmem_size()); RequestAddinLWLocks(3); /* Initialize 'next' hook pointers */ @@ -141,7 +147,7 @@ _PG_init(void) init_runtime_merge_append_static_data(); init_partition_filter_static_data(); - /* Main toggle */ + /* Main toggle, load_config() will enable it */ DefineCustomBoolVariable("pg_pathman.enable", "Enables pg_pathman's optimizations during the planner stage", NULL, @@ -154,26 +160,6 @@ _PG_init(void) NULL); } -PartRelationInfo * -get_pathman_relation_info(Oid relid, bool *found) -{ - RelationKey key; - - key.dbid = MyDatabaseId; - key.relid = relid; - return hash_search(relations, (const void *) &key, HASH_FIND, found); -} - -RangeRelation * -get_pathman_range_relation(Oid relid, bool *found) -{ - RelationKey key; - - key.dbid = MyDatabaseId; - key.relid = relid; - return hash_search(range_restrictions, (const void *) &key, HASH_FIND, found); -} - /* * Disables inheritance for partitioned by pathman relations. It must be done to * prevent PostgresSQL from full search. 
@@ -301,7 +287,7 @@ disable_inheritance_subselect_walker(Node *node, void *context) } /* - * Checks if query is affects only one partition. If true then substitute + * Checks if query affects only one partition. If true then substitute */ void handle_modification_query(Query *parse) @@ -325,7 +311,7 @@ handle_modification_query(Query *parse) return; /* Parse syntax tree and extract partition ranges */ - ranges = list_make1_irange(make_irange(0, prel->children_count - 1, false)); + ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); expr = (Expr *) eval_const_expressions(NULL, parse->jointree->quals); if (!expr) return; @@ -344,7 +330,7 @@ handle_modification_query(Query *parse) IndexRange irange = linitial_irange(ranges); if (irange.ir_lower == irange.ir_upper) { - Oid *children = (Oid *) dsm_array_get_pointer(&prel->children, true); + Oid *children = PrelGetChildrenArray(prel, true); rte->relid = children[irange.ir_lower]; rte->inh = false; } @@ -388,8 +374,6 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, rel->rows = parent_rows; rel->width = rint(parent_size / parent_rows); - // for (i = 0; i < nattrs; i++) - // rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows); rel->tuples = parent_rows; } @@ -644,14 +628,10 @@ wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysTrue) return (Node *)result; } else - { return copyObject(wrap->orig); - } } else - { return copyObject(wrap->orig); - } } /* @@ -663,10 +643,10 @@ wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysTrue) void refresh_walker_context_ranges(WalkerContext *context) { - RangeRelation *rangerel; MemoryContext old_mcxt; + Oid partitioned_table = context->prel->key; - rangerel = get_pathman_range_relation(context->prel->key.relid, NULL); + context->prel = get_pathman_relation_info(partitioned_table, NULL); /* Clear old cached data */ clear_walker_context(context); @@ -674,8 +654,8 @@ refresh_walker_context_ranges(WalkerContext 
*context) /* Switch to long-living context which should store data */ old_mcxt = MemoryContextSwitchTo(context->persistent_mcxt); - context->ranges = dsm_array_get_pointer(&rangerel->ranges, true); - context->nranges = rangerel->ranges.elem_count; + context->ranges = PrelGetRangesArray(context->prel, true); + context->nranges = PrelChildrenCount(context->prel); /* Switch back */ MemoryContextSwitchTo(old_mcxt); @@ -689,8 +669,8 @@ clear_walker_context(WalkerContext *context) { if (context->ranges) { - pfree((void *) context->ranges); context->ranges = NULL; + context->nranges = 0; } } @@ -730,7 +710,8 @@ walk_expr_tree(Expr *expr, WalkerContext *context) result = (WrapperNode *) palloc(sizeof(WrapperNode)); result->orig = (const Node *) expr; result->args = NIL; - result->rangeset = list_make1_irange(make_irange(0, context->prel->children_count - 1, true)); + result->rangeset = list_make1_irange( + make_irange(0, PrelChildrenCount(context->prel) - 1, true)); result->paramsel = 1.0; return result; } @@ -749,14 +730,14 @@ finish_least_greatest(WrapperNode *wrap, WalkerContext *context) greatest = DatumGetInt32(context->greatest); List *rangeset = NIL; - if (greatest - least + 1 < context->prel->children_count) + if (greatest - least + 1 < PrelChildrenCount(context->prel)) { uint32 value, hash; for (value = least; value <= greatest; value++) { - hash = make_hash(value, context->prel->children_count); + hash = make_hash(value, PrelChildrenCount(context->prel)); rangeset = irange_list_union(rangeset, list_make1_irange(make_irange(hash, hash, true))); } @@ -775,7 +756,6 @@ finish_least_greatest(WrapperNode *wrap, WalkerContext *context) void select_range_partitions(const Datum value, - const bool byVal, FmgrInfo *cmp_func, const RangeEntry *ranges, const size_t nranges, @@ -813,10 +793,8 @@ select_range_partitions(const Datum value, Assert(cmp_func); /* Corner cases */ - cmp_min = FunctionCall2(cmp_func, value, - PATHMAN_GET_DATUM(ranges[startidx].min, byVal)), - cmp_max 
= FunctionCall2(cmp_func, value, - PATHMAN_GET_DATUM(ranges[endidx].max, byVal)); + cmp_min = FunctionCall2(cmp_func, value, ranges[startidx].min), + cmp_max = FunctionCall2(cmp_func, value, ranges[endidx].max); if ((cmp_min <= 0 && strategy == BTLessStrategyNumber) || (cmp_min < 0 && (strategy == BTLessEqualStrategyNumber || @@ -859,10 +837,8 @@ select_range_partitions(const Datum value, current_re = &ranges[i]; - cmp_min = FunctionCall2(cmp_func, value, - PATHMAN_GET_DATUM(current_re->min, byVal)); - cmp_max = FunctionCall2(cmp_func, value, - PATHMAN_GET_DATUM(current_re->max, byVal)); + cmp_min = FunctionCall2(cmp_func, value, current_re->min); + cmp_max = FunctionCall2(cmp_func, value, current_re->max); is_less = (cmp_min < 0 || (cmp_min == 0 && strategy == BTLessStrategyNumber)); is_greater = (cmp_max > 0 || (cmp_max >= 0 && strategy != BTLessStrategyNumber)); @@ -979,7 +955,8 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, if (strategy == BTEqualStrategyNumber) { Datum value = OidFunctionCall1(prel->hash_proc, c->constvalue); - uint32 hash = make_hash(DatumGetUInt32(value), prel->children_count); + uint32 hash = make_hash(DatumGetUInt32(value), + PrelChildrenCount(prel)); result->rangeset = list_make1_irange(make_irange(hash, hash, true)); @@ -987,14 +964,12 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, } case PT_RANGE: - if (get_pathman_range_relation(context->prel->key.relid, NULL)) { /* Refresh 'ranges' cache if necessary */ if (!context->ranges) refresh_walker_context_ranges(context); select_range_partitions(c->constvalue, - c->constbyval, &cmp_func, context->ranges, context->nranges, @@ -1004,7 +979,9 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, } } - result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, true)); + result->rangeset = list_make1_irange(make_irange(0, + PrelChildrenCount(prel) - 1, + true)); result->paramsel = 1.0; } @@ -1030,11 +1007,13 @@ 
handle_binary_opexpr_param(const PartRelationInfo *prel, tce = lookup_type_cache(vartype, TYPECACHE_BTREE_OPFAMILY); strategy = get_op_opfamily_strategy(expr->opno, tce->btree_opf); - result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, true)); + result->rangeset = list_make1_irange(make_irange(0, + PrelChildrenCount(prel) - 1, + true)); if (strategy == BTEqualStrategyNumber) { - result->paramsel = 1.0 / (double) prel->children_count; + result->paramsel = 1.0 / (double) PrelChildrenCount(prel); } else if (prel->parttype == PT_RANGE && strategy > 0) { @@ -1058,20 +1037,17 @@ make_hash(uint32 value, uint32 partitions) search_rangerel_result search_range_partition_eq(const Datum value, FmgrInfo *cmp_func, - const RangeRelation *rangerel, - RangeEntry *out_rentry) /* actual result */ + const PartRelationInfo *prel, + RangeEntry *out_re) /* returned RangeEntry */ { RangeEntry *ranges; size_t nranges; WrapperNode result; - Assert(rangerel); - - ranges = dsm_array_get_pointer(&rangerel->ranges, true); - nranges = rangerel->ranges.elem_count; + ranges = PrelGetRangesArray(prel, true); + nranges = PrelChildrenCount(prel); select_range_partitions(value, - rangerel->by_val, cmp_func, ranges, nranges, @@ -1080,12 +1056,10 @@ search_range_partition_eq(const Datum value, if (result.found_gap) { - pfree(ranges); return SEARCH_RANGEREL_GAP; } else if (result.rangeset == NIL) { - pfree(ranges); return SEARCH_RANGEREL_OUT_OF_RANGE; } else @@ -1097,12 +1071,11 @@ search_range_partition_eq(const Datum value, Assert(irange.ir_valid); /* Write result to the 'out_rentry' if necessary */ - if (out_rentry) - memcpy((void *) out_rentry, - (void *) &ranges[irange.ir_lower], + if (out_re) + memcpy((void *) out_re, + (const void *) &ranges[irange.ir_lower], sizeof(RangeEntry)); - pfree(ranges); return SEARCH_RANGEREL_FOUND; } } @@ -1134,7 +1107,7 @@ handle_const(const Const *c, WalkerContext *context) if (!context->for_insert) { result->rangeset = 
list_make1_irange(make_irange(0, - prel->children_count - 1, + PrelChildrenCount(prel) - 1, true)); result->paramsel = 1.0; @@ -1146,7 +1119,8 @@ handle_const(const Const *c, WalkerContext *context) case PT_HASH: { Datum value = OidFunctionCall1(prel->hash_proc, c->constvalue); - uint32 hash = make_hash(DatumGetUInt32(value), prel->children_count); + uint32 hash = make_hash(DatumGetUInt32(value), + PrelChildrenCount(prel)); result->rangeset = list_make1_irange(make_irange(hash, hash, true)); } break; @@ -1162,7 +1136,6 @@ handle_const(const Const *c, WalkerContext *context) refresh_walker_context_ranges(context); select_range_partitions(c->constvalue, - c->constbyval, &tce->cmp_proc_finfo, context->ranges, context->nranges, @@ -1209,7 +1182,9 @@ handle_opexpr(const OpExpr *expr, WalkerContext *context) } } - result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, true)); + result->rangeset = list_make1_irange(make_irange(0, + PrelChildrenCount(prel) - 1, + true)); result->paramsel = 1.0; return result; } @@ -1280,7 +1255,9 @@ handle_boolexpr(const BoolExpr *expr, WalkerContext *context) result->paramsel = 1.0; if (expr->boolop == AND_EXPR) - result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, false)); + result->rangeset = list_make1_irange(make_irange(0, + PrelChildrenCount(prel) - 1, + false)); else result->rangeset = NIL; @@ -1301,7 +1278,9 @@ handle_boolexpr(const BoolExpr *expr, WalkerContext *context) result->paramsel *= arg->paramsel; break; default: - result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, false)); + result->rangeset = list_make1_irange(make_irange(0, + PrelChildrenCount(prel) - 1, + false)); break; } } @@ -1383,7 +1362,7 @@ handle_arrexpr(const ScalarArrayOpExpr *expr, WalkerContext *context) { /* Invoke base hash function for value type */ value = OidFunctionCall1(prel->hash_proc, elem_values[i]); - hash = make_hash(DatumGetUInt32(value), prel->children_count); + hash = 
make_hash(DatumGetUInt32(value), PrelChildrenCount(prel)); result->rangeset = irange_list_union(result->rangeset, list_make1_irange(make_irange(hash, hash, true))); } @@ -1399,7 +1378,9 @@ handle_arrexpr(const ScalarArrayOpExpr *expr, WalkerContext *context) result->paramsel = DEFAULT_INEQ_SEL; handle_arrexpr_return: - result->rangeset = list_make1_irange(make_irange(0, prel->children_count - 1, true)); + result->rangeset = list_make1_irange(make_irange(0, + PrelChildrenCount(prel) - 1, + true)); return result; } diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 3109c9efa2..3970eb7428 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -9,13 +9,16 @@ */ #include "pathman.h" +#include "init.h" +#include "utils.h" + #include "access/nbtree.h" #include "access/xact.h" +#include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/typcache.h" #include "utils/array.h" #include "utils/memutils.h" -#include "utils.h" /* declarations */ @@ -32,222 +35,15 @@ PG_FUNCTION_INFO_V1( get_min_range_value ); PG_FUNCTION_INFO_V1( get_max_range_value ); PG_FUNCTION_INFO_V1( get_type_hash_func ); PG_FUNCTION_INFO_V1( get_hash ); +PG_FUNCTION_INFO_V1( build_check_constraint_name_attnum ); +PG_FUNCTION_INFO_V1( build_check_constraint_name_attname ); -/* - * Partition-related operation type. - */ -typedef enum -{ - EV_ON_PART_CREATED = 1, - EV_ON_PART_UPDATED, - EV_ON_PART_REMOVED -} part_event_type; - -/* - * We have to reset shared memory cache each time a transaction - * containing a partitioning-related operation has been rollbacked, - * hence we need to pass a partitioned table's Oid & some other stuff. - * - * Note: 'relname' cannot be fetched within - * Xact callbacks, so we have to store it here. 
- */ -typedef struct part_abort_arg part_abort_arg; - -struct part_abort_arg -{ - Oid partitioned_table_relid; - char *relname; - - bool is_subxact; /* needed for correct callback removal */ - SubTransactionId subxact_id; /* necessary for detecting specific subxact */ - part_abort_arg *xact_cb_arg; /* points to the parent Xact's arg */ - - part_event_type event; /* created | updated | removed partitions */ - - bool expired; /* set by (Sub)Xact when a job is done */ -}; - - -static part_abort_arg * make_part_abort_arg(Oid partitioned_table, - part_event_type event, - bool is_subxact, - part_abort_arg *xact_cb_arg); - -static void handle_part_event_cancellation(const part_abort_arg *arg); -static void on_xact_abort_callback(XactEvent event, void *arg); -static void on_subxact_abort_callback(SubXactEvent event, SubTransactionId mySubid, - SubTransactionId parentSubid, void *arg); - -static void remove_on_xact_abort_callbacks(void *arg); -static void add_on_xact_abort_callbacks(Oid partitioned_table, part_event_type event); - static void on_partitions_created_internal(Oid partitioned_table, bool add_callbacks); static void on_partitions_updated_internal(Oid partitioned_table, bool add_callbacks); static void on_partitions_removed_internal(Oid partitioned_table, bool add_callbacks); -/* Construct part_abort_arg for callbacks in TopTransactionContext. 
*/ -static part_abort_arg * -make_part_abort_arg(Oid partitioned_table, part_event_type event, - bool is_subxact, part_abort_arg *xact_cb_arg) -{ - part_abort_arg *arg = MemoryContextAlloc(TopTransactionContext, - sizeof(part_abort_arg)); - - const char *relname = get_rel_name(partitioned_table); - - /* Fill in Oid & relation name */ - arg->partitioned_table_relid = partitioned_table; - arg->relname = MemoryContextStrdup(TopTransactionContext, relname); - arg->is_subxact = is_subxact; - arg->subxact_id = GetCurrentSubTransactionId(); /* for SubXact callback */ - arg->xact_cb_arg = xact_cb_arg; - arg->event = event; - arg->expired = false; - - return arg; -} - -/* Revert shared memory cache changes iff xact has been aborted. */ -static void -handle_part_event_cancellation(const part_abort_arg *arg) -{ -#define DO_NOT_USE_CALLBACKS false /* just to clarify intentions */ - - switch (arg->event) - { - case EV_ON_PART_CREATED: - { - elog(WARNING, "Partitioning of table '%s' has been aborted, " - "removing partitions from pg_pathman's cache", - arg->relname); - - on_partitions_removed_internal(arg->partitioned_table_relid, - DO_NOT_USE_CALLBACKS); - } - break; - - case EV_ON_PART_UPDATED: - { - elog(WARNING, "All changes in partitioned table " - "'%s' will be discarded", - arg->relname); - - on_partitions_updated_internal(arg->partitioned_table_relid, - DO_NOT_USE_CALLBACKS); - } - break; - - case EV_ON_PART_REMOVED: - { - elog(WARNING, "All changes in partitioned table " - "'%s' will be discarded", - arg->relname); - - on_partitions_created_internal(arg->partitioned_table_relid, - DO_NOT_USE_CALLBACKS); - } - break; - - default: - elog(ERROR, "Unknown event spotted in xact callback"); - } -} - -/* - * Add & remove xact callbacks - */ - -static void -remove_on_xact_abort_callbacks(void *arg) -{ - part_abort_arg *parg = (part_abort_arg *) arg; - - elog(DEBUG2, "remove_on_xact_abort_callbacks() " - "[is_subxact = %s, relname = '%s', event = %u] " - "triggered for relation 
%u", - (parg->is_subxact ? "true" : "false"), parg->relname, - parg->event, parg->partitioned_table_relid); - - /* Is this a SubXact callback or not? */ - if (!parg->is_subxact) - UnregisterXactCallback(on_xact_abort_callback, arg); - else - UnregisterSubXactCallback(on_subxact_abort_callback, arg); - - pfree(arg); -} - -static void -add_on_xact_abort_callbacks(Oid partitioned_table, part_event_type event) -{ - part_abort_arg *xact_cb_arg = make_part_abort_arg(partitioned_table, - event, false, NULL); - - RegisterXactCallback(on_xact_abort_callback, (void *) xact_cb_arg); - execute_on_xact_mcxt_reset(TopTransactionContext, - remove_on_xact_abort_callbacks, - xact_cb_arg); - - /* Register SubXact callback if necessary */ - if (IsSubTransaction()) - { - /* - * SubXact callback's arg contains a pointer to the parent - * Xact callback's arg. This will allow it to 'expire' both - * args and to prevent Xact's callback from doing anything - */ - void *subxact_cb_arg = make_part_abort_arg(partitioned_table, event, - true, xact_cb_arg); - - RegisterSubXactCallback(on_subxact_abort_callback, subxact_cb_arg); - execute_on_xact_mcxt_reset(CurTransactionContext, - remove_on_xact_abort_callbacks, - subxact_cb_arg); - } -} - -/* - * Xact & SubXact callbacks - */ - -static void -on_xact_abort_callback(XactEvent event, void *arg) -{ - part_abort_arg *parg = (part_abort_arg *) arg; - - /* Check that this is an aborted Xact & action has not expired yet */ - if ((event == XACT_EVENT_ABORT || event == XACT_EVENT_PARALLEL_ABORT) && - !parg->expired) - { - handle_part_event_cancellation(parg); - - /* Set expiration flag */ - parg->expired = true; - } -} - -static void -on_subxact_abort_callback(SubXactEvent event, SubTransactionId mySubid, - SubTransactionId parentSubid, void *arg) -{ - part_abort_arg *parg = (part_abort_arg *) arg; - - Assert(parg->subxact_id != InvalidSubTransactionId); - - /* Check if this is an aborted SubXact we've been waiting for */ - if (event == 
SUBXACT_EVENT_ABORT_SUB && - mySubid <= parg->subxact_id && !parg->expired) - { - handle_part_event_cancellation(parg); - - /* Now set expiration flags to disable Xact callback */ - parg->xact_cb_arg->expired = true; - parg->expired = true; - } -} - /* * Callbacks */ @@ -258,14 +54,6 @@ on_partitions_created_internal(Oid partitioned_table, bool add_callbacks) elog(DEBUG2, "on_partitions_created() [add_callbacks = %s] " "triggered for relation %u", (add_callbacks ? "true" : "false"), partitioned_table); - - LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); - load_relations(false); - LWLockRelease(pmstate->load_config_lock); - - /* Register hooks that will clear shmem cache if needed */ - if (add_callbacks) - add_on_xact_abort_callbacks(partitioned_table, EV_ON_PART_CREATED); } static void @@ -274,18 +62,6 @@ on_partitions_updated_internal(Oid partitioned_table, bool add_callbacks) elog(DEBUG2, "on_partitions_updated() [add_callbacks = %s] " "triggered for relation %u", (add_callbacks ? "true" : "false"), partitioned_table); - - if (get_pathman_relation_info(partitioned_table, NULL)) - { - LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); - remove_relation_info(partitioned_table); - load_relations(false); - LWLockRelease(pmstate->load_config_lock); - } - - /* Register hooks that will clear shmem cache if needed */ - if (add_callbacks) - add_on_xact_abort_callbacks(partitioned_table, EV_ON_PART_UPDATED); } static void @@ -294,14 +70,6 @@ on_partitions_removed_internal(Oid partitioned_table, bool add_callbacks) elog(DEBUG2, "on_partitions_removed() [add_callbacks = %s] " "triggered for relation %u", (add_callbacks ? 
"true" : "false"), partitioned_table); - - LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); - remove_relation_info(partitioned_table); - LWLockRelease(pmstate->load_config_lock); - - /* Register hooks that will clear shmem cache if needed */ - if (add_callbacks) - add_on_xact_abort_callbacks(partitioned_table, EV_ON_PART_REMOVED); } /* @@ -331,7 +99,7 @@ on_partitions_removed(PG_FUNCTION_ARGS) /* * Returns partition oid for specified parent relid and value. - * In case when partition isn't exist try to create one. + * In case when partition doesn't exist try to create one. */ Datum find_or_create_range_partition(PG_FUNCTION_ARGS) @@ -339,28 +107,26 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) Oid relid = PG_GETARG_OID(0); Datum value = PG_GETARG_DATUM(1); Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); - RangeRelation *rangerel; PartRelationInfo *prel; FmgrInfo cmp_func; + RangeEntry found_rentry; search_rangerel_result search_state; - RangeEntry found_re; prel = get_pathman_relation_info(relid, NULL); - rangerel = get_pathman_range_relation(relid, NULL); - if (!prel || !rangerel) + if (!prel) PG_RETURN_NULL(); fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); - search_state = search_range_partition_eq(value, &cmp_func, - rangerel, &found_re); + search_state = search_range_partition_eq(value, &cmp_func,prel, + &found_rentry); /* * If found then just return oid, else create new partitions */ if (search_state == SEARCH_RANGEREL_FOUND) - PG_RETURN_OID(found_re.child_oid); + PG_RETURN_OID(found_rentry.child_oid); /* * If not found and value is between first and last partitions */ @@ -368,21 +134,30 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) PG_RETURN_NULL(); else { - Oid child_oid; + Oid child_oid = InvalidOid; + + LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); + LWLockAcquire(pmstate->edit_partitions_lock, LW_EXCLUSIVE); /* * Check if someone else has already created partition. 
*/ - search_state = search_range_partition_eq(value, &cmp_func, - rangerel, &found_re); + search_state = search_range_partition_eq(value, &cmp_func, prel, + &found_rentry); if (search_state == SEARCH_RANGEREL_FOUND) { - PG_RETURN_OID(found_re.child_oid); + LWLockRelease(pmstate->load_config_lock); + LWLockRelease(pmstate->edit_partitions_lock); + + PG_RETURN_OID(found_rentry.child_oid); } /* Start background worker to create new partitions */ child_oid = create_partitions_bg_worker(relid, value, value_type); + LWLockRelease(pmstate->load_config_lock); + LWLockRelease(pmstate->edit_partitions_lock); + PG_RETURN_OID(child_oid); } } @@ -404,24 +179,23 @@ get_partition_range(PG_FUNCTION_ARGS) bool found = false; Datum *elems; PartRelationInfo *prel; - RangeRelation *rangerel; RangeEntry *ranges; + Oid *parts; TypeCacheEntry *tce; ArrayType *arr; prel = get_pathman_relation_info(parent_oid, NULL); - rangerel = get_pathman_range_relation(parent_oid, NULL); - - if (!prel || !rangerel) + if (!prel) PG_RETURN_NULL(); - ranges = dsm_array_get_pointer(&rangerel->ranges, true); + ranges = PrelGetRangesArray(prel, true); + parts = PrelGetChildrenArray(prel, true); tce = lookup_type_cache(prel->atttype, 0); /* Looking for specified partition */ - for (i = 0; i < rangerel->ranges.elem_count; i++) - if (ranges[i].child_oid == child_oid) + for (i = 0; i < PrelChildrenCount(prel); i++) + if (parts[i] == child_oid) { found = true; break; @@ -429,11 +203,9 @@ get_partition_range(PG_FUNCTION_ARGS) if (found) { - bool byVal = rangerel->by_val; - elems = palloc(nelems * sizeof(Datum)); - elems[0] = PATHMAN_GET_DATUM(ranges[i].min, byVal); - elems[1] = PATHMAN_GET_DATUM(ranges[i].max, byVal); + elems[0] = ranges[i].min; + elems[1] = ranges[i].max; arr = construct_array(elems, nelems, prel->atttype, tce->typlen, tce->typbyval, tce->typalign); @@ -457,7 +229,6 @@ get_range_by_idx(PG_FUNCTION_ARGS) Oid parent_oid = PG_GETARG_OID(0); int idx = PG_GETARG_INT32(1); PartRelationInfo *prel; - 
RangeRelation *rangerel; RangeEntry *ranges; RangeEntry re; Datum *elems; @@ -465,23 +236,19 @@ get_range_by_idx(PG_FUNCTION_ARGS) prel = get_pathman_relation_info(parent_oid, NULL); - rangerel = get_pathman_range_relation(parent_oid, NULL); - - if (!prel || !rangerel || idx >= (int)rangerel->ranges.elem_count) + if (!prel || idx >= PrelChildrenCount(prel)) PG_RETURN_NULL(); tce = lookup_type_cache(prel->atttype, 0); - ranges = dsm_array_get_pointer(&rangerel->ranges, true); + ranges = PrelGetRangesArray(prel, true); if (idx >= 0) re = ranges[idx]; else - re = ranges[rangerel->ranges.elem_count - 1]; + re = ranges[PrelChildrenCount(prel) - 1]; elems = palloc(2 * sizeof(Datum)); - elems[0] = PATHMAN_GET_DATUM(re.min, rangerel->by_val); - elems[1] = PATHMAN_GET_DATUM(re.max, rangerel->by_val); - - pfree(ranges); + elems[0] = re.min; + elems[1] = re.max; PG_RETURN_ARRAYTYPE_P( construct_array(elems, 2, prel->atttype, @@ -496,18 +263,16 @@ get_min_range_value(PG_FUNCTION_ARGS) { Oid parent_oid = PG_GETARG_OID(0); PartRelationInfo *prel; - RangeRelation *rangerel; RangeEntry *ranges; prel = get_pathman_relation_info(parent_oid, NULL); - rangerel = get_pathman_range_relation(parent_oid, NULL); - if (!prel || !rangerel || prel->parttype != PT_RANGE || rangerel->ranges.elem_count == 0) + if (!prel || prel->parttype != PT_RANGE || PrelChildrenCount(prel) == 0) PG_RETURN_NULL(); - ranges = dsm_array_get_pointer(&rangerel->ranges, true); + ranges = PrelGetRangesArray(prel, true); - PG_RETURN_DATUM(PATHMAN_GET_DATUM(ranges[0].min, rangerel->by_val)); + PG_RETURN_DATUM(ranges[0].min); } /* @@ -518,18 +283,16 @@ get_max_range_value(PG_FUNCTION_ARGS) { Oid parent_oid = PG_GETARG_OID(0); PartRelationInfo *prel; - RangeRelation *rangerel; RangeEntry *ranges; prel = get_pathman_relation_info(parent_oid, NULL); - rangerel = get_pathman_range_relation(parent_oid, NULL); - if (!prel || !rangerel || prel->parttype != PT_RANGE || rangerel->ranges.elem_count == 0) + if (!prel || 
prel->parttype != PT_RANGE || PrelChildrenCount(prel) == 0) PG_RETURN_NULL(); - ranges = dsm_array_get_pointer(&rangerel->ranges, true); + ranges = PrelGetRangesArray(prel, true); - PG_RETURN_DATUM(PATHMAN_GET_DATUM(ranges[rangerel->ranges.elem_count - 1].max, rangerel->by_val)); + PG_RETURN_DATUM(ranges[PrelChildrenCount(prel) - 1].max); } /* @@ -551,38 +314,28 @@ check_overlap(PG_FUNCTION_ARGS) cmp_func_2; PartRelationInfo *prel; - RangeRelation *rangerel; RangeEntry *ranges; int i; - bool byVal; prel = get_pathman_relation_info(partitioned_table, NULL); - rangerel = get_pathman_range_relation(partitioned_table, NULL); - if (!prel || !rangerel || prel->parttype != PT_RANGE) + if (!prel || prel->parttype != PT_RANGE) PG_RETURN_NULL(); /* comparison functions */ fill_type_cmp_fmgr_info(&cmp_func_1, p1_type, prel->atttype); fill_type_cmp_fmgr_info(&cmp_func_2, p2_type, prel->atttype); - byVal = rangerel->by_val; - ranges = (RangeEntry *) dsm_array_get_pointer(&rangerel->ranges, true); - for (i = 0; i < rangerel->ranges.elem_count; i++) + ranges = PrelGetRangesArray(prel, true); + for (i = 0; i < PrelChildrenCount(prel); i++) { - int c1 = FunctionCall2(&cmp_func_1, p1, - PATHMAN_GET_DATUM(ranges[i].max, byVal)); - int c2 = FunctionCall2(&cmp_func_2, p2, - PATHMAN_GET_DATUM(ranges[i].min, byVal)); + int c1 = FunctionCall2(&cmp_func_1, p1, ranges[i].max); + int c2 = FunctionCall2(&cmp_func_2, p2, ranges[i].min); if (c1 < 0 && c2 > 0) - { - pfree(ranges); PG_RETURN_BOOL(true); - } } - pfree(ranges); PG_RETURN_BOOL(false); } @@ -625,3 +378,44 @@ get_hash(PG_FUNCTION_ARGS) PG_RETURN_UINT32(make_hash(value, part_count)); } + +Datum +build_check_constraint_name_attnum(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + AttrNumber attnum = PG_GETARG_INT16(1); + const char *result; + + if (!get_rel_name(relid)) + elog(ERROR, "Invalid relation %u", relid); + + /* We explicitly do not support system attributes */ + if (attnum == InvalidAttrNumber || attnum < 0) + elog(ERROR, 
"Cannot build check constraint name: " + "invalid attribute number %i", attnum); + + result = build_check_constraint_name_internal(relid, attnum); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + +Datum +build_check_constraint_name_attname(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + text *attname = PG_GETARG_TEXT_P(1); + AttrNumber attnum = get_attnum(relid, text_to_cstring(attname)); + const char *result; + + if (!get_rel_name(relid)) + elog(ERROR, "Invalid relation %u", relid); + + if (attnum == InvalidAttrNumber) + elog(ERROR, "Relation '%s' has no column '%s'", + get_rel_name(relid), + text_to_cstring(attname)); + + result = build_check_constraint_name_internal(relid, attnum); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} diff --git a/src/rangeset.c b/src/rangeset.c index 9c6e197031..beff56de32 100644 --- a/src/rangeset.c +++ b/src/rangeset.c @@ -1,13 +1,14 @@ /* ------------------------------------------------------------------------ * * rangeset.c - * Index range functions + * IndexRange functions * * Copyright (c) 2015-2016, Postgres Professional * * ------------------------------------------------------------------------ */ -#include "pathman.h" + +#include "rangeset.h" /* Check if two ranges are intersecting */ bool diff --git a/src/rangeset.h b/src/rangeset.h new file mode 100644 index 0000000000..ffe7f31fc8 --- /dev/null +++ b/src/rangeset.h @@ -0,0 +1,75 @@ +/* ------------------------------------------------------------------------ + * + * rangeset.h + * IndexRange functions + * + * Copyright (c) 2015-2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + +#ifndef PATHMAN_RANGESET_H +#define PATHMAN_RANGESET_H + + +#include "pathman.h" +#include "nodes/pg_list.h" + + +/* + * IndexRange contains a set of selected partitions. + */ +typedef struct { + bool ir_valid : 1; + bool ir_lossy : 1; /* should we use IndexScan? 
*/ + uint32 ir_lower : 31; /* lower bound */ + uint32 ir_upper : 31; /* upper bound */ +} IndexRange; + + +#define RANGE_MASK 0x7FFFFFFF /* low 31 bits: width of ir_lower / ir_upper */ +#define InvalidIndexRange { false, false, 0, 0 } + + +inline static IndexRange +make_irange(uint32 lower, uint32 upper, bool lossy) +{ + IndexRange result; + + result.ir_valid = true; + result.ir_lossy = lossy; + result.ir_lower = (lower & RANGE_MASK); + result.ir_upper = (upper & RANGE_MASK); + + return result; +} + +inline static IndexRange * +alloc_irange(IndexRange irange) +{ + IndexRange *result = (IndexRange *) palloc(sizeof(IndexRange)); + + memcpy((void *) result, (void *) &irange, sizeof(IndexRange)); + + return result; +} + +#define lfirst_irange(lc) ( *(IndexRange *) lfirst(lc) ) +#define lappend_irange(list, irange) ( lappend((list), alloc_irange(irange)) ) +#define lcons_irange(irange, list) ( lcons(alloc_irange(irange), (list)) ) +#define list_make1_irange(irange) ( lcons(alloc_irange(irange), NIL) ) +#define llast_irange(list) ( lfirst_irange(list_tail(list)) ) +#define linitial_irange(list) ( lfirst_irange(list_head(list)) ) + + +/* rangeset.c */ +bool irange_intersects(IndexRange a, IndexRange b); +bool irange_conjuncted(IndexRange a, IndexRange b); +IndexRange irange_union(IndexRange a, IndexRange b); +IndexRange irange_intersect(IndexRange a, IndexRange b); +List *irange_list_union(List *a, List *b); +List *irange_list_intersect(List *a, List *b); +int irange_list_length(List *rangeset); +bool irange_list_find(List *rangeset, int index, bool *lossy); + +#endif diff --git a/src/relation_info.c b/src/relation_info.c new file mode 100644 index 0000000000..ee9ee7cc06 --- /dev/null +++ b/src/relation_info.c @@ -0,0 +1,388 @@ +/* ------------------------------------------------------------------------ + * + * relation_info.c + * Data structures describing partitioned relations + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ +
+#include "relation_info.h" +#include "init.h" +#include "utils.h" + +#include "access/htup_details.h" +#include "access/xact.h" +#include "catalog/indexing.h" +#include "catalog/pg_inherits.h" +#include "miscadmin.h" +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/hsearch.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/snapmgr.h" + + +static Oid try_syscache_parent_search(Oid partition, PartParentSearch *status); +static Oid get_parent_of_partition_internal(Oid partition, + PartParentSearch *status, + HASHACTION action); + +#define FreeChildrenArray(prel) \ + do { \ + uint32 i; \ + /* Remove relevant PartParentInfos */ \ + if ((prel)->children) \ + { \ + for (i = 0; i < PrelChildrenCount(prel); i++) \ + { \ + Oid child = (prel)->children[i]; \ + /* If it's *always been* relid's partition, free cache */ \ + if (relid == get_parent_of_partition(child, NULL)) \ + forget_parent_of_partition(child, NULL); \ + } \ + pfree((prel)->children); \ + (prel)->children = NULL; \ + } \ + } while (0) + +#define FreeRangesArray(prel) \ + do { \ + if ((prel)->ranges) pfree((prel)->ranges); \ + (prel)->ranges = NULL; \ + } while (0) + + +/* + * refresh\invalidate\get\remove PartRelationInfo functions. + */ + +/* Create or update PartRelationInfo in local cache. */ +PartRelationInfo * +refresh_pathman_relation_info(Oid relid, + PartType partitioning_type, + const char *part_column_name) +{ + const LOCKMODE lockmode = AccessShareLock; + const TypeCacheEntry *typcache; + Oid *prel_children; + uint32 prel_children_count = 0, + i; + bool found; + PartRelationInfo *prel; + + prel = (PartRelationInfo *) hash_search(partitioned_rels, + (const void *) &relid, + HASH_ENTER, &found); + elog(DEBUG2, + found ? 
+ "Refreshing record for relation %u in pg_pathman's cache [%u]" : + "Creating new record for relation %u in pg_pathman's cache [%u]", + relid, MyProcPid); + + /* First we assume that this entry is invalid */ + prel->valid = false; + + /* Clear outdated resources */ + if (found) + { + /* Free these arrays iff they're not NULL */ + FreeChildrenArray(prel); + FreeRangesArray(prel); + } + + /* Make both arrays point to NULL */ + prel->children = NULL; + prel->ranges = NULL; + + /* Set partitioning type */ + prel->parttype = partitioning_type; + + /* Initialize PartRelationInfo using syscache & typcache */ + prel->attnum = get_attnum(relid, part_column_name); + prel->atttype = get_atttype(relid, prel->attnum); + prel->atttypmod = get_atttypmod(relid, prel->attnum); + prel->attbyval = get_typbyval(prel->atttype); + + /* Fetch HASH & CMP fuctions for atttype */ + typcache = lookup_type_cache(prel->atttype, + TYPECACHE_CMP_PROC | TYPECACHE_HASH_PROC); + + prel->cmp_proc = typcache->cmp_proc; + prel->hash_proc = typcache->hash_proc; + + LockRelationOid(relid, lockmode); + prel_children = find_inheritance_children_array(relid, lockmode, + &prel_children_count); + UnlockRelationOid(relid, lockmode); + + /* If there's no children at all, remove this entry */ + if (prel_children_count == 0) + remove_pathman_relation_info(relid); + + /* + * Fill 'prel' with partition info, raise ERROR if anything is wrong. + * This way PartRelationInfo will remain 'invalid', and 'get' procedure + * will try to refresh it again (and again), until the error is fixed + * by user manually (i.e. invalid check constraints etc). 
+ */ + fill_prel_with_partitions(prel_children, prel_children_count, prel); + + /* Add "partition+parent" tuple to cache */ + for (i = 0; i < prel_children_count; i++) + cache_parent_of_partition(prel_children[i], relid); + + pfree(prel_children); + + /* We've successfully built a cache entry */ + prel->valid = true; + + return prel; +} + +/* Invalidate PartRelationInfo cache entry. */ +void +invalidate_pathman_relation_info(Oid relid, bool *found) +{ + bool found_prel; + PartRelationInfo *prel = hash_search(partitioned_rels, + (const void *) &relid, + HASH_ENTER, &found_prel); + + /* We should create entry if it doesn't exist */ + if (!found_prel) + { + prel->children = NULL; + prel->ranges = NULL; + } + + prel->valid = false; /* now cache entry is invalid */ + + /* Set 'found' if needed */ + if (found) *found = found_prel; + + elog(DEBUG2, + "Invalidating record for relation %u in pg_pathman's cache [%u]", + relid, MyProcPid); +} + +/* Get PartRelationInfo from local cache, refreshing an invalidated entry first; returns NULL if there is no usable entry. */ +PartRelationInfo * +get_pathman_relation_info(Oid relid, bool *found) +{ + PartRelationInfo *prel = hash_search(partitioned_rels, + (const void *) &relid, + HASH_FIND, found); + + /* Refresh PartRelationInfo if needed */ + if (prel && !PrelIsValid(prel)) + { + Datum values[Natts_pathman_config]; + bool isnull[Natts_pathman_config]; + + /* Check that PATHMAN_CONFIG table contains this relation */ + if (pathman_config_contains_relation(relid, values, isnull)) + { + PartType part_type; + const char *attname; + + /* We can't use 'part_type' & 'attname' from invalid prel */ + part_type = DatumGetPartType(values[Anum_pathman_config_parttype - 1]); + attname = TextDatumGetCString(values[Anum_pathman_config_attname - 1]); + + /* Refresh partitioned table cache entry */ + refresh_pathman_relation_info(relid, part_type, attname); + } + /* Else clear remaining cache entry; reset 'prel' so a removed entry is never returned */ + else { remove_pathman_relation_info(relid); prel = NULL; } + } + + elog(DEBUG2, + "Fetching %s record for relation %u from pg_pathman's cache
[%u]", + (prel ? "live" : "NULL"), relid, MyProcPid); + + return prel; +} + +/* Remove PartRelationInfo from local cache. */ +void +remove_pathman_relation_info(Oid relid) +{ + PartRelationInfo *prel = hash_search(partitioned_rels, + (const void *) &relid, + HASH_REMOVE, NULL); + + if (prel) + { + /* Free these arrays iff they're not NULL */ + FreeChildrenArray(prel); + FreeRangesArray(prel); + } + + elog(DEBUG2, + "Removing record for relation %u in pg_pathman's cache [%u]", + relid, MyProcPid); +} + + +/* + * cache\forget\get PartParentInfo functions. + */ + +void +cache_parent_of_partition(Oid partition, Oid parent) +{ + bool found; + PartParentInfo *ppar; + + ppar = hash_search(parent_cache, + (const void *) &partition, + HASH_ENTER, &found); + + elog(DEBUG2, + found ? + "Refreshing record for child %u in pg_pathman's cache [%u]" : + "Creating new record for child %u in pg_pathman's cache [%u]", + partition, MyProcPid); + + ppar->child_rel = partition; + ppar->parent_rel = parent; +} + +/* Remove "partition+parent" pair from cache & return parent's Oid */ +Oid +forget_parent_of_partition(Oid partition, PartParentSearch *status) +{ + return get_parent_of_partition_internal(partition, status, HASH_REMOVE); +} + +/* Return partition parent's Oid */ +Oid +get_parent_of_partition(Oid partition, PartParentSearch *status) +{ + return get_parent_of_partition_internal(partition, status, HASH_FIND); +} + +/* + * Get [and remove] "partition+parent" pair from cache, + * also check syscache if 'status' is provided. + * + * "status == NULL" implies that we don't care about + * either syscache or PATHMAN_CONFIG table contents.
+ */ +static Oid +get_parent_of_partition_internal(Oid partition, + PartParentSearch *status, + HASHACTION action) +{ + const char *action_str; /* "Fetching"\"Resetting" */ + Oid parent; + PartParentInfo *ppar = hash_search(parent_cache, + (const void *) &partition, + action, NULL); + + /* Set 'action_str' */ + switch (action) + { + case HASH_REMOVE: + action_str = "Resetting"; + break; + + case HASH_FIND: + action_str = "Fetching"; + break; + + default: + elog(ERROR, "Unexpected HTAB action %u", action); + } + + elog(DEBUG2, + "%s %s record for child %u from pg_pathman's cache [%u]", + action_str, (ppar ? "live" : "NULL"), partition, MyProcPid); + + if (ppar) + { + if (status) *status = PPS_ENTRY_FOUND; + parent = ppar->parent_rel; + } + /* Try fetching parent from syscache if 'status' is provided */ + else if (status) + parent = try_syscache_parent_search(partition, status); + else + parent = InvalidOid; /* we don't have to set status */ + + return parent; +} + +/* Try to find parent of a partition using syscache & PATHMAN_CONFIG */ +static Oid +try_syscache_parent_search(Oid partition, PartParentSearch *status) +{ + if (!IsTransactionState()) + { + /* We could not perform search */ + if (status) *status = PPS_NOT_SURE; + + return InvalidOid; + } + else + { + Relation relation; + Snapshot snapshot; + ScanKeyData key[1]; + SysScanDesc scan; + HeapTuple inheritsTuple; + Oid parent = InvalidOid; + + /* At first we assume parent does not exist (not a partition) */ + if (status) *status = PPS_ENTRY_NOT_FOUND; + + relation = heap_open(InheritsRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_inherits_inhrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(partition)); + + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = systable_beginscan(relation, InheritsRelidSeqnoIndexId, + true, NULL, 1, key); + + while ((inheritsTuple = systable_getnext(scan)) != NULL) + { + parent = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent; + + /* 
Check that PATHMAN_CONFIG contains this table */ + if (pathman_config_contains_relation(parent, NULL, NULL)) + { + /* We've found the entry, update status */ + if (status) *status = PPS_ENTRY_FOUND; + } + else parent = InvalidOid; /* invalidate 'parent' */ + + break; /* there should be no more rows */ + } + + systable_endscan(scan); + UnregisterSnapshot(snapshot); + heap_close(relation, AccessShareLock); + + return parent; + } +} + +/* + * Safe PartType wrapper. + */ +PartType +DatumGetPartType(Datum datum) +{ + uint32 val = DatumGetUInt32(datum); + + if (val < 1 || val > 2) + elog(ERROR, "Unknown partitioning type %u", val); + + return (PartType) val; +} diff --git a/src/relation_info.h b/src/relation_info.h new file mode 100644 index 0000000000..be23764a29 --- /dev/null +++ b/src/relation_info.h @@ -0,0 +1,122 @@ +/* ------------------------------------------------------------------------ + * + * relation_info.h + * Data structures describing partitioned relations + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + +#ifndef RELATION_INFO_H +#define RELATION_INFO_H + +#include "dsm_array.h" + +#include "postgres.h" +#include "port/atomics.h" + + +/* + * Partitioning type + */ +typedef enum +{ + PT_HASH = 1, + PT_RANGE +} PartType; + +/* + * Child relation info for RANGE partitioning + */ +typedef struct +{ + Oid child_oid; + + Datum min, + max; +} RangeEntry; + +/* + * PartRelationInfo + * Per-relation partitioning information + */ +typedef struct +{ + Oid key; /* partitioned table's Oid */ + bool valid; /* is this entry valid? 
*/ + + uint32 children_count; + Oid *children; /* Oids of child partitions */ + RangeEntry *ranges; /* per-partition range entry or NULL */ + + PartType parttype; /* partitioning type (HASH | RANGE) */ + Index attnum; /* partitioned column's index */ + Oid atttype; /* partitioned column's type */ + int32 atttypmod; /* partitioned column's type modifier */ + bool attbyval; /* is partitioned column stored by value? */ + + Oid cmp_proc, /* comparison function for 'atttype' */ + hash_proc; /* hash function for 'atttype' */ +} PartRelationInfo; + +/* + * ShmemRelationInfo + * Per-relation misc information stored in shmem + */ +typedef struct +{ + Oid key; /* partitioned table's Oid */ + + pg_atomic_flag dirty; /* is anyone performing any of the + partitioning-related operations + on this table at the moment? */ +} ShmemRelationInfo; + +/* + * PartParentInfo + * Cached parent of the specified partition. + * Allows us to quickly search for PartRelationInfo. + */ +typedef struct +{ + Oid child_rel; /* key */ + Oid parent_rel; +} PartParentInfo; + +/* + * PartParentSearch + * Represents status of a specific cached entry. + * Returned by [for]get_parent_of_partition().
+ */ +typedef enum +{ + PPS_ENTRY_NOT_FOUND = 0, + PPS_ENTRY_FOUND, /* entry was found in pathman's or system cache */ + PPS_NOT_SURE /* can't determine (not transactional state) */ +} PartParentSearch; + + +#define PrelGetChildrenArray(prel, copy) ( (prel)->children ) + +#define PrelGetRangesArray(prel, copy) ( (prel)->ranges ) + +#define PrelChildrenCount(prel) ( (prel)->children_count ) + +#define PrelIsValid(prel) ( (prel)->valid ) + + +PartRelationInfo *refresh_pathman_relation_info(Oid relid, + PartType partitioning_type, + const char *part_column_name); +void invalidate_pathman_relation_info(Oid relid, bool *found); +void remove_pathman_relation_info(Oid relid); +PartRelationInfo *get_pathman_relation_info(Oid relid, bool *found); + +void cache_parent_of_partition(Oid partition, Oid parent); +Oid forget_parent_of_partition(Oid partition, PartParentSearch *status); +Oid get_parent_of_partition(Oid partition, PartParentSearch *status); + +PartType DatumGetPartType(Datum datum); + +#endif diff --git a/src/runtime_merge_append.c b/src/runtime_merge_append.c index 20dd018aa8..1be7aad4cd 100644 --- a/src/runtime_merge_append.c +++ b/src/runtime_merge_append.c @@ -7,11 +7,11 @@ * * ------------------------------------------------------------------------ */ -#include "postgres.h" -#include "runtime_merge_append.h" +#include "runtime_merge_append.h" #include "pathman.h" +#include "postgres.h" #include "optimizer/cost.h" #include "optimizer/planmain.h" #include "optimizer/tlist.h" @@ -20,7 +20,6 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/guc.h" - #include "lib/binaryheap.h" diff --git a/src/runtime_merge_append.h b/src/runtime_merge_append.h index 1d32a21018..8dd8dcb116 100644 --- a/src/runtime_merge_append.h +++ b/src/runtime_merge_append.h @@ -9,13 +9,15 @@ * * ------------------------------------------------------------------------ */ + #ifndef RUNTIME_MERGE_APPEND_H #define RUNTIME_MERGE_APPEND_H -#include "postgres.h" #include 
"runtimeappend.h" #include "pathman.h" +#include "postgres.h" + typedef struct { diff --git a/src/runtimeappend.c b/src/runtimeappend.c index 900b8240ad..7260ab2c0c 100644 --- a/src/runtimeappend.c +++ b/src/runtimeappend.c @@ -7,11 +7,12 @@ * * ------------------------------------------------------------------------ */ + +#include "runtimeappend.h" + #include "postgres.h" #include "utils/memutils.h" #include "utils/guc.h" -#include "runtimeappend.h" -#include "pathman.h" bool pg_pathman_enable_runtimeappend = true; diff --git a/src/runtimeappend.h b/src/runtimeappend.h index a1f4a03f42..f34c94aa2f 100644 --- a/src/runtimeappend.h +++ b/src/runtimeappend.h @@ -7,17 +7,18 @@ * * ------------------------------------------------------------------------ */ + #ifndef RUNTIME_APPEND_H #define RUNTIME_APPEND_H +#include "pathman.h" +#include "nodes_common.h" + #include "postgres.h" #include "optimizer/paths.h" #include "optimizer/pathnode.h" #include "commands/explain.h" -#include "pathman.h" -#include "nodes_common.h" - typedef struct { diff --git a/src/utils.c b/src/utils.c index 0883f74406..0217ef3790 100644 --- a/src/utils.c +++ b/src/utils.c @@ -7,21 +7,29 @@ * * ------------------------------------------------------------------------ */ + #include "utils.h" + #include "access/nbtree.h" #include "access/sysattr.h" +#include "access/xact.h" +#include "access/htup_details.h" +#include "catalog/heap.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "catalog/pg_extension.h" +#include "commands/extension.h" #include "executor/spi.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "nodes/makefuncs.h" #include "optimizer/var.h" #include "optimizer/restrictinfo.h" +#include "rewrite/rewriteManip.h" #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/lsyscache.h" -#include "rewrite/rewriteManip.h" -#include "catalog/heap.h" -#include "catalog/pg_type.h" +#include "utils/fmgroids.h" #define TABLEOID_STR(subst) ( 
"pathman_tableoid" subst ) @@ -481,7 +489,6 @@ change_varnos_in_restrinct_info(RestrictInfo *rinfo, change_varno_context *conte change_varno_walker(node, context); } - /* TODO: find some elegant way to do this */ if (bms_is_member(context->old_varno, rinfo->clause_relids)) { rinfo->clause_relids = bms_del_member(rinfo->clause_relids, context->old_varno); @@ -499,6 +506,9 @@ change_varnos_in_restrinct_info(RestrictInfo *rinfo, change_varno_context *conte } } +/* + * Convert number-as-string to Oid. + */ Oid str_to_oid(const char *cstr) { @@ -640,3 +650,48 @@ postprocess_lock_rows(List *rtable, Plan *plan) { plan_tree_walker(plan, lock_rows_visitor, rtable); } + +/* + * Returns pg_pathman schema's Oid or InvalidOid if that's not possible. + */ +Oid +get_pathman_schema(void) +{ + Oid result; + Relation rel; + SysScanDesc scandesc; + HeapTuple tuple; + ScanKeyData entry[1]; + Oid ext_schema; + + /* It's impossible to fetch pg_pathman's schema now */ + if (!IsTransactionState()) + return InvalidOid; + + ext_schema = get_extension_oid("pg_pathman", true); + if (ext_schema == InvalidOid) + return InvalidOid; /* exit if pg_pathman does not exist */ + + ScanKeyInit(&entry[0], + ObjectIdAttributeNumber, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(ext_schema)); + + rel = heap_open(ExtensionRelationId, AccessShareLock); + scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, + NULL, 1, entry); + + tuple = systable_getnext(scandesc); + + /* We assume that there can be at most one matching tuple */ + if (HeapTupleIsValid(tuple)) + result = ((Form_pg_extension) GETSTRUCT(tuple))->extnamespace; + else + result = InvalidOid; + + systable_endscan(scandesc); + + heap_close(rel, AccessShareLock); + + return result; +} diff --git a/src/utils.h b/src/utils.h index 618a336e98..e69a0f219c 100644 --- a/src/utils.h +++ b/src/utils.h @@ -7,14 +7,16 @@ * * ------------------------------------------------------------------------ */ + #ifndef UTILS_H #define UTILS_H +#include 
"pathman.h" + #include "postgres.h" #include "utils/rel.h" #include "nodes/relation.h" #include "nodes/nodeFuncs.h" -#include "pathman.h" typedef struct @@ -60,4 +62,6 @@ void rowmark_add_tableoids(Query *parse); void postprocess_lock_rows(List *rtable, Plan *plan); +Oid get_pathman_schema(void); + #endif diff --git a/src/worker.c b/src/worker.c index ee4f7aff4a..355d8bff9b 100644 --- a/src/worker.c +++ b/src/worker.c @@ -1,14 +1,3 @@ -#include "pathman.h" -#include "miscadmin.h" -#include "postmaster/bgworker.h" -#include "catalog/pg_type.h" -#include "executor/spi.h" -#include "storage/dsm.h" -#include "access/xact.h" -#include "utils/snapmgr.h" -#include "utils/typcache.h" -#include "utils.h" - /*------------------------------------------------------------------------- * * worker.c @@ -21,25 +10,146 @@ *------------------------------------------------------------------------- */ -static dsm_segment *segment; +#include "pathman.h" +#include "init.h" +#include "utils.h" + +#include "access/xact.h" +#include "catalog/pg_type.h" +#include "executor/spi.h" +#include "miscadmin.h" +#include "postmaster/bgworker.h" +#include "storage/dsm.h" +#include "utils/datum.h" +#include "utils/snapmgr.h" +#include "utils/typcache.h" +#include "utils/lsyscache.h" + +static void bg_worker_load_config(const char *bgw_name); static void bg_worker_main(Datum main_arg); +static Oid append_partitions(Oid relid, Datum value, + Oid value_type, volatile bool *crashed); -typedef struct PartitionArgs + +/* + * Store args, result and execution status of CreatePartitionsWorker. + */ +typedef struct { + bool crashed; /* has bgw crashed? 
*/ + Oid result; /* target partition */ Oid dbid; - Oid relid; -#ifdef HAVE_INT64_TIMESTAMP - int64 value; -#else - double value; -#endif + Oid partitioned_table; + + /* Type will help us to work with Datum */ Oid value_type; - bool by_val; - Oid result; - bool crashed; + Size value_size; + bool value_byval; + + /* Store Datum as flexible array */ + uint8 value[FLEXIBLE_ARRAY_MEMBER]; } PartitionArgs; + +#ifdef USE_ASSERT_CHECKING + + #include "access/htup_details.h" + #include "utils/syscache.h" + + #define PrintUnpackedDatum(datum, typid) \ + do { \ + HeapTuple tup = SearchSysCache1(TYPEOID, \ + ObjectIdGetDatum(typid)); \ + if (HeapTupleIsValid(tup)) \ + { \ + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tup); \ + FmgrInfo finfo; \ + fmgr_info(typtup->typoutput, &finfo); \ + elog(LOG, "BGW: arg->value is '%s'", \ + DatumGetCString(FunctionCall1(&finfo, datum))); ReleaseSysCache(tup); /* drop the syscache pin */ \ + } \ + } while (0) +#else /* !USE_ASSERT_CHECKING */ + #define PrintUnpackedDatum(datum, typid) (true) +#endif + +#define PackDatumToByteArray(array, datum, datum_size, typbyval) \ + do { \ + memcpy((void *) (array), \ + (const void *) ((typbyval) ? \ + (Pointer) (&datum) : \ + DatumGetPointer(datum)), \ + datum_size); \ + } while (0) + +/* + * 'typid' is not necessary, but it is used by PrintUnpackedDatum(). + */ +#define UnpackDatumFromByteArray(array, datum, datum_size, typbyval, typid) \ + do { \ + if (typbyval) \ + memcpy((void *) &datum, (const void *) array, datum_size); \ + else \ + { \ + datum = PointerGetDatum(palloc(datum_size)); \ + memcpy((void *) DatumGetPointer(datum), \ + (const void *) array, \ + datum_size); \ + } \ + PrintUnpackedDatum(datum, typid); \ + } while (0) + + +/* + * Initialize pg_pathman's local config in BGW process. + */ +static void +bg_worker_load_config(const char *bgw_name) +{ + load_config(); + elog(LOG, "%s loaded pg_pathman's config [%u]", + bgw_name, MyProcPid); +} + +/* + * Create args segment for partitions bgw.
+ */ +static dsm_segment * +create_partitions_bg_worker_segment(Oid relid, Datum value, Oid value_type) +{ + TypeCacheEntry *typcache; + Size datum_size; + Size segment_size; + dsm_segment *segment; + PartitionArgs *args; + + typcache = lookup_type_cache(value_type, 0); + + /* Calculate segment size */ + datum_size = datumGetSize(value, typcache->typbyval, typcache->typlen); + segment_size = offsetof(PartitionArgs, value) + datum_size; + + segment = dsm_create(segment_size, 0); + + /* Initialize BGW args */ + args = (PartitionArgs *) dsm_segment_address(segment); + args->crashed = true; /* default value */ + args->result = InvalidOid; + args->dbid = MyDatabaseId; + args->partitioned_table = relid; + + /* Write value-related stuff */ + args->value_type = value_type; + args->value_size = datum_size; + args->value_byval = typcache->typbyval; + + PackDatumToByteArray(&args->value, value, + datum_size, args->value_byval); + + return segment; +} + /* * Starts background worker that will create new partitions, * waits till it finishes the job and returns the result (new partition oid) @@ -67,39 +177,21 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) dsm_handle segment_handle; pid_t pid; PartitionArgs *args; - TypeCacheEntry *tce; Oid child_oid = InvalidOid; /* Create a dsm segment for the worker to pass arguments */ - segment = dsm_create(sizeof(PartitionArgs), 0); + segment = create_partitions_bg_worker_segment(relid, value, value_type); segment_handle = dsm_segment_handle(segment); - - tce = lookup_type_cache(value_type, 0); - - /* Fill arguments structure */ args = (PartitionArgs *) dsm_segment_address(segment); - args->dbid = MyDatabaseId; - args->relid = relid; - if (tce->typbyval) - args->value = value; - else - memcpy(&args->value, DatumGetPointer(value), sizeof(args->value)); - args->by_val = tce->typbyval; - args->value_type = value_type; - args->result = 0; /* Initialize worker struct */ - worker.bgw_flags = BGWORKER_SHMEM_ACCESS | - 
BGWORKER_BACKEND_DATABASE_CONNECTION; - worker.bgw_start_time = BgWorkerStart_RecoveryFinished; - worker.bgw_restart_time = BGW_NEVER_RESTART; - worker.bgw_main = bg_worker_main; - worker.bgw_main_arg = Int32GetDatum(segment_handle); - worker.bgw_notify_pid = MyProcPid; - - LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); - LWLockAcquire(pmstate->edit_partitions_lock, LW_EXCLUSIVE); + worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; + worker.bgw_start_time = BgWorkerStart_RecoveryFinished; + worker.bgw_restart_time = BGW_NEVER_RESTART; + worker.bgw_notify_pid = MyProcPid; + worker.bgw_main_arg = UInt32GetDatum(segment_handle); + worker.bgw_main = bg_worker_main; /* Start dynamic worker */ bgw_started = RegisterDynamicBackgroundWorker(&worker, &bgw_handle); @@ -119,8 +211,6 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) /* end execution */ handle_exec_state: - LWLockRelease(pmstate->load_config_lock); - LWLockRelease(pmstate->edit_partitions_lock); /* Free dsm segment */ dsm_detach(segment); @@ -145,6 +235,9 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) break; } + if (child_oid == InvalidOid) + elog(ERROR, "Attempt to append new partitions to relation %u failed", relid); + return child_oid; } @@ -154,17 +247,21 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) static void bg_worker_main(Datum main_arg) { - PartitionArgs *args; - dsm_handle handle = DatumGetInt32(main_arg); + const char *bgw_name = "CreatePartitionsWorker"; + dsm_handle handle = DatumGetUInt32(main_arg); + dsm_segment *segment; + PartitionArgs *args; + Datum value; /* Create resource owner */ - CurrentResourceOwner = ResourceOwnerCreate(NULL, "CreatePartitionsWorker"); + CurrentResourceOwner = ResourceOwnerCreate(NULL, bgw_name); - /* Attach to dynamic shared memory */ if (!handle) - ereport(WARNING, (errmsg("pg_pathman worker: invalid dsm_handle"))); + elog(ERROR, "%s: invalid dsm_handle", 
bgw_name); - segment = dsm_attach(handle); + /* Attach to dynamic shared memory */ + if ((segment = dsm_attach(handle)) == NULL) + elog(ERROR, "%s: cannot attach to segment", bgw_name); args = dsm_segment_address(segment); /* Establish connection and start transaction */ @@ -173,10 +270,17 @@ bg_worker_main(Datum main_arg) SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); + /* Initialize pg_pathman's local config */ + bg_worker_load_config(bgw_name); + + UnpackDatumFromByteArray(&args->value, value, + args->value_size, + args->value_byval, + args->value_type); + /* Create partitions */ - args->result = create_partitions(args->relid, - PATHMAN_GET_DATUM(args->value, - args->by_val), + args->result = append_partitions(args->partitioned_table, + value, args->value_type, &args->crashed); @@ -189,40 +293,38 @@ bg_worker_main(Datum main_arg) } /* - * Create partitions and return an OID of the partition that contain value + * Append partitions and return Oid of the partition that contains value */ -Oid -create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed) +static Oid +append_partitions(Oid relid, Datum value, Oid value_type, volatile bool *crashed) { Oid oids[] = { OIDOID, value_type }; Datum vals[] = { ObjectIdGetDatum(relid), value }; bool nulls[] = { false, false }; char *sql; PartRelationInfo *prel; - RangeRelation *rangerel; FmgrInfo cmp_func; - char *schema; + MemoryContext old_mcxt = CurrentMemoryContext; - *crashed = true; - schema = get_extension_schema(); + *crashed = true; /* write default value */ - prel = get_pathman_relation_info(relid, NULL); - rangerel = get_pathman_range_relation(relid, NULL); + if ((prel = get_pathman_relation_info(relid, NULL)) == NULL) + elog(ERROR, "BGW: cannot fetch PartRelationInfo for relation %u", relid); /* Comparison function */ fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); /* Perform PL procedure */ sql = psprintf("SELECT %s.append_partitions_on_demand_internal($1, $2)", - 
schema); + get_namespace_name(get_pathman_schema())); PG_TRY(); { - int ret; - Oid partid = InvalidOid; - bool isnull; + int ret; + Oid partid = InvalidOid; + bool isnull; ret = SPI_execute_with_args(sql, 2, oids, vals, nulls, false, 0); - if (ret > 0) + if (ret == SPI_OK_SELECT) { TupleDesc tupdesc = SPI_tuptable->tupdesc; HeapTuple tuple = SPI_tuptable->vals[0]; @@ -232,9 +334,7 @@ create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed) partid = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 1, &isnull)); /* Update relation info */ - free_dsm_array(&rangerel->ranges); - free_dsm_array(&prel->children); - load_partitions(relid, GetCatalogSnapshot(relid)); + /* TODO: mark ShmemRelationInfo as 'dirty' to invalidate cache */ } *crashed = false; @@ -242,9 +342,16 @@ create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed) } PG_CATCH(); { - elog(ERROR, "Attempt to create new partitions failed"); + ErrorData *edata; + + MemoryContextSwitchTo(old_mcxt); + edata = CopyErrorData(); + FlushErrorState(); + + elog(LOG, "BGW: %s", edata->message); + FreeErrorData(edata); - return InvalidOid; /* compiler should be happy */ + return InvalidOid; /* something bad happened */ } PG_END_TRY(); } From 4d8e43011b49e997f3f4b21e7c2b4d82ee2e0f5d Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 4 Aug 2016 19:16:03 +0300 Subject: [PATCH 030/184] refactoring, introduce create_partitions() (switch between backend\BGW), fixes --- range.sql | 3 +- src/init.c | 19 +++++- src/init.h | 3 +- src/partition_filter.c | 7 +- src/pathman.h | 3 + src/pg_pathman.c | 148 +++++++++++++++++++++++++++++++++++------ src/pl_funcs.c | 3 +- src/relation_info.c | 4 +- src/worker.c | 131 +++++++++--------------------------- 9 files changed, 190 insertions(+), 131 deletions(-) diff --git a/range.sql b/range.sql index 4b450363a0..37f890e856 100644 --- a/range.sql +++ b/range.sql @@ -449,7 +449,8 @@ BEGIN EXECUTE v_sql; RETURN v_child_relname; END -$$ LANGUAGE plpgsql; +$$ 
LANGUAGE plpgsql +SET client_min_messages = WARNING; /* * Split RANGE partition diff --git a/src/init.c b/src/init.c index 3d10ee88d5..2b51822db7 100644 --- a/src/init.c +++ b/src/init.c @@ -18,6 +18,7 @@ #include "utils.h" #include "access/htup_details.h" +#include "access/sysattr.h" #include "catalog/indexing.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_type.h" @@ -368,7 +369,8 @@ build_check_constraint_name_internal(Oid relid, AttrNumber attno) * Extract tuple into 'values' and 'isnull' if they're provided. */ bool -pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull) +pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, + TransactionId *xmin) { Oid pathman_config; Relation rel; @@ -404,6 +406,21 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull) /* Extract data if necessary */ if (values && isnull) heap_deformtuple(htup, RelationGetDescr(rel), values, isnull); + + /* Set xmin if necessary */ + if (xmin) + { + Datum value; + bool isnull; + + value = heap_getsysattr(htup, + MinTransactionIdAttributeNumber, + RelationGetDescr(rel), + &isnull); + + Assert(!isnull); + *xmin = DatumGetTransactionId(value); + } } /* Clean resources */ diff --git a/src/init.h b/src/init.h index cc07aa0d46..6f0a6a645b 100644 --- a/src/init.h +++ b/src/init.h @@ -42,6 +42,7 @@ char *build_check_constraint_name_internal(Oid relid, bool pathman_config_contains_relation(Oid relid, Datum *values, - bool *isnull); + bool *isnull, + TransactionId *xmin); #endif diff --git a/src/partition_filter.c b/src/partition_filter.c index bfa2d7d1a1..8750754083 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -184,14 +184,13 @@ partition_filter_exec(CustomScanState *node) ranges = walk_expr_tree((Expr *) &state->temp_const, &state->wcxt)->rangeset; parts = get_partition_oids(ranges, &nparts, state->prel); - if (nparts > 1) elog(ERROR, "PartitionFilter selected more than one partition"); else if (nparts == 0) 
{ - selected_partid = create_partitions_bg_worker(state->partitioned_table, - state->temp_const.constvalue, - state->temp_const.consttype); + selected_partid = create_partitions(state->partitioned_table, + state->temp_const.constvalue, + state->temp_const.consttype); /* Now we have to refresh state->wcxt->ranges manually */ refresh_walker_context_ranges(&state->wcxt); diff --git a/src/pathman.h b/src/pathman.h index f7cecdac9f..bdd48704a0 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -194,6 +194,9 @@ typedef struct /* Check that WalkerContext contains ExprContext (plan execution stage) */ #define WcxtHasExprContext(wcxt) ( (wcxt)->econtext ) +Oid create_partitions_internal(Oid relid, Datum value, Oid value_type); +Oid create_partitions(Oid relid, Datum value, Oid value_type); + void select_range_partitions(const Datum value, FmgrInfo *cmp_func, const RangeEntry *ranges, diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 6920d43a93..a8c1ebc3ee 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -18,33 +18,24 @@ #include "runtime_merge_append.h" #include "postgres.h" -#include "fmgr.h" +#include "access/heapam.h" +#include "access/transam.h" +#include "access/xact.h" +#include "catalog/pg_type.h" +#include "executor/spi.h" #include "miscadmin.h" -#include "nodes/makefuncs.h" -#include "nodes/nodeFuncs.h" -#include "nodes/pg_list.h" -#include "nodes/relation.h" -#include "nodes/primnodes.h" #include "optimizer/clauses.h" -#include "optimizer/paths.h" -#include "optimizer/pathnode.h" -#include "optimizer/planner.h" #include "optimizer/prep.h" #include "optimizer/restrictinfo.h" #include "optimizer/cost.h" -#include "parser/analyze.h" -#include "utils/hsearch.h" #include "utils/rel.h" -#include "utils/elog.h" -#include "utils/array.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/selfuncs.h" +#include "utils/snapmgr.h" #include "utils/memutils.h" -#include "access/heapam.h" -#include "storage/ipc.h" -#include "catalog/pg_type.h" 
#include "foreign/fdwapi.h" +#include "fmgr.h" PG_MODULE_MAGIC; @@ -112,7 +103,7 @@ static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo /* - * Entry point + * Set initial values for all Postmaster's forks. */ void _PG_init(void) @@ -161,8 +152,8 @@ _PG_init(void) } /* - * Disables inheritance for partitioned by pathman relations. It must be done to - * prevent PostgresSQL from full search. + * Disables inheritance for partitioned by pathman relations. + * It must be done to prevent PostgresSQL from exhaustive search. */ void disable_inheritance(Query *parse) @@ -171,7 +162,6 @@ disable_inheritance(Query *parse) RangeTblEntry *rte; PartRelationInfo *prel; MemoryContext oldcontext; - bool found; /* If query contains CTE (WITH statement) then handle subqueries too */ disable_inheritance_cte(parse); @@ -189,10 +179,12 @@ disable_inheritance(Query *parse) if (rte->inh) { /* Look up this relation in pathman relations */ - prel = get_pathman_relation_info(rte->relid, &found); - if (prel != NULL && found) + prel = get_pathman_relation_info(rte->relid, NULL); + if (prel) { + /* We'll set this flag later */ rte->inh = false; + /* * Sometimes user uses the ONLY statement and in this case * rte->inh is also false. We should differ the case @@ -754,6 +746,118 @@ finish_least_greatest(WrapperNode *wrap, WalkerContext *context) context->hasGreatest = false; } +/* + * Append partitions (if needed) and return Oid of the partition to contain value. + * + * NB: This function should not be called directly, use create_partitions() instead. 
+ */ +Oid +create_partitions_internal(Oid relid, Datum value, Oid value_type) +{ + int ret; + char *sql; + PartRelationInfo *prel; + FmgrInfo cmp_func; + MemoryContext old_mcxt = CurrentMemoryContext; + Oid partid = InvalidOid; /* default value */ + + if ((prel = get_pathman_relation_info(relid, NULL)) == NULL) + { + elog(LOG, "Cannot fetch PartRelationInfo for relation %u [%u]", + relid, MyProcPid); + + return InvalidOid; + } + + if ((ret = SPI_connect()) < 0) + { + elog(LOG, "create_partitions_internal(): SPI_connect returned %d", ret); + + return InvalidOid; + } + + /* Comparison function */ + fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); + + /* Perform PL procedure */ + sql = psprintf("SELECT %s.append_partitions_on_demand_internal($1, $2)", + get_namespace_name(get_pathman_schema())); + + PG_TRY(); + { + Oid oids[] = { OIDOID, value_type }; + Datum vals[] = { ObjectIdGetDatum(relid), value }; + bool nulls[] = { false, false }; + bool isnull; + + /* TODO: maybe this could be rewritten with FunctionCall */ + ret = SPI_execute_with_args(sql, 2, oids, vals, nulls, false, 0); + if (ret == SPI_OK_SELECT) + { + TupleDesc tupdesc = SPI_tuptable->tupdesc; + HeapTuple tuple = SPI_tuptable->vals[0]; + + Assert(SPI_processed == 1); + + partid = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 1, &isnull)); + } + } + PG_CATCH(); + { + ErrorData *edata; + + MemoryContextSwitchTo(old_mcxt); + edata = CopyErrorData(); + FlushErrorState(); + + elog(LOG, "create_partitions_internal(): %s [%u]", + edata->message, MyProcPid); + + FreeErrorData(edata); + } + PG_END_TRY(); + + SPI_finish(); + + return partid; +} + +/* + * Create RANGE partitions (if needed) using either BGW or current backend. + * + * Returns Oid of the partition to store 'value'. 
+ */ +Oid +create_partitions(Oid relid, Datum value, Oid value_type) +{ + TransactionId rel_xmin; + + /* Check that table is partitioned and fetch xmin */ + if (pathman_config_contains_relation(relid, NULL, NULL, &rel_xmin)) + { + /* If table was partitioned in some previous xact, run BGWorker */ + if (TransactionIdPrecedes(rel_xmin, GetCurrentTransactionId())) + { + elog(DEBUG2, "create_partitions(): chose BGW [%u]", MyProcPid); + return create_partitions_bg_worker(relid, value, value_type); + } + /* Else it'd better for the current backend to create partitions */ + else + { + elog(DEBUG2, "create_partitions(): chose backend [%u]", MyProcPid); + return create_partitions_internal(relid, value, value_type); + } + } + else + elog(ERROR, "Relation %u is not partitioned by pg_pathman", relid); + + return InvalidOid; /* keep compiler happy */ +} + +/* + * Given RangeEntry array and 'value', return selected + * RANGE partitions inside the WrapperNode. + */ void select_range_partitions(const Datum value, FmgrInfo *cmp_func, diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 3970eb7428..568201bd2c 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -152,8 +152,7 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) PG_RETURN_OID(found_rentry.child_oid); } - /* Start background worker to create new partitions */ - child_oid = create_partitions_bg_worker(relid, value, value_type); + child_oid = create_partitions(relid, value, value_type); LWLockRelease(pmstate->load_config_lock); LWLockRelease(pmstate->edit_partitions_lock); diff --git a/src/relation_info.c b/src/relation_info.c index ee9ee7cc06..200c50c041 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -183,7 +183,7 @@ get_pathman_relation_info(Oid relid, bool *found) bool isnull[Natts_pathman_config]; /* Check that PATHMAN_CONFIG table contains this relation */ - if (pathman_config_contains_relation(relid, values, isnull)) + if (pathman_config_contains_relation(relid, values, isnull, NULL)) { PartType 
part_type; const char *attname; @@ -355,7 +355,7 @@ try_syscache_parent_search(Oid partition, PartParentSearch *status) parent = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent; /* Check that PATHMAN_CONFIG contains this table */ - if (pathman_config_contains_relation(parent, NULL, NULL)) + if (pathman_config_contains_relation(parent, NULL, NULL, NULL)) { /* We've found the entry, update status */ if (status) *status = PPS_ENTRY_FOUND; diff --git a/src/worker.c b/src/worker.c index 355d8bff9b..346b53a6e0 100644 --- a/src/worker.c +++ b/src/worker.c @@ -21,23 +21,22 @@ #include "postmaster/bgworker.h" #include "storage/dsm.h" #include "utils/datum.h" -#include "utils/snapmgr.h" #include "utils/typcache.h" #include "utils/lsyscache.h" +#include "utils/resowner.h" static void bg_worker_load_config(const char *bgw_name); static void bg_worker_main(Datum main_arg); -static Oid append_partitions(Oid relid, Datum value, - Oid value_type, volatile bool *crashed); +static const char *create_partitions_bgw = "CreatePartitionsWorker"; + /* * Store args, result and execution status of CreatePartitionsWorker. */ typedef struct { - bool crashed; /* has bgw crashed? 
*/ Oid result; /* target partition */ Oid dbid; Oid partitioned_table; @@ -66,8 +65,10 @@ typedef struct Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tup); \ FmgrInfo finfo; \ fmgr_info(typtup->typoutput, &finfo); \ - elog(LOG, "BGW: arg->value is '%s'", \ - DatumGetCString(FunctionCall1(&finfo, datum))); \ + elog(LOG, "BGW: arg->value is '%s' [%u]", \ + DatumGetCString(FunctionCall1(&finfo, datum)), \ + MyProcPid); \ + ReleaseSysCache(tup); \ } \ } while (0) #elif @@ -108,7 +109,7 @@ static void bg_worker_load_config(const char *bgw_name) { load_config(); - elog(LOG, "%s loaded pg_pathman's config [%u]", + elog(LOG, "%s: loaded pg_pathman's config [%u]", bgw_name, MyProcPid); } @@ -134,7 +135,6 @@ create_partitions_bg_worker_segment(Oid relid, Datum value, Oid value_type) /* Initialize BGW args */ args = (PartitionArgs *) dsm_segment_address(segment); - args->crashed = true; /* default value */ args->result = InvalidOid; args->dbid = MyDatabaseId; args->partitioned_table = relid; @@ -158,15 +158,14 @@ Oid create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) { #define HandleError(condition, new_state) \ - if (condition) { exec_state = (new_state); goto handle_exec_state; } + if (condition) { exec_state = (new_state); goto handle_bg_exec_state; } /* Execution state to be checked */ enum { BGW_OK = 0, /* everything is fine (default) */ BGW_COULD_NOT_START, /* could not start worker */ - BGW_PM_DIED, /* postmaster died */ - BGW_CRASHED /* worker crashed */ + BGW_PM_DIED /* postmaster died */ } exec_state = BGW_OK; BackgroundWorker worker; @@ -193,6 +192,11 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) worker.bgw_main_arg = UInt32GetDatum(segment_handle); worker.bgw_main = bg_worker_main; + /* Set worker's name */ + memcpy((void *) &worker.bgw_name, + (const void *) create_partitions_bgw, + strlen(create_partitions_bgw)); + /* Start dynamic worker */ bgw_started = RegisterDynamicBackgroundWorker(&worker, &bgw_handle); 
HandleError(bgw_started == false, BGW_COULD_NOT_START); @@ -210,7 +214,7 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) /* end execution */ -handle_exec_state: +handle_bg_exec_state: /* Free dsm segment */ dsm_detach(segment); @@ -223,12 +227,8 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) case BGW_PM_DIED: ereport(ERROR, - (errmsg("Postmaster died during the pg_pathman background worker process"), - errhint("More details may be available in the server log."))); - break; - - case BGW_CRASHED: - elog(ERROR, "Could not create partition due to background worker crash"); + (errmsg("Postmaster died during the pg_pathman's background worker process"), + errhint("More details may be available in the server log."))); break; default: @@ -236,7 +236,9 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) } if (child_oid == InvalidOid) - elog(ERROR, "Attempt to append new partitions to relation %u failed", relid); + elog(ERROR, + "Attempt to append new partitions to relation \"%s\" failed", + get_rel_name(relid)); return child_oid; } @@ -247,111 +249,44 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) static void bg_worker_main(Datum main_arg) { - const char *bgw_name = "CreatePartitionsWorker"; dsm_handle handle = DatumGetUInt32(main_arg); dsm_segment *segment; PartitionArgs *args; Datum value; /* Create resource owner */ - CurrentResourceOwner = ResourceOwnerCreate(NULL, bgw_name); + CurrentResourceOwner = ResourceOwnerCreate(NULL, create_partitions_bgw); if (!handle) - elog(ERROR, "%s: invalid dsm_handle", bgw_name); + elog(ERROR, "%s: invalid dsm_handle [%u]", + create_partitions_bgw, MyProcPid); /* Attach to dynamic shared memory */ if ((segment = dsm_attach(handle)) == NULL) - elog(ERROR, "%s: cannot attach to segment", bgw_name); + elog(ERROR, "%s: cannot attach to segment [%u]", + create_partitions_bgw, MyProcPid); args = dsm_segment_address(segment); /* Establish connection and 
start transaction */ BackgroundWorkerInitializeConnectionByOid(args->dbid, InvalidOid); + StartTransactionCommand(); - SPI_connect(); - PushActiveSnapshot(GetTransactionSnapshot()); /* Initialize pg_pathman's local config */ - bg_worker_load_config(bgw_name); + bg_worker_load_config(create_partitions_bgw); + /* Upack Datum from segment to 'value' */ UnpackDatumFromByteArray(&args->value, value, args->value_size, args->value_byval, args->value_type); /* Create partitions */ - args->result = append_partitions(args->partitioned_table, - value, - args->value_type, - &args->crashed); - - /* Cleanup */ - SPI_finish(); - PopActiveSnapshot(); + args->result = create_partitions_internal(args->partitioned_table, + value, /* unpacked Datum */ + args->value_type); + CommitTransactionCommand(); dsm_detach(segment); } - -/* - * Append partitions and return Oid of the partition that contains value - */ -static Oid -append_partitions(Oid relid, Datum value, Oid value_type, volatile bool *crashed) -{ - Oid oids[] = { OIDOID, value_type }; - Datum vals[] = { ObjectIdGetDatum(relid), value }; - bool nulls[] = { false, false }; - char *sql; - PartRelationInfo *prel; - FmgrInfo cmp_func; - MemoryContext old_mcxt = CurrentMemoryContext; - - *crashed = true; /* write default value */ - - if ((prel = get_pathman_relation_info(relid, NULL)) == NULL) - elog(ERROR, "BGW: cannot fetch PartRelationInfo for relation %u", relid); - - /* Comparison function */ - fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); - - /* Perform PL procedure */ - sql = psprintf("SELECT %s.append_partitions_on_demand_internal($1, $2)", - get_namespace_name(get_pathman_schema())); - PG_TRY(); - { - int ret; - Oid partid = InvalidOid; - bool isnull; - - ret = SPI_execute_with_args(sql, 2, oids, vals, nulls, false, 0); - if (ret == SPI_OK_SELECT) - { - TupleDesc tupdesc = SPI_tuptable->tupdesc; - HeapTuple tuple = SPI_tuptable->vals[0]; - - Assert(SPI_processed == 1); - - partid = 
DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 1, &isnull)); - - /* Update relation info */ - /* TODO: mark ShmemRelationInfo as 'dirty' to invalidate cache */ - } - - *crashed = false; - return partid; - } - PG_CATCH(); - { - ErrorData *edata; - - MemoryContextSwitchTo(old_mcxt); - edata = CopyErrorData(); - FlushErrorState(); - - elog(LOG, "BGW: %s", edata->message); - FreeErrorData(edata); - - return InvalidOid; /* something bad happened */ - } - PG_END_TRY(); -} From 8f87c537505670d5dac30a0ff74cd67d56896d51 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 9 Aug 2016 18:05:54 +0300 Subject: [PATCH 031/184] Rewrite all mixed-sign comparisons, rewrite some plpgsql functions to C (append_partitions_on_demand, get_range_condition, is_date_type, is_attribute_nullable, get_attribute_type_name), introduce 'delayed invalidation' list & finish_delayed_invalidation() function, group functions in utils.h --- hash.sql | 3 +- init.sql | 245 +++++++++++++-------------- range.sql | 152 ++--------------- src/dsm_array.c | 18 +- src/dsm_array.h | 2 + src/hooks.c | 68 ++++---- src/init.c | 18 +- src/nodes_common.c | 28 ++- src/nodes_common.h | 2 +- src/partition_filter.c | 70 +++++--- src/partition_filter.h | 11 +- src/pathman.h | 70 +++----- src/pg_pathman.c | 375 +++++++++++++++++++++++++---------------- src/pl_funcs.c | 161 +++++++++++++++--- src/relation_info.c | 167 ++++++++++++++++-- src/relation_info.h | 16 +- src/runtimeappend.h | 1 - src/utils.c | 102 +++++++++-- src/utils.h | 60 ++++--- src/worker.c | 36 +--- 20 files changed, 939 insertions(+), 666 deletions(-) diff --git a/hash.sql b/hash.sql index e15fcc505a..2accbe1d29 100644 --- a/hash.sql +++ b/hash.sql @@ -68,7 +68,8 @@ BEGIN RETURN partitions_count; END -$$ LANGUAGE plpgsql; +$$ LANGUAGE plpgsql +SET client_min_messages = WARNING; /* * Creates an update trigger diff --git a/init.sql b/init.sql index 22d0707253..b5aae811d2 100644 --- a/init.sql +++ b/init.sql @@ -1,7 +1,7 @@ /* 
------------------------------------------------------------------------ * * init.sql - * Creates config table and provides common utility functions + * Creates config table and provides common utility functions * * Copyright (c) 2015-2016, Postgres Professional * @@ -27,57 +27,9 @@ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config ( CHECK (parttype >= 1 OR parttype <= 2) /* check for allowed part types */ ); -SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config', ''); - -CREATE OR REPLACE FUNCTION @extschema@.on_create_partitions(relid OID) -RETURNS VOID AS 'pg_pathman', 'on_partitions_created' LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION @extschema@.on_update_partitions(relid OID) -RETURNS VOID AS 'pg_pathman', 'on_partitions_updated' LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION @extschema@.on_remove_partitions(relid OID) -RETURNS VOID AS 'pg_pathman', 'on_partitions_removed' LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION @extschema@.find_or_create_range_partition(relid OID, value ANYELEMENT) -RETURNS OID AS 'pg_pathman', 'find_or_create_range_partition' LANGUAGE C STRICT; - - -/* - * Returns min and max values for specified RANGE partition. 
- */ -CREATE OR REPLACE FUNCTION @extschema@.get_partition_range( - parent_relid OID, partition_relid OID, dummy ANYELEMENT) -RETURNS ANYARRAY AS 'pg_pathman', 'get_partition_range' LANGUAGE C STRICT; - - -/* - * Returns N-th range (in form of array) - */ -CREATE OR REPLACE FUNCTION @extschema@.get_range_by_idx( - parent_relid OID, idx INTEGER, dummy ANYELEMENT) -RETURNS ANYARRAY AS 'pg_pathman', 'get_range_by_idx' LANGUAGE C STRICT; - -/* - * Returns min value of the first range for relation - */ -CREATE OR REPLACE FUNCTION @extschema@.get_min_range_value( - parent_relid OID, dummy ANYELEMENT) -RETURNS ANYELEMENT AS 'pg_pathman', 'get_min_range_value' LANGUAGE C STRICT; -/* - * Returns max value of the last range for relation - */ -CREATE OR REPLACE FUNCTION @extschema@.get_max_range_value( - parent_relid OID, dummy ANYELEMENT) -RETURNS ANYELEMENT AS 'pg_pathman', 'get_max_range_value' LANGUAGE C STRICT; +SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config', ''); -/* - * Checks if range overlaps with existing partitions. - * Returns TRUE if overlaps and FALSE otherwise. 
- */ -CREATE OR REPLACE FUNCTION @extschema@.check_overlap( - parent_relid OID, range_min ANYELEMENT, range_max ANYELEMENT) -RETURNS BOOLEAN AS 'pg_pathman', 'check_overlap' LANGUAGE C STRICT; /* * Copy rows to partitions @@ -111,7 +63,6 @@ END $$ LANGUAGE plpgsql; - /* * Disable pathman partitioning for specified relation */ @@ -130,43 +81,6 @@ END $$ LANGUAGE plpgsql; - -/* - * Returns attribute type name for relation - */ -CREATE OR REPLACE FUNCTION @extschema@.get_attribute_type_name( - p_relation REGCLASS - , p_attname TEXT - , OUT p_atttype TEXT) -RETURNS TEXT AS -$$ -BEGIN - SELECT typname::TEXT INTO p_atttype - FROM pg_type JOIN pg_attribute on atttypid = "oid" - WHERE attrelid = p_relation::oid and attname = lower(p_attname); -END -$$ -LANGUAGE plpgsql; - - -/* - * Checks if attribute is nullable - */ -CREATE OR REPLACE FUNCTION @extschema@.is_attribute_nullable( - p_relation REGCLASS - , p_attname TEXT - , OUT p_nullable BOOLEAN) -RETURNS BOOLEAN AS -$$ -BEGIN - SELECT NOT attnotnull INTO p_nullable - FROM pg_type JOIN pg_attribute on atttypid = "oid" - WHERE attrelid = p_relation::oid and attname = lower(p_attname); -END -$$ -LANGUAGE plpgsql; - - /* * Aggregates several common relation checks before partitioning. Suitable for every partitioning type. */ @@ -217,17 +131,6 @@ END $$ LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION @extschema@.get_plain_relname(cls regclass) -RETURNS TEXT AS -$$ -BEGIN - RETURN relname FROM pg_class WHERE oid = cls::oid; -END -$$ -LANGUAGE plpgsql; - - /* * Validates relation name. 
It must be schema qualified */ @@ -240,7 +143,6 @@ END $$ LANGUAGE plpgsql; - /* * Returns schema-qualified name for table */ @@ -288,18 +190,6 @@ END $$ LANGUAGE plpgsql; -/* - * Check if regclass if date or timestamp - */ -CREATE OR REPLACE FUNCTION @extschema@.is_date(cls REGTYPE) -RETURNS BOOLEAN AS -$$ -BEGIN - RETURN cls IN ('timestamp'::regtype, 'timestamptz'::regtype, 'date'::regtype); -END -$$ -LANGUAGE plpgsql; - /* * DDL trigger that deletes entry from pathman_config table */ @@ -322,22 +212,6 @@ END $$ LANGUAGE plpgsql; -CREATE EVENT TRIGGER pathman_ddl_trigger -ON sql_drop -EXECUTE PROCEDURE @extschema@.pathman_ddl_trigger_func(); - -/* - * Acquire partitions lock to prevent concurrent partitions creation - */ -CREATE OR REPLACE FUNCTION @extschema@.acquire_partitions_lock() -RETURNS VOID AS 'pg_pathman', 'acquire_partitions_lock' LANGUAGE C STRICT; - -/* - * Release partitions lock - */ -CREATE OR REPLACE FUNCTION @extschema@.release_partitions_lock() -RETURNS VOID AS 'pg_pathman', 'release_partitions_lock' LANGUAGE C STRICT; - /* * Drop trigger */ @@ -410,18 +284,122 @@ END $$ LANGUAGE plpgsql SET pg_pathman.enable_partitionfilter = off; + + +CREATE EVENT TRIGGER pathman_ddl_trigger +ON sql_drop +EXECUTE PROCEDURE @extschema@.pathman_ddl_trigger_func(); + + +/* + * Acquire partitions lock to prevent concurrent partitions creation + */ +CREATE OR REPLACE FUNCTION @extschema@.acquire_partitions_lock() +RETURNS VOID AS 'pg_pathman', 'acquire_partitions_lock' +LANGUAGE C STRICT; + +/* + * Release partitions lock + */ +CREATE OR REPLACE FUNCTION @extschema@.release_partitions_lock() +RETURNS VOID AS 'pg_pathman', 'release_partitions_lock' +LANGUAGE C STRICT; + +/* + * Check if regclass is date or timestamp + */ +CREATE OR REPLACE FUNCTION @extschema@.is_date_type(cls REGTYPE) +RETURNS BOOLEAN AS 'pg_pathman', 'is_date_type' +LANGUAGE C STRICT; + +/* + * Checks if range overlaps with existing partitions. + * Returns TRUE if overlaps and FALSE otherwise. 
+ */ +CREATE OR REPLACE FUNCTION @extschema@.check_overlap( + parent_relid OID, range_min ANYELEMENT, range_max ANYELEMENT) +RETURNS BOOLEAN AS 'pg_pathman', 'check_overlap' +LANGUAGE C STRICT; + + +CREATE OR REPLACE FUNCTION @extschema@.on_create_partitions(relid OID) +RETURNS VOID AS 'pg_pathman', 'on_partitions_created' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION @extschema@.on_update_partitions(relid OID) +RETURNS VOID AS 'pg_pathman', 'on_partitions_updated' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION @extschema@.on_remove_partitions(relid OID) +RETURNS VOID AS 'pg_pathman', 'on_partitions_removed' +LANGUAGE C STRICT; + + +CREATE OR REPLACE FUNCTION @extschema@.find_or_create_range_partition(relid OID, value ANYELEMENT) +RETURNS OID AS 'pg_pathman', 'find_or_create_range_partition' +LANGUAGE C STRICT; + + +/* + * Returns min and max values for specified RANGE partition. + */ +CREATE OR REPLACE FUNCTION @extschema@.get_partition_range( + parent_relid OID, partition_relid OID, dummy ANYELEMENT) +RETURNS ANYARRAY AS 'pg_pathman', 'get_partition_range' +LANGUAGE C STRICT; + + +/* + * Returns N-th range (in form of array) + */ +CREATE OR REPLACE FUNCTION @extschema@.get_range_by_idx( + parent_relid OID, idx INTEGER, dummy ANYELEMENT) +RETURNS ANYARRAY AS 'pg_pathman', 'get_range_by_idx' +LANGUAGE C STRICT; + +/* + * Returns min value of the first range for relation + */ +CREATE OR REPLACE FUNCTION @extschema@.get_min_range_value( + parent_relid OID, dummy ANYELEMENT) +RETURNS ANYELEMENT AS 'pg_pathman', 'get_min_range_value' +LANGUAGE C STRICT; + +/* + * Returns max value of the last range for relation + */ +CREATE OR REPLACE FUNCTION @extschema@.get_max_range_value( + parent_relid OID, dummy ANYELEMENT) +RETURNS ANYELEMENT AS 'pg_pathman', 'get_max_range_value' +LANGUAGE C STRICT; + /* * Returns hash function OID for specified type */ CREATE OR REPLACE FUNCTION @extschema@.get_type_hash_func(OID) -RETURNS OID AS 'pg_pathman', 'get_type_hash_func' 
LANGUAGE C STRICT; +RETURNS OID AS 'pg_pathman', 'get_type_hash_func' +LANGUAGE C STRICT; /* * Calculates hash for integer value */ CREATE OR REPLACE FUNCTION @extschema@.get_hash(INTEGER, INTEGER) -RETURNS INTEGER AS 'pg_pathman', 'get_hash' LANGUAGE C STRICT; +RETURNS INTEGER AS 'pg_pathman', 'get_hash' +LANGUAGE C STRICT; +/* + * Checks if attribute is nullable + */ +CREATE OR REPLACE FUNCTION @extschema@.is_attribute_nullable(REGCLASS, TEXT) +RETURNS BOOLEAN AS 'pg_pathman', 'is_attribute_nullable' +LANGUAGE C STRICT; + +/* + * Returns attribute type name for relation + */ +CREATE OR REPLACE FUNCTION @extschema@.get_attribute_type_name(REGCLASS, TEXT) +RETURNS TEXT AS 'pg_pathman', 'get_attribute_type_name' +LANGUAGE C STRICT; /* * Build check constraint name for a specified relation's column @@ -433,3 +411,10 @@ LANGUAGE C STRICT; CREATE OR REPLACE FUNCTION @extschema@.build_check_constraint_name(REGCLASS, TEXT) RETURNS TEXT AS 'pg_pathman', 'build_check_constraint_name_attname' LANGUAGE C STRICT; + +/* + * DEBUG: Place this inside some plpgsql function and set breakpoint. + */ +CREATE OR REPLACE FUNCTION @extschema@.debug_capture() +RETURNS VOID AS 'pg_pathman', 'debug_capture' +LANGUAGE C STRICT; diff --git a/range.sql b/range.sql index 37f890e856..3f344ce753 100644 --- a/range.sql +++ b/range.sql @@ -314,7 +314,7 @@ END $$ LANGUAGE plpgsql; /* - * + * Check RANGE partition boundaries. */ CREATE OR REPLACE FUNCTION @extschema@.check_boundaries( p_relation REGCLASS @@ -334,9 +334,9 @@ BEGIN p_attribute, p_attribute, p_relation::text, p_attribute) INTO v_count, v_min, v_max; - /* check if column has NULL values */ + /* check that column has no NULL values */ IF v_count > 0 AND (v_min IS NULL OR v_max IS NULL) THEN - RAISE EXCEPTION '''%'' column has NULL values', p_attribute; + RAISE EXCEPTION '''%'' column contains NULL values', p_attribute; END IF; /* check lower boundary */ @@ -353,40 +353,6 @@ BEGIN END $$ LANGUAGE plpgsql; -/* - * Formats range condition.
Utility function. - */ -CREATE OR REPLACE FUNCTION @extschema@.get_range_condition( - p_attname TEXT - , p_start_value ANYELEMENT - , p_end_value ANYELEMENT) -RETURNS TEXT AS -$$ -DECLARE - v_type REGTYPE; - v_sql TEXT; - -BEGIN - /* determine the type of values */ - v_type := pg_typeof(p_start_value); - - /* we cannot use placeholders in DDL queries, so we are using format(...) */ - IF v_type IN ('date'::regtype, 'timestamp'::regtype, 'timestamptz'::regtype) THEN - v_sql := '%s >= ''%s'' AND %s < ''%s'''; - ELSE - v_sql := '%s >= %s AND %s < %s'; - END IF; - - v_sql := format(v_sql - , p_attname - , p_start_value - , p_attname - , p_end_value); - RETURN v_sql; -END -$$ -LANGUAGE plpgsql; - /* * Creates new RANGE partition. Returns partition name */ @@ -512,14 +478,12 @@ BEGIN END IF; /* Create new partition */ - RAISE NOTICE 'Creating new partition...'; v_new_partition := @extschema@.create_single_range_partition( @extschema@.get_schema_qualified_name(v_parent_relid::regclass, '.'), p_value, p_range[2]); /* Copy data */ - RAISE NOTICE 'Copying data to new partition...'; v_cond := @extschema@.get_range_condition(v_attname, p_value, p_range[2]); EXECUTE format(' WITH part_data AS ( @@ -530,7 +494,6 @@ BEGIN , v_new_partition); /* Alter original partition */ - RAISE NOTICE 'Altering original partition...'; v_cond := @extschema@.get_range_condition(v_attname, p_range[1], p_value); v_check_name := @extschema@.build_check_constraint_name(p_partition, v_attname); EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s' @@ -543,8 +506,6 @@ BEGIN /* Tell backend to reload configuration */ PERFORM @extschema@.on_update_partitions(v_parent_relid::oid); - - RAISE NOTICE 'Done!'; END $$ LANGUAGE plpgsql; @@ -601,8 +562,6 @@ BEGIN /* Tell backend to reload configuration */ PERFORM @extschema@.on_update_partitions(v_parent_relid1::oid); - - RAISE NOTICE 'Done!'; END $$ LANGUAGE plpgsql; @@ -657,7 +616,6 @@ BEGIN , greatest(p_range[2], p_range[4])); /* Alter first partition */ - RAISE 
NOTICE 'Altering first partition...'; v_check_name := @extschema@.build_check_constraint_name(p_part1, v_attname); EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s' , p_part1::text @@ -668,14 +626,12 @@ BEGIN , v_cond); /* Copy data from second partition to the first one */ - RAISE NOTICE 'Copying data...'; EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) INSERT INTO %s SELECT * FROM part_data' , p_part2::text , p_part1::text); /* Remove second partition */ - RAISE NOTICE 'Dropping second partition...'; EXECUTE format('DROP TABLE %s', p_part2::text); END $$ LANGUAGE plpgsql; @@ -719,7 +675,6 @@ BEGIN /* Release lock */ PERFORM @extschema@.release_partitions_lock(); - RAISE NOTICE 'Done!'; RETURN v_part_name; EXCEPTION WHEN others THEN @@ -741,8 +696,8 @@ DECLARE BEGIN p_range := @extschema@.get_range_by_idx(p_relation::oid, -1, 0); - RAISE NOTICE 'Appending new partition...'; - IF @extschema@.is_date(p_atttype::regtype) THEN + + IF @extschema@.is_date_type(p_atttype::regtype) THEN v_part_name := @extschema@.create_single_range_partition(p_relation , p_range[2] , p_range[2] + p_interval::interval); @@ -795,7 +750,6 @@ BEGIN /* Release lock */ PERFORM @extschema@.release_partitions_lock(); - RAISE NOTICE 'Done!'; RETURN v_part_name; EXCEPTION WHEN others THEN @@ -817,9 +771,8 @@ DECLARE BEGIN p_range := @extschema@.get_range_by_idx(p_relation::oid, 0, 0); - RAISE NOTICE 'Prepending new partition...'; - IF @extschema@.is_date(p_atttype::regtype) THEN + IF @extschema@.is_date_type(p_atttype::regtype) THEN v_part_name := @extschema@.create_single_range_partition(p_relation , p_range[1] - p_interval::interval , p_range[1]); @@ -867,7 +820,6 @@ BEGIN /* Release lock */ PERFORM @extschema@.release_partitions_lock(); - RAISE NOTICE 'Done!'; RETURN v_part_name; EXCEPTION WHEN others THEN @@ -1129,89 +1081,11 @@ END $$ LANGUAGE plpgsql; /* - * Internal function used to create new partitions on insert or update trigger. 
- * Invoked from C-function find_or_create_range_partition(). + * Construct CHECK constraint condition for a range partition. */ -CREATE OR REPLACE FUNCTION @extschema@.append_partitions_on_demand_internal( - p_relid OID - , p_new_value ANYELEMENT) -RETURNS OID AS -$$ -DECLARE - i INTEGER := 0; - v_part TEXT; - v_interval TEXT; - v_attname TEXT; - v_min p_new_value%TYPE; - v_max p_new_value%TYPE; - v_cur_value p_new_value%TYPE; - v_next_value p_new_value%TYPE; - v_is_date BOOLEAN; - -BEGIN - /* get attribute name and interval */ - SELECT attname, range_interval - FROM @extschema@.pathman_config - WHERE partrel = p_relid - INTO v_attname, v_interval; - - IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', - quote_ident(p_relid::regclass::text); - END IF; - - v_min := @extschema@.get_min_range_value(p_relid::regclass::oid, p_new_value); - v_max := @extschema@.get_max_range_value(p_relid::regclass::oid, p_new_value); - - v_is_date := @extschema@.is_date(pg_typeof(p_new_value)::regtype); - - IF p_new_value >= v_max THEN - v_cur_value := v_max; - WHILE v_cur_value <= p_new_value AND i < 1000 - LOOP - IF v_is_date THEN - v_next_value := v_cur_value + v_interval::interval; - ELSE - EXECUTE format('SELECT $1 + $2::%s', pg_typeof(p_new_value)) - USING v_cur_value, v_interval - INTO v_next_value; - END IF; - - v_part := @extschema@.create_single_range_partition( - @extschema@.get_schema_qualified_name(p_relid::regclass, '.') - , v_cur_value - , v_next_value); - i := i + 1; - v_cur_value := v_next_value; - RAISE NOTICE 'partition % created', v_part; - END LOOP; - ELSIF p_new_value <= v_min THEN - v_cur_value := v_min; - WHILE v_cur_value >= p_new_value AND i < 1000 - LOOP - IF v_is_date THEN - v_next_value := v_cur_value - v_interval::interval; - ELSE - EXECUTE format('SELECT $1 - $2::%s', pg_typeof(p_new_value)) - USING v_cur_value, v_interval - INTO v_next_value; - END IF; - - v_part := @extschema@.create_single_range_partition( - 
@extschema@.get_schema_qualified_name(p_relid::regclass, '.') - , v_next_value - , v_cur_value); - i := i + 1; - v_cur_value := v_next_value; - RAISE NOTICE 'partition % created', v_part; - END LOOP; - ELSE - RAISE EXCEPTION 'Could not create partition'; - END IF; - - IF i > 0 THEN - RETURN v_part::regclass::oid; - END IF; - RETURN NULL; -END -$$ LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION @extschema@.get_range_condition( + p_attname TEXT, + p_start_value ANYELEMENT, + p_end_value ANYELEMENT) +RETURNS TEXT AS 'pg_pathman', 'get_range_condition' +LANGUAGE C STRICT; diff --git a/src/dsm_array.c b/src/dsm_array.c index d55c94c778..62039fb895 100644 --- a/src/dsm_array.c +++ b/src/dsm_array.c @@ -133,12 +133,12 @@ init_dsm_segment(size_t blocks_count, size_t block_size) void init_dsm_table(size_t block_size, size_t start, size_t end) { - int i; - BlockHeaderPtr header; - char *ptr = dsm_segment_address(segment); + size_t i; + BlockHeaderPtr header; + char *ptr = dsm_segment_address(segment); /* create blocks */ - for (i=start; ioffset / dsm_cfg->block_size; - int i = 0; - char *ptr = dsm_segment_address(segment); - BlockHeaderPtr header = (BlockHeaderPtr) &ptr[start * dsm_cfg->block_size]; - size_t blocks_count = get_length(header); + size_t i = 0, + start = arr->offset / dsm_cfg->block_size; + char *ptr = dsm_segment_address(segment); + BlockHeaderPtr header = (BlockHeaderPtr) &ptr[start * dsm_cfg->block_size]; + size_t blocks_count = get_length(header); /* set blocks free */ for(; i < blocks_count; i++) diff --git a/src/dsm_array.h b/src/dsm_array.h index 3da4794867..2b7184d829 100644 --- a/src/dsm_array.h +++ b/src/dsm_array.h @@ -29,6 +29,8 @@ typedef struct #define InvalidDsmArray { 0, 0, 0, 0 } +#define INITIAL_BLOCKS_COUNT 8192 + /* Dynamic shared memory functions */ Size estimate_dsm_config_size(void); diff --git a/src/hooks.c b/src/hooks.c index a62ce2185c..26a3ef7cb8 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -45,7 +45,6 @@ 
pathman_join_pathlist_hook(PlannerInfo *root, ListCell *lc; WalkerContext context; double paramsel; - bool context_initialized; bool innerrel_rinfo_contains_part_attr; /* Call hooks set by other extensions */ @@ -87,16 +86,12 @@ pathman_join_pathlist_hook(PlannerInfo *root, otherclauses = NIL; } - context_initialized = false; paramsel = 1.0; foreach (lc, joinclauses) { WrapperNode *wrap; - /* We aim to persist cached context->ranges */ - InitWalkerContextCustomNode(&context, inner_prel, NULL, - CurrentMemoryContext, false, - &context_initialized); + InitWalkerContext(&context, inner_prel, NULL, false); wrap = walk_expr_tree((Expr *) lfirst(lc), &context); paramsel *= wrap->paramsel; @@ -234,16 +229,14 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); /* Make wrappers over restrictions and collect final rangeset */ - InitWalkerContext(&context, prel, NULL, CurrentMemoryContext, false); + InitWalkerContext(&context, prel, NULL, false); wrappers = NIL; foreach(lc, rel->baserestrictinfo) { WrapperNode *wrap; - RestrictInfo *rinfo = (RestrictInfo*) lfirst(lc); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); wrap = walk_expr_tree(rinfo->clause, &context); - if (!lc->next) - finish_least_greatest(wrap, &context); paramsel *= wrap->paramsel; wrappers = lappend(wrappers, wrap); @@ -462,11 +455,16 @@ pathman_post_parse_analysis_hook(ParseState *pstate, Query *query) /* Load config if pg_pathman exists & it's still necessary */ if (IsPathmanEnabled() && initialization_needed && + /* Now evaluate the most expensive clause */ get_pathman_schema() != InvalidOid) { - load_config(); + load_config(); /* perform main cache initialization */ } + /* Finish delayed invalidation jobs */ + if (IsPathmanReady()) + finish_delayed_invalidation(); + inheritance_disabled_relids = NIL; inheritance_enabled_relids = NIL; } @@ -500,31 +498,39 @@ pathman_relcache_hook(Datum arg, 
Oid relid) /* Invalidate PartParentInfo cache if needed */ partitioned_table = forget_parent_of_partition(relid, &search); - /* It is (or was) a valid partition */ - if (partitioned_table != InvalidOid) + switch (search) { - elog(DEBUG2, "Invalidation message for partition %u [%u]", - relid, MyProcPid); + /* It is (or was) a valid partition */ + case PPS_ENTRY_PART_PARENT: + case PPS_ENTRY_PARENT: + { + elog(DEBUG2, "Invalidation message for partition %u [%u]", + relid, MyProcPid); - /* Invalidate PartRelationInfo cache */ - invalidate_pathman_relation_info(partitioned_table, NULL); + delay_invalidation_parent_rel(partitioned_table); + } + break; - /* TODO: add table to 'invalidated_rel' list */ - } + /* Both syscache and pathman's cache say it isn't a partition */ + case PPS_ENTRY_NOT_FOUND: + { + elog(DEBUG2, "Invalidation message for relation %u [%u]", + relid, MyProcPid); + } + break; - /* Both syscache and pathman's cache say it isn't a partition */ - else if (search == PPS_ENTRY_NOT_FOUND) - { - elog(DEBUG2, "Invalidation message for relation %u [%u]", - relid, MyProcPid); - } + /* We can't say anything (state is not transactional) */ + case PPS_NOT_SURE: + { + elog(DEBUG2, "Invalidation message for vague relation %u [%u]", + relid, MyProcPid); - /* We can't say anything (state is not transactional) */ - else if (search == PPS_NOT_SURE) - { - elog(DEBUG2, "Invalidation message for vague relation %u [%u]", - relid, MyProcPid); + delay_invalidation_vague_rel(relid); + } + break; - /* TODO: add table to 'PPS_NOT_SURE' list */ + default: + elog(ERROR, "Not implemented yet"); + break; } } diff --git a/src/init.c b/src/init.c index 2b51822db7..208ffcd850 100644 --- a/src/init.c +++ b/src/init.c @@ -396,6 +396,9 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, attrs[Anum_pathman_config_partrel - 1]-> atttypid == REGCLASSOID); + /* Check that number of columns == Natts_pathman_config */ + Assert(RelationGetDescr(rel)->natts == 
Natts_pathman_config); + snapshot = RegisterSnapshot(GetLatestSnapshot()); scan = heap_beginscan(rel, snapshot, 1, key); @@ -405,7 +408,15 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, /* Extract data if necessary */ if (values && isnull) - heap_deformtuple(htup, RelationGetDescr(rel), values, isnull); + { + heap_deform_tuple(htup, RelationGetDescr(rel), values, isnull); + + /* Perform checks for non-NULL columns */ + Assert(!isnull[Anum_pathman_config_id - 1]); + Assert(!isnull[Anum_pathman_config_partrel - 1]); + Assert(!isnull[Anum_pathman_config_attname - 1]); + Assert(!isnull[Anum_pathman_config_parttype - 1]); + } /* Set xmin if necessary */ if (xmin) @@ -457,6 +468,9 @@ read_pathman_config(void) attrs[Anum_pathman_config_partrel - 1]-> atttypid == REGCLASSOID); + /* Check that number of columns == Natts_pathman_config */ + Assert(RelationGetDescr(rel)->natts == Natts_pathman_config); + snapshot = RegisterSnapshot(GetLatestSnapshot()); scan = heap_beginscan(rel, snapshot, 0, NULL); @@ -485,7 +499,7 @@ read_pathman_config(void) /* Check that relation 'relid' exists */ if (get_rel_type_id(relid) == InvalidOid) { - DisablePathman(); + DisablePathman(); /* disable pg_pathman since config is broken */ ereport(ERROR, (errmsg("Table \"%s\" contains nonexistent relation %u", diff --git a/src/nodes_common.c b/src/nodes_common.c index 849a778501..5caca19dc5 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -241,19 +241,19 @@ unpack_runtimeappend_private(RuntimeAppendState *scan_state, CustomScan *cscan) /* Transform partition ranges into plain array of partition Oids */ Oid * -get_partition_oids(List *ranges, int *n, PartRelationInfo *prel) +get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel) { ListCell *range_cell; - int allocated = INITIAL_ALLOC_NUM; - int used = 0; + uint32 allocated = INITIAL_ALLOC_NUM; + uint32 used = 0; Oid *result = (Oid *) palloc(allocated * sizeof(Oid)); Oid *children = 
PrelGetChildrenArray(prel, true); foreach (range_cell, ranges) { - int i; - int a = lfirst_irange(range_cell).ir_lower; - int b = lfirst_irange(range_cell).ir_upper; + uint32 i; + uint32 a = lfirst_irange(range_cell).ir_lower, + b = lfirst_irange(range_cell).ir_upper; for (i = a; i <= b; i++) { @@ -263,7 +263,7 @@ get_partition_oids(List *ranges, int *n, PartRelationInfo *prel) result = repalloc(result, allocated * sizeof(Oid)); } - Assert(i < PrelChildrenCount(prel)); + Assert(i < (uint32) abs(PrelChildrenCount(prel))); result[used++] = children[i]; } } @@ -412,10 +412,6 @@ create_append_scan_state_common(CustomScan *node, unpack_runtimeappend_private(scan_state, node); - /* Fill in relation info using main table's relid */ - scan_state->prel = get_pathman_relation_info(scan_state->relid, NULL); - Assert(scan_state->prel); - scan_state->cur_plans = NULL; scan_state->ncur_plans = 0; scan_state->running_idx = 0; @@ -485,7 +481,6 @@ end_append_common(CustomScanState *node) { RuntimeAppendState *scan_state = (RuntimeAppendState *) node; - clear_walker_context(&scan_state->wcxt); clear_plan_states(&scan_state->css); hash_destroy(scan_state->children_table); } @@ -495,17 +490,18 @@ rescan_append_common(CustomScanState *node) { RuntimeAppendState *scan_state = (RuntimeAppendState *) node; ExprContext *econtext = node->ss.ps.ps_ExprContext; - PartRelationInfo *prel = scan_state->prel; + PartRelationInfo *prel; List *ranges; ListCell *lc; Oid *parts; int nparts; + prel = get_pathman_relation_info(scan_state->relid, NULL); + Assert(prel); + ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); - InitWalkerContextCustomNode(&scan_state->wcxt, scan_state->prel, - econtext, CurrentMemoryContext, false, - &scan_state->wcxt_cached); + InitWalkerContext(&scan_state->wcxt, prel, econtext, false); foreach (lc, scan_state->custom_exprs) { diff --git a/src/nodes_common.h b/src/nodes_common.h index 07aaa6e0dc..d66b02e27e 100644 --- a/src/nodes_common.h +++ 
b/src/nodes_common.h @@ -60,7 +60,7 @@ clear_plan_states(CustomScanState *scan_state) } } -Oid * get_partition_oids(List *ranges, int *n, PartRelationInfo *prel); +Oid * get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel); Path * create_append_path_common(PlannerInfo *root, AppendPath *inner_append, diff --git a/src/partition_filter.c b/src/partition_filter.c index 8750754083..43fe6e24cb 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -15,6 +15,7 @@ #include "utils/guc.h" #include "utils/memutils.h" #include "nodes/nodeFuncs.h" +#include "utils/lsyscache.h" bool pg_pathman_enable_partition_filter = true; @@ -116,18 +117,18 @@ partition_filter_begin(CustomScanState *node, EState *estate, int eflags) HASHCTL *result_rels_table_config = &state->result_rels_table_config; node->custom_ps = list_make1(ExecInitNode(state->subplan, estate, eflags)); - state->prel = get_pathman_relation_info(state->partitioned_table, NULL); state->savedRelInfo = NULL; memset(result_rels_table_config, 0, sizeof(HASHCTL)); result_rels_table_config->keysize = sizeof(Oid); - result_rels_table_config->entrysize = sizeof(ResultRelInfoHandle); + result_rels_table_config->entrysize = sizeof(ResultRelInfoHolder); result_rels_table = hash_create("ResultRelInfo storage", 10, result_rels_table_config, HASH_ELEM | HASH_BLOBS); state->result_rels_table = result_rels_table; + state->warning_triggered = false; } TupleTableSlot * @@ -135,7 +136,7 @@ partition_filter_exec(CustomScanState *node) { #define CopyToTempConst(const_field, attr_field) \ ( state->temp_const.const_field = \ - slot->tts_tupleDescriptor->attrs[attnum - 1]->attr_field ) + slot->tts_tupleDescriptor->attrs[prel->attnum - 1]->attr_field ) PartitionFilterState *state = (PartitionFilterState *) node; @@ -152,16 +153,33 @@ partition_filter_exec(CustomScanState *node) if (!TupIsNull(slot)) { - MemoryContext old_cxt; + PartRelationInfo *prel; - List *ranges; - int nparts; - Oid *parts; - Oid 
selected_partid; + MemoryContext old_cxt; - bool isnull; - AttrNumber attnum = state->prel->attnum; - Datum value = slot_getattr(slot, attnum, &isnull); + List *ranges; + int nparts; + Oid *parts; + Oid selected_partid; + + WalkerContext wcxt; + bool isnull; + Datum value; + + /* Fetch PartRelationInfo for this partitioned relation */ + prel = get_pathman_relation_info(state->partitioned_table, NULL); + if (!prel) + { + if (!state->warning_triggered) + elog(WARNING, "Relation \"%s\" is not partitioned, " + "PartitionFilter will behave as a normal INSERT", + get_rel_name_or_relid(state->partitioned_table)); + + return slot; + } + + /* Extract partitioned column value */ + value = slot_getattr(slot, prel->attnum, &isnull); /* Fill const with value ... */ state->temp_const.constvalue = value; @@ -174,15 +192,13 @@ partition_filter_exec(CustomScanState *node) CopyToTempConst(constlen, attlen); CopyToTempConst(constbyval, attbyval); - InitWalkerContextCustomNode(&state->wcxt, state->prel, econtext, - CurrentMemoryContext, true, - &state->wcxt_cached); + InitWalkerContext(&wcxt, prel, econtext, true); /* Switch to per-tuple context */ old_cxt = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); - ranges = walk_expr_tree((Expr *) &state->temp_const, &state->wcxt)->rangeset; - parts = get_partition_oids(ranges, &nparts, state->prel); + ranges = walk_expr_tree((Expr *) &state->temp_const, &wcxt)->rangeset; + parts = get_partition_oids(ranges, &nparts, prel); if (nparts > 1) elog(ERROR, "PartitionFilter selected more than one partition"); @@ -191,9 +207,6 @@ partition_filter_exec(CustomScanState *node) selected_partid = create_partitions(state->partitioned_table, state->temp_const.constvalue, state->temp_const.consttype); - - /* Now we have to refresh state->wcxt->ranges manually */ - refresh_walker_context_ranges(&state->wcxt); } else selected_partid = parts[0]; @@ -202,9 +215,8 @@ partition_filter_exec(CustomScanState *node) MemoryContextSwitchTo(old_cxt); 
ResetExprContext(econtext); - /* Replace main table with suitable partition */ - estate->es_result_relation_info = getResultRelInfo(selected_partid, - state); + /* Replace parent table with a suitable partition */ + estate->es_result_relation_info = getResultRelInfo(selected_partid, state); return slot; } @@ -218,10 +230,10 @@ partition_filter_end(CustomScanState *node) PartitionFilterState *state = (PartitionFilterState *) node; HASH_SEQ_STATUS stat; - ResultRelInfoHandle *rri_handle; + ResultRelInfoHolder *rri_handle; /* ResultRelInfo holder */ hash_seq_init(&stat, state->result_rels_table); - while ((rri_handle = (ResultRelInfoHandle *) hash_seq_search(&stat)) != NULL) + while ((rri_handle = (ResultRelInfoHolder *) hash_seq_search(&stat)) != NULL) { ExecCloseIndices(rri_handle->resultRelInfo); heap_close(rri_handle->resultRelInfo->ri_RelationDesc, @@ -231,8 +243,6 @@ partition_filter_end(CustomScanState *node) Assert(list_length(node->custom_ps) == 1); ExecEndNode((PlanState *) linitial(node->custom_ps)); - - clear_walker_context(&state->wcxt); } void @@ -249,6 +259,9 @@ partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *e } +/* + * Construct ResultRelInfo for a partition. 
+ */ static ResultRelInfo * getResultRelInfo(Oid partid, PartitionFilterState *state) { @@ -263,7 +276,7 @@ getResultRelInfo(Oid partid, PartitionFilterState *state) palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(field_type)); \ } while (0) - ResultRelInfoHandle *resultRelInfoHandle; + ResultRelInfoHolder *resultRelInfoHandle; bool found; resultRelInfoHandle = hash_search(state->result_rels_table, @@ -313,6 +326,9 @@ getResultRelInfo(Oid partid, PartitionFilterState *state) resultRelInfoHandle->partid = partid; resultRelInfoHandle->resultRelInfo = resultRelInfo; + + /* Make 'range table index' point to the parent relation */ + resultRelInfo->ri_RangeTableIndex = state->savedRelInfo->ri_RangeTableIndex; } return resultRelInfoHandle->resultRelInfo; diff --git a/src/partition_filter.h b/src/partition_filter.h index dfcadcac34..d16cb0c0d4 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -23,14 +23,13 @@ typedef struct { Oid partid; ResultRelInfo *resultRelInfo; -} ResultRelInfoHandle; +} ResultRelInfoHolder; typedef struct { CustomScanState css; Oid partitioned_table; - PartRelationInfo *prel; OnConflictAction onConflictAction; ResultRelInfo *savedRelInfo; @@ -40,9 +39,7 @@ typedef struct HTAB *result_rels_table; HASHCTL result_rels_table_config; - WalkerContext wcxt; - bool wcxt_cached; /* does wcxt contain cached data, - e.g. RangeEntry array? */ + bool warning_triggered; } PartitionFilterState; @@ -52,6 +49,10 @@ extern CustomScanMethods partition_filter_plan_methods; extern CustomExecMethods partition_filter_exec_methods; +void rowmark_add_tableoids(Query *parse); + +void postprocess_lock_rows(List *rtable, Plan *plan); + void add_partition_filters(List *rtable, Plan *plan); void init_partition_filter_static_data(void); diff --git a/src/pathman.h b/src/pathman.h index bdd48704a0..eccfb77e4d 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -33,8 +33,13 @@ #error "You are trying to build pg_pathman with PostgreSQL version lower than 9.5. 
Please, check your environment." #endif -#define ALL NIL -#define INITIAL_BLOCKS_COUNT 8192 +/* Print Datum as CString to server log */ +#ifdef USE_ASSERT_CHECKING +#include "utils.h" +#define DebugPrintDatum(datum, typid) ( datum_to_cstring((datum), (typid)) ) +#else +#define DebugPrintDatum(datum, typid) ( "[use --enable-cassert]" ) +#endif /* @@ -48,6 +53,9 @@ #define Anum_pathman_config_parttype 4 #define Anum_pathman_config_range_interval 5 +/* type modifier (typmod) for 'range_interval' */ +#define PATHMAN_CONFIG_interval_typmod -1 + #define PATHMAN_CONFIG_partrel_idx "pathman_config_partrel_idx" @@ -98,6 +106,7 @@ extern PathmanState *pmstate; #define DisablePathman() \ do { \ pg_pathman_enable = false; \ + initialization_needed = true; \ } while (0) @@ -110,7 +119,6 @@ search_rangerel_result search_range_partition_eq(const Datum value, const PartRelationInfo *prel, RangeEntry *out_re); -Oid create_partitions_bg_worker(Oid relid, Datum value, Oid value_type); uint32 make_hash(uint32 value, uint32 partitions); void handle_modification_query(Query *parse); @@ -121,8 +129,9 @@ void disable_inheritance_subselect(Query *parse); /* copied from allpaths.h */ void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); -void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte, - PathKey *pathkeyAsc, PathKey *pathkeyDesc); +void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, + RangeTblEntry *rte, PathKey *pathkeyAsc, + PathKey *pathkeyDesc); typedef struct { @@ -138,75 +147,38 @@ typedef struct /* Main partitioning structure */ const PartRelationInfo *prel; - /* Long-living context for cached values */ - MemoryContext persistent_mcxt; - - /* Cached values */ - const RangeEntry *ranges; /* cached RangeEntry array (copy) */ - size_t nranges; /* number of RangeEntries */ ExprContext *econtext; /* for ExecEvalExpr() */ - /* Runtime values */ - bool hasLeast, - hasGreatest; -
Datum least, - greatest; - bool for_insert; /* are we in PartitionFilter now? */ } WalkerContext; /* * Usual initialization procedure for WalkerContext */ -#define InitWalkerContext(context, prel_info, ecxt, mcxt, for_ins) \ +#define InitWalkerContext(context, prel_info, ecxt, for_ins) \ do { \ (context)->prel = (prel_info); \ (context)->econtext = (ecxt); \ - (context)->ranges = NULL; \ - (context)->nranges = 0; \ - (context)->hasLeast = false; \ - (context)->hasGreatest = false; \ - (context)->persistent_mcxt = (mcxt); \ (context)->for_insert = (for_ins); \ } while (0) -/* - * We'd like to persist RangeEntry (ranges) array - * in case of range partitioning, so 'wcxt' is stored - * inside of Custom Node - */ -#define InitWalkerContextCustomNode(context, prel_info, ecxt, mcxt, for_ins, isCached) \ - do { \ - if (!*isCached) \ - { \ - (context)->prel = prel_info; \ - (context)->econtext = ecxt; \ - (context)->ranges = NULL; \ - (context)->nranges = 0; \ - (context)->persistent_mcxt = (mcxt); \ - (context)->for_insert = (for_ins); \ - *isCached = true; \ - } \ - (context)->hasLeast = false; \ - (context)->hasGreatest = false; \ - } while (0) - /* Check that WalkerContext contains ExprContext (plan execution stage) */ #define WcxtHasExprContext(wcxt) ( (wcxt)->econtext ) -Oid create_partitions_internal(Oid relid, Datum value, Oid value_type); +/* + * Functions for partition creation, use create_partitions(). 
+ */ Oid create_partitions(Oid relid, Datum value, Oid value_type); +Oid create_partitions_bg_worker(Oid relid, Datum value, Oid value_type); +Oid create_partitions_internal(Oid relid, Datum value, Oid value_type); void select_range_partitions(const Datum value, FmgrInfo *cmp_func, const RangeEntry *ranges, - const size_t nranges, + const int nranges, const int strategy, WrapperNode *result); WrapperNode *walk_expr_tree(Expr *expr, WalkerContext *context); -void finish_least_greatest(WrapperNode *wrap, WalkerContext *context); -void refresh_walker_context_ranges(WalkerContext *context); -void clear_walker_context(WalkerContext *context); #endif /* PATHMAN_H */ diff --git a/src/pg_pathman.c b/src/pg_pathman.c index a8c1ebc3ee..7b3d03b449 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -19,23 +19,28 @@ #include "postgres.h" #include "access/heapam.h" +#include "access/htup_details.h" #include "access/transam.h" #include "access/xact.h" +#include "catalog/pg_cast.h" #include "catalog/pg_type.h" #include "executor/spi.h" +#include "foreign/fdwapi.h" +#include "fmgr.h" #include "miscadmin.h" #include "optimizer/clauses.h" #include "optimizer/prep.h" #include "optimizer/restrictinfo.h" #include "optimizer/cost.h" -#include "utils/rel.h" +#include "utils/builtins.h" +#include "utils/datum.h" #include "utils/guc.h" #include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/syscache.h" #include "utils/selfuncs.h" #include "utils/snapmgr.h" -#include "utils/memutils.h" -#include "foreign/fdwapi.h" -#include "fmgr.h" PG_MODULE_MAGIC; @@ -54,6 +59,12 @@ void _PG_init(void); static Node *wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysTrue); static bool disable_inheritance_subselect_walker(Node *node, void *context); +/* "Partition creation"-related functions */ +static bool spawn_partitions(const PartRelationInfo *prel, FmgrInfo *cmp_proc, + Datum interval_binary, Oid interval_type, + Datum leading_bound, Datum 
value, bool forward, + Oid *last_partition); + /* Expression tree handlers */ static WrapperNode *handle_const(const Const *c, WalkerContext *context); static void handle_binary_opexpr(WalkerContext *context, WrapperNode *result, const Node *varnode, const Const *c); @@ -83,16 +94,16 @@ static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo * flinfo is a pointer to an instance of FmgrInfo * arg1, arg2 are Datum instances */ -#define check_lt(flinfo, arg1, arg2) \ - ((int) FunctionCall2(cmp_func, arg1, arg2) < 0) -#define check_le(flinfo, arg1, arg2) \ - ((int) FunctionCall2(cmp_func, arg1, arg2) <= 0) -#define check_eq(flinfo, arg1, arg2) \ - ((int) FunctionCall2(cmp_func, arg1, arg2) == 0) -#define check_ge(flinfo, arg1, arg2) \ - ((int) FunctionCall2(cmp_func, arg1, arg2) >= 0) -#define check_gt(flinfo, arg1, arg2) \ - ((int) FunctionCall2(cmp_func, arg1, arg2) > 0) +#define check_lt(finfo, arg1, arg2) \ + ((int) FunctionCall2(finfo, arg1, arg2) < 0) +#define check_le(finfo, arg1, arg2) \ + ((int) FunctionCall2(finfo, arg1, arg2) <= 0) +#define check_eq(finfo, arg1, arg2) \ + ((int) FunctionCall2(finfo, arg1, arg2) == 0) +#define check_ge(finfo, arg1, arg2) \ + ((int) FunctionCall2(finfo, arg1, arg2) >= 0) +#define check_gt(finfo, arg1, arg2) \ + ((int) FunctionCall2(finfo, arg1, arg2) > 0) /* We can transform Param into Const provided that 'econtext' is available */ #define IsConstValue(wcxt, node) \ @@ -178,7 +189,7 @@ disable_inheritance(Query *parse) case RTE_RELATION: if (rte->inh) { - /* Look up this relation in pathman relations */ + /* Look up this relation in pathman local cache */ prel = get_pathman_relation_info(rte->relid, NULL); if (prel) { @@ -309,10 +320,8 @@ handle_modification_query(Query *parse) return; /* Parse syntax tree and extract partition ranges */ - InitWalkerContext(&context, prel, NULL, CurrentMemoryContext, false); + InitWalkerContext(&context, prel, NULL, false); wrap = walk_expr_tree(expr, &context); - 
finish_least_greatest(wrap, &context); - clear_walker_context(&context); ranges = irange_list_intersect(ranges, wrap->rangeset); @@ -342,7 +351,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, foreach(l, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); - int childRTindex, + Index childRTindex, parentRTindex = rti; RelOptInfo *childrel; @@ -626,46 +635,6 @@ wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysTrue) return copyObject(wrap->orig); } -/* - * Refresh cached RangeEntry array within WalkerContext - * - * This is essential when we add new partitions - * while executing INSERT query on partitioned table. - */ -void -refresh_walker_context_ranges(WalkerContext *context) -{ - MemoryContext old_mcxt; - Oid partitioned_table = context->prel->key; - - context->prel = get_pathman_relation_info(partitioned_table, NULL); - - /* Clear old cached data */ - clear_walker_context(context); - - /* Switch to long-living context which should store data */ - old_mcxt = MemoryContextSwitchTo(context->persistent_mcxt); - - context->ranges = PrelGetRangesArray(context->prel, true); - context->nranges = PrelChildrenCount(context->prel); - - /* Switch back */ - MemoryContextSwitchTo(old_mcxt); -} - -/* - * Free all temporary data cached by WalkerContext - */ -void -clear_walker_context(WalkerContext *context) -{ - if (context->ranges) - { - context->ranges = NULL; - context->nranges = 0; - } -} - /* * Recursive function to walk through conditions tree */ @@ -709,41 +678,111 @@ walk_expr_tree(Expr *expr, WalkerContext *context) } } -void -finish_least_greatest(WrapperNode *wrap, WalkerContext *context) +/* + * Append\prepend partitions if there's no partition to store 'value'. + * + * Used by create_partitions_internal(). + * + * NB: 'value' type is not needed since we've already taken + * it into account while searching for the 'cmp_proc'. 
+ */ +static bool +spawn_partitions(const PartRelationInfo *prel, + FmgrInfo *cmp_proc, /* cmp(value, leading_bound) */ + Datum interval_binary, /* interval in binary form */ + Oid interval_type, /* INTERVALOID or prel->atttype */ + Datum leading_bound, /* current global min\max */ + Datum value, /* type isn't needed */ + bool forward, + Oid *last_partition) /* append\prepend */ { - if (context->hasLeast && context->hasGreatest) +/* Cache "+"(leading_bound, interval) or "-"(leading_bound, interval) operator */ +#define CacheOperator(finfo, opname, arg1, arg2, is_cached) \ + do { \ + if (!is_cached) \ + { \ + fmgr_info(get_binary_operator_oid((opname), (arg1), (arg2)), \ + (finfo)); \ + is_cached = true; \ + } \ + } while (0) + +/* Use "<" for prepend & ">=" for append */ +#define do_compare(compar, a, b, fwd) \ + ( \ + (fwd) ? \ + check_ge((compar), (a), (b)) : \ + check_lt((compar), (a), (b)) \ + ) + + FmgrInfo interval_move_bound; /* move upper\lower boundary */ + bool interval_move_bound_cached = false; + bool done = false; + + Datum cur_part_leading = leading_bound; + + char *query; + + /* Create querty statement */ + query = psprintf("SELECT part::regclass " + "FROM %s.create_single_range_partition($1, $2, $3) AS part", + get_namespace_name(get_pathman_schema())); + + /* Execute comparison function cmp(value, cur_part_leading) */ + while ((done = do_compare(cmp_proc, value, cur_part_leading, forward))) { - switch (context->prel->atttype) + char *nulls = NULL; /* no params are NULL */ + Oid types[3] = { REGCLASSOID, prel->atttype, prel->atttype }; + Datum values[3]; + int ret; + + /* Assign the 'following' boundary to current 'leading' value */ + Datum cur_part_following = cur_part_leading; + + CacheOperator(&interval_move_bound, (forward ? 
"+" : "-"), + prel->atttype, interval_type, interval_move_bound_cached); + + /* Move leading bound by interval (leading +\- INTERVAL) */ + cur_part_leading = FunctionCall2(&interval_move_bound, + cur_part_leading, + interval_binary); + + /* Fill in 'values' with parent's Oid and correct boundaries... */ + values[0] = prel->key; /* partitioned table's Oid */ + values[1] = forward ? cur_part_following : cur_part_leading; /* value #1 */ + values[2] = forward ? cur_part_leading : cur_part_following; /* value #2 */ + + /* ...and create partition */ + ret = SPI_execute_with_args(query, 3, types, values, nulls, false, 0); + if (ret != SPI_OK_SELECT) + elog(ERROR, "Could not create partition"); + + /* Set 'last_partition' if necessary */ + if (last_partition) { - case INT4OID: - { - uint32 least = DatumGetInt32(context->least), - greatest = DatumGetInt32(context->greatest); - List *rangeset = NIL; + HeapTuple htup = SPI_tuptable->vals[0]; + Datum partid; + bool isnull; - if (greatest - least + 1 < PrelChildrenCount(context->prel)) - { - uint32 value, - hash; - - for (value = least; value <= greatest; value++) - { - hash = make_hash(value, PrelChildrenCount(context->prel)); - rangeset = irange_list_union(rangeset, - list_make1_irange(make_irange(hash, hash, true))); - } - wrap->rangeset = irange_list_intersect(wrap->rangeset, - rangeset); - } - } - break; - default: - elog(ERROR, "Invalid datatype: %u", context->prel->atttype); + Assert(SPI_processed == 1); + Assert(SPI_tuptable->tupdesc->natts == 1); + partid = SPI_getbinval(htup, SPI_tuptable->tupdesc, 1, &isnull); + + *last_partition = DatumGetObjectId(partid); } + +#ifdef USE_ASSERT_CHECKING + elog(DEBUG2, "%s partition with following='%s' & leading='%s' [%u]", + (forward ? 
"Appending" : "Prepending"), + DebugPrintDatum(cur_part_following, prel->atttype), + DebugPrintDatum(cur_part_leading, prel->atttype), + MyProcPid); +#endif } - context->hasLeast = false; - context->hasGreatest = false; + + pfree(query); + + return done; } /* @@ -754,58 +793,98 @@ finish_least_greatest(WrapperNode *wrap, WalkerContext *context) Oid create_partitions_internal(Oid relid, Datum value, Oid value_type) { - int ret; - char *sql; - PartRelationInfo *prel; - FmgrInfo cmp_func; - MemoryContext old_mcxt = CurrentMemoryContext; - Oid partid = InvalidOid; /* default value */ + MemoryContext old_mcxt = CurrentMemoryContext; + Oid partid = InvalidOid; /* default value */ - if ((prel = get_pathman_relation_info(relid, NULL)) == NULL) + PG_TRY(); { - elog(LOG, "Cannot fetch PartRelationInfo for relation %u [%u]", - relid, MyProcPid); + const PartRelationInfo *prel; + Datum values[Natts_pathman_config]; + bool isnull[Natts_pathman_config]; - return InvalidOid; - } + /* Get both PartRelationInfo & PATHMAN_CONFIG contents for this relation */ + if ((prel = get_pathman_relation_info(relid, NULL)) != NULL && + pathman_config_contains_relation(relid, values, isnull, NULL)) + { + Datum min_rvalue, + max_rvalue; - if ((ret = SPI_connect()) < 0) - { - elog(LOG, "create_partitions_internal(): SPI_connect returned %d", ret); + Oid interval_type = InvalidOid; + Datum interval_binary, /* assigned 'width' of a single partition */ + interval_text; + const char *interval_cstring; - return InvalidOid; - } + FmgrInfo interval_type_cmp; - /* Comparison function */ - fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); + if (prel->parttype != PT_RANGE) + elog(ERROR, "Relation \"%s\" is not partitioned by RANGE", + get_rel_name_or_relid(relid)); - /* Perform PL procedure */ - sql = psprintf("SELECT %s.append_partitions_on_demand_internal($1, $2)", - get_namespace_name(get_pathman_schema())); + /* Fill the FmgrInfo struct with a cmp(value, part_attribute) function */ + 
fill_type_cmp_fmgr_info(&interval_type_cmp, value_type, prel->atttype); - PG_TRY(); - { - Oid oids[] = { OIDOID, value_type }; - Datum vals[] = { ObjectIdGetDatum(relid), value }; - bool nulls[] = { false, false }; - bool isnull; - - /* TODO: maybe this could be rewritten with FunctionCall */ - ret = SPI_execute_with_args(sql, 2, oids, vals, nulls, false, 0); - if (ret == SPI_OK_SELECT) - { - TupleDesc tupdesc = SPI_tuptable->tupdesc; - HeapTuple tuple = SPI_tuptable->vals[0]; + /* Convert interval from TEXT to CSTRING */ + interval_text = values[Anum_pathman_config_range_interval - 1]; + interval_cstring = TextDatumGetCString(interval_text); - Assert(SPI_processed == 1); + /* Read max & min range values from PartRelationInfo */ + min_rvalue = prel->ranges[0].min; + max_rvalue = prel->ranges[PrelChildrenCount(prel) - 1].max; - partid = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 1, &isnull)); + /* If this is a *date type*, cast 'range_interval' to INTERVAL */ + if (is_date_type_internal(value_type)) + { + int32 interval_typmod = PATHMAN_CONFIG_interval_typmod; + + /* Convert interval from CSTRING to internal form */ + interval_binary = DirectFunctionCall3(interval_in, + CStringGetDatum(interval_cstring), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(interval_typmod)); + interval_type = INTERVALOID; + } + /* Otherwise cast it to the partitioned column's type */ + else + { + HeapTuple htup; + Oid typein_proc = InvalidOid; + + htup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(prel->atttype)); + if (HeapTupleIsValid(htup)) + { + typein_proc = ((Form_pg_type) GETSTRUCT(htup))->typinput; + ReleaseSysCache(htup); + } + else + elog(ERROR, "Cannot find input function for type %u", + prel->atttype); + + /* Convert interval from CSTRING to 'prel->atttype' */ + interval_binary = OidFunctionCall1(typein_proc, value); + interval_type = prel->atttype; + } + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "Could not connect using SPI"); + + /* while (value >= MAX) ... 
*/ + spawn_partitions(prel, &interval_type_cmp, interval_binary, + interval_type, max_rvalue, value, true, &partid); + + /* while (value < MIN) ... */ + spawn_partitions(prel, &interval_type_cmp, interval_binary, + interval_type, min_rvalue, value, false, &partid); + + SPI_finish(); /* close SPI connection */ } + else + elog(ERROR, "Relation %u is not partitioned by pg_pathman", relid); } PG_CATCH(); { ErrorData *edata; + /* Switch to the original context & copy edata */ MemoryContextSwitchTo(old_mcxt); edata = CopyErrorData(); FlushErrorState(); @@ -814,11 +893,11 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) edata->message, MyProcPid); FreeErrorData(edata); + + SPI_finish(); /* no problem if not connected */ } PG_END_TRY(); - SPI_finish(); - return partid; } @@ -831,6 +910,7 @@ Oid create_partitions(Oid relid, Datum value, Oid value_type) { TransactionId rel_xmin; + Oid last_partition = InvalidOid; /* Check that table is partitioned and fetch xmin */ if (pathman_config_contains_relation(relid, NULL, NULL, &rel_xmin)) @@ -839,19 +919,24 @@ create_partitions(Oid relid, Datum value, Oid value_type) if (TransactionIdPrecedes(rel_xmin, GetCurrentTransactionId())) { elog(DEBUG2, "create_partitions(): chose BGW [%u]", MyProcPid); - return create_partitions_bg_worker(relid, value, value_type); + last_partition = create_partitions_bg_worker(relid, value, value_type); } /* Else it'd better for the current backend to create partitions */ else { elog(DEBUG2, "create_partitions(): chose backend [%u]", MyProcPid); - return create_partitions_internal(relid, value, value_type); + last_partition = create_partitions_internal(relid, value, value_type); } } else elog(ERROR, "Relation %u is not partitioned by pg_pathman", relid); - return InvalidOid; /* keep compiler happy */ + /* Check that 'last_partition' is valid */ + if (last_partition == InvalidOid) + elog(ERROR, "Could not create new partitions for relation \"%s\"", + get_rel_name_or_relid(relid)); + + 
return last_partition; } /* @@ -862,7 +947,7 @@ void select_range_partitions(const Datum value, FmgrInfo *cmp_func, const RangeEntry *ranges, - const size_t nranges, + const int nranges, const int strategy, WrapperNode *result) { @@ -1059,7 +1144,7 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, if (strategy == BTEqualStrategyNumber) { Datum value = OidFunctionCall1(prel->hash_proc, c->constvalue); - uint32 hash = make_hash(DatumGetUInt32(value), + uint32 hash = make_hash(DatumGetInt32(value), PrelChildrenCount(prel)); result->rangeset = list_make1_irange(make_irange(hash, hash, true)); @@ -1069,14 +1154,10 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, case PT_RANGE: { - /* Refresh 'ranges' cache if necessary */ - if (!context->ranges) - refresh_walker_context_ranges(context); - select_range_partitions(c->constvalue, &cmp_func, - context->ranges, - context->nranges, + context->prel->ranges, + PrelChildrenCount(context->prel), strategy, result); return; @@ -1145,8 +1226,8 @@ search_range_partition_eq(const Datum value, RangeEntry *out_re) /* returned RangeEntry */ { RangeEntry *ranges; - size_t nranges; - WrapperNode result; + int nranges; + WrapperNode result; ranges = PrelGetRangesArray(prel, true); nranges = PrelChildrenCount(prel); @@ -1223,7 +1304,7 @@ handle_const(const Const *c, WalkerContext *context) case PT_HASH: { Datum value = OidFunctionCall1(prel->hash_proc, c->constvalue); - uint32 hash = make_hash(DatumGetUInt32(value), + uint32 hash = make_hash(DatumGetInt32(value), PrelChildrenCount(prel)); result->rangeset = list_make1_irange(make_irange(hash, hash, true)); } @@ -1235,14 +1316,10 @@ handle_const(const Const *c, WalkerContext *context) tce = lookup_type_cache(c->consttype, TYPECACHE_CMP_PROC_FINFO); - /* Refresh 'ranges' cache if necessary */ - if (!context->ranges) - refresh_walker_context_ranges(context); - select_range_partitions(c->constvalue, &tce->cmp_proc_finfo, - context->ranges, - 
context->nranges, + context->prel->ranges, + PrelChildrenCount(context->prel), BTEqualStrategyNumber, result); } @@ -1466,7 +1543,7 @@ handle_arrexpr(const ScalarArrayOpExpr *expr, WalkerContext *context) { /* Invoke base hash function for value type */ value = OidFunctionCall1(prel->hash_proc, elem_values[i]); - hash = make_hash(DatumGetUInt32(value), PrelChildrenCount(prel)); + hash = make_hash(DatumGetInt32(value), PrelChildrenCount(prel)); result->rangeset = irange_list_union(result->rangeset, list_make1_irange(make_irange(hash, hash, true))); } @@ -1580,7 +1657,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte, PathKey *pathkeyAsc, PathKey *pathkeyDesc) { - int parentRTindex = rti; + Index parentRTindex = rti; List *live_childrels = NIL; List *subpaths = NIL; bool subpaths_valid = true; @@ -1597,7 +1674,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, foreach(l, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); - int childRTindex; + Index childRTindex; RangeTblEntry *childRTE; RelOptInfo *childrel; ListCell *lcp; diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 568201bd2c..af888684a5 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -12,10 +12,12 @@ #include "init.h" #include "utils.h" +#include "access/htup_details.h" #include "access/nbtree.h" #include "access/xact.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #include "utils/typcache.h" #include "utils/array.h" #include "utils/memutils.h" @@ -26,6 +28,7 @@ PG_FUNCTION_INFO_V1( on_partitions_created ); PG_FUNCTION_INFO_V1( on_partitions_updated ); PG_FUNCTION_INFO_V1( on_partitions_removed ); PG_FUNCTION_INFO_V1( find_or_create_range_partition); +PG_FUNCTION_INFO_V1( get_range_condition ); PG_FUNCTION_INFO_V1( get_range_by_idx ); PG_FUNCTION_INFO_V1( get_partition_range ); PG_FUNCTION_INFO_V1( acquire_partitions_lock ); @@ -37,6 +40,10 @@ PG_FUNCTION_INFO_V1( 
get_type_hash_func ); PG_FUNCTION_INFO_V1( get_hash ); PG_FUNCTION_INFO_V1( build_check_constraint_name_attnum ); PG_FUNCTION_INFO_V1( build_check_constraint_name_attname ); +PG_FUNCTION_INFO_V1( is_date_type ); +PG_FUNCTION_INFO_V1( get_attribute_type_name ); +PG_FUNCTION_INFO_V1( is_attribute_nullable ); +PG_FUNCTION_INFO_V1( debug_capture ); static void on_partitions_created_internal(Oid partitioned_table, bool add_callbacks); @@ -104,7 +111,7 @@ on_partitions_removed(PG_FUNCTION_ARGS) Datum find_or_create_range_partition(PG_FUNCTION_ARGS) { - Oid relid = PG_GETARG_OID(0); + Oid parent_oid = PG_GETARG_OID(0); Datum value = PG_GETARG_DATUM(1); Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); PartRelationInfo *prel; @@ -112,7 +119,7 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) RangeEntry found_rentry; search_rangerel_result search_state; - prel = get_pathman_relation_info(relid, NULL); + prel = get_pathman_relation_info(parent_oid, NULL); if (!prel) PG_RETURN_NULL(); @@ -152,7 +159,7 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) PG_RETURN_OID(found_rentry.child_oid); } - child_oid = create_partitions(relid, value, value_type); + child_oid = create_partitions(parent_oid, value, value_type); LWLockRelease(pmstate->load_config_lock); LWLockRelease(pmstate->edit_partitions_lock); @@ -174,7 +181,7 @@ get_partition_range(PG_FUNCTION_ARGS) Oid parent_oid = PG_GETARG_OID(0); Oid child_oid = PG_GETARG_OID(1); const int nelems = 2; - int i; + uint32 i; bool found = false; Datum *elems; PartRelationInfo *prel; @@ -214,13 +221,53 @@ get_partition_range(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } +/* + * Formats range condition for a CHECK CONSTRAINT. 
+ */ +Datum +get_range_condition(PG_FUNCTION_ARGS) +{ + text *attname = PG_GETARG_TEXT_P(0); + + Datum min_bound = PG_GETARG_DATUM(1), + max_bound = PG_GETARG_DATUM(2); + + Oid min_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 1), + max_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + + char *subst_str; /* substitution string */ + char *result; + + /* This is not going to trigger (not now, at least), just for the safety */ + if (min_bound_type != max_bound_type) + elog(ERROR, "Cannot build range condition: " + "boundaries should be of the same type"); + + /* Check if we need single quotes */ + /* TODO: check for primitive types instead, that would be better */ + if (is_date_type_internal(min_bound_type) || + is_string_type_internal(min_bound_type)) + { + subst_str = "%1$s >= '%2$s' AND %1$s < '%3$s'"; + } + else + subst_str = "%1$s >= %2$s AND %1$s < %3$s"; + + /* Create range condition CSTRING */ + result = psprintf(subst_str, + text_to_cstring(attname), + datum_to_cstring(min_bound, min_bound_type), + datum_to_cstring(max_bound, max_bound_type)); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} /* * Returns N-th range (in form of array) * * First argument is the parent relid. - * Second argument is the index of the range (if it is negative then the last - * range will be returned). + * Second argument is the index of the range (if it is + * negative then the last range will be returned). 
*/ Datum get_range_by_idx(PG_FUNCTION_ARGS) @@ -231,27 +278,32 @@ get_range_by_idx(PG_FUNCTION_ARGS) RangeEntry *ranges; RangeEntry re; Datum *elems; - TypeCacheEntry *tce; prel = get_pathman_relation_info(parent_oid, NULL); + if (!prel) + elog(ERROR, "Cannot get partitioning cache entry for relation %u", parent_oid); - if (!prel || idx >= PrelChildrenCount(prel)) - PG_RETURN_NULL(); + if (((uint32) abs(idx)) >= PrelChildrenCount(prel)) + elog(ERROR, "Partition #%d does not exist (max is #%u)", + idx, PrelChildrenCount(prel) - 1); - tce = lookup_type_cache(prel->atttype, 0); ranges = PrelGetRangesArray(prel, true); if (idx >= 0) re = ranges[idx]; - else + else if(idx == -1) re = ranges[PrelChildrenCount(prel) - 1]; + else + elog(ERROR, "Negative indices other than -1 (last partition) are not allowed"); elems = palloc(2 * sizeof(Datum)); elems[0] = re.min; elems[1] = re.max; - PG_RETURN_ARRAYTYPE_P( - construct_array(elems, 2, prel->atttype, - tce->typlen, tce->typbyval, tce->typalign)); + PG_RETURN_ARRAYTYPE_P(construct_array(elems, 2, + prel->atttype, + prel->attlen, + prel->attbyval, + prel->attalign)); } /* @@ -301,7 +353,7 @@ get_max_range_value(PG_FUNCTION_ARGS) Datum check_overlap(PG_FUNCTION_ARGS) { - Oid partitioned_table = PG_GETARG_OID(0); + Oid parent_oid = PG_GETARG_OID(0); Datum p1 = PG_GETARG_DATUM(1), p2 = PG_GETARG_DATUM(2); @@ -314,9 +366,9 @@ check_overlap(PG_FUNCTION_ARGS) PartRelationInfo *prel; RangeEntry *ranges; - int i; + uint32 i; - prel = get_pathman_relation_info(partitioned_table, NULL); + prel = get_pathman_relation_info(parent_oid, NULL); if (!prel || prel->parttype != PT_RANGE) PG_RETURN_NULL(); @@ -385,7 +437,7 @@ build_check_constraint_name_attnum(PG_FUNCTION_ARGS) AttrNumber attnum = PG_GETARG_INT16(1); const char *result; - if (!get_rel_name(relid)) + if (get_rel_type_id(relid) == InvalidOid) elog(ERROR, "Invalid relation %u", relid); /* We explicitly do not support system attributes */ @@ -406,15 +458,82 @@ 
build_check_constraint_name_attname(PG_FUNCTION_ARGS) AttrNumber attnum = get_attnum(relid, text_to_cstring(attname)); const char *result; - if (!get_rel_name(relid)) + if (get_rel_type_id(relid) == InvalidOid) elog(ERROR, "Invalid relation %u", relid); if (attnum == InvalidAttrNumber) - elog(ERROR, "Relation '%s' has no column '%s'", - get_rel_name(relid), + elog(ERROR, "Relation \"%s\" has no column '%s'", + get_rel_name_or_relid(relid), text_to_cstring(attname)); result = build_check_constraint_name_internal(relid, attnum); PG_RETURN_TEXT_P(cstring_to_text(result)); } + +Datum +is_date_type(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(is_date_type_internal(PG_GETARG_OID(0))); +} + +Datum +get_attribute_type_name(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + text *attname = PG_GETARG_TEXT_P(1); + char *result; + HeapTuple tp; + + tp = SearchSysCacheAttName(relid, text_to_cstring(attname)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + result = format_type_be(att_tup->atttypid); + ReleaseSysCache(tp); + + PG_RETURN_TEXT_P(cstring_to_text(result)); + } + else + elog(ERROR, "Cannot find type name for attribute \"%s\" " + "of relation \"%s\"", + text_to_cstring(attname), get_rel_name_or_relid(relid)); + + PG_RETURN_NULL(); /* keep compiler happy */ +} + +Datum +is_attribute_nullable(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + text *attname = PG_GETARG_TEXT_P(1); + bool result = true; + HeapTuple tp; + + tp = SearchSysCacheAttName(relid, text_to_cstring(attname)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + result = !att_tup->attnotnull; + ReleaseSysCache(tp); + } + else + elog(ERROR, "Cannot find type name for attribute \"%s\" " + "of relation \"%s\"", + text_to_cstring(attname), get_rel_name_or_relid(relid)); + + PG_RETURN_BOOL(result); /* keep compiler happy */ +} + +/* + * DEBUG: set breakpoint here. 
+ */ +Datum +debug_capture(PG_FUNCTION_ARGS) +{ + /* Write something (doesn't really matter) */ + elog(WARNING, "debug_capture"); + + PG_RETURN_VOID(); +} + diff --git a/src/relation_info.c b/src/relation_info.c index 200c50c041..3ea02b73ea 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -19,16 +19,37 @@ #include "miscadmin.h" #include "storage/lmgr.h" #include "utils/builtins.h" -#include "utils/hsearch.h" #include "utils/fmgroids.h" +#include "utils/hsearch.h" #include "utils/lsyscache.h" +#include "utils/memutils.h" #include "utils/snapmgr.h" +static List *delayed_invalidation_parent_rels = NIL; +static List *delayed_invalidation_vague_rels = NIL; + +/* Add unique Oid to list, allocate in TopMemoryContext */ +#define list_add_unique(list, oid) \ + do { \ + MemoryContext old_mcxt = MemoryContextSwitchTo(TopMemoryContext); \ + list = list_append_unique_oid(list, ObjectIdGetDatum(oid)); \ + MemoryContextSwitchTo(old_mcxt); \ + } while (0) + +#define free_invalidation_list(list) \ + do { \ + list_free(list); \ + list = NIL; \ + } while (0) + + static Oid try_syscache_parent_search(Oid partition, PartParentSearch *status); static Oid get_parent_of_partition_internal(Oid partition, PartParentSearch *status, HASHACTION action); +static bool perform_parent_refresh(Oid parent); + #define FreeChildrenArray(prel) \ do { \ @@ -103,13 +124,16 @@ refresh_pathman_relation_info(Oid relid, /* Initialize PartRelationInfo using syscache & typcache */ prel->attnum = get_attnum(relid, part_column_name); prel->atttype = get_atttype(relid, prel->attnum); - prel->atttypmod = get_atttypmod(relid, prel->attnum); - prel->attbyval = get_typbyval(prel->atttype); + prel->atttypmod = get_atttypmod(relid, prel->attnum); /* Fetch HASH & CMP fuctions for atttype */ typcache = lookup_type_cache(prel->atttype, TYPECACHE_CMP_PROC | TYPECACHE_HASH_PROC); + prel->attbyval = typcache->typbyval; + prel->attlen = typcache->typlen; + prel->attalign = typcache->typalign; + prel->cmp_proc = 
typcache->cmp_proc; prel->hash_proc = typcache->hash_proc; @@ -142,26 +166,28 @@ refresh_pathman_relation_info(Oid relid, return prel; } -/* Invalidate PartRelationInfo cache entry. */ +/* Invalidate PartRelationInfo cache entry. Create new entry if 'found' is NULL */ void invalidate_pathman_relation_info(Oid relid, bool *found) { - bool found_prel; PartRelationInfo *prel = hash_search(partitioned_rels, (const void *) &relid, - HASH_ENTER, &found_prel); + (found ? HASH_FIND : HASH_ENTER), + found); - /* We should create entry if it doesn't exist */ - if (!found_prel) + if(found && PrelIsValid(prel)) + { + FreeChildrenArray(prel); + FreeRangesArray(prel); + } + else { prel->children = NULL; prel->ranges = NULL; } - prel->valid = false; /* now cache entry is invalid */ - - /* Set 'found' if needed */ - if (found) *found = found_prel; + if (prel) + prel->valid = false; /* now cache entry is invalid */ elog(DEBUG2, "Invalidating record for relation %u in pg_pathman's cache [%u]", @@ -227,6 +253,92 @@ remove_pathman_relation_info(Oid relid) } +/* + * Functions for delayed invalidation. 
+ */ + +/* Add new delayed invalidation job for a [ex-]parent relation */ +void +delay_invalidation_parent_rel(Oid parent) +{ + list_add_unique(delayed_invalidation_parent_rels, parent); +} + +/* Add new delayed invalidation job for a vague relation */ +void +delay_invalidation_vague_rel(Oid vague_rel) +{ + list_add_unique(delayed_invalidation_vague_rels, vague_rel); +} + +/* Finish all pending invalidation jobs if possible */ +void +finish_delayed_invalidation(void) +{ + /* Exit early if there's nothing to do */ + if (delayed_invalidation_parent_rels == NIL && + delayed_invalidation_vague_rels == NIL) + { + return; + } + + /* Check that current state is transactional */ + if (IsTransactionState()) + { + ListCell *lc; + + //elog(WARNING, "invalidating..."); + + /* Process relations that are (or were) definitely partitioned */ + foreach (lc, delayed_invalidation_parent_rels) + { + Oid parent = lfirst_oid(lc); + + if (!pathman_config_contains_relation(parent, NULL, NULL, NULL)) + remove_pathman_relation_info(parent); + else + invalidate_pathman_relation_info(parent, NULL); + } + + /* Process all other vague cases */ + foreach (lc, delayed_invalidation_vague_rels) + { + Oid vague_rel = lfirst_oid(lc); + + /* It might be a partitioned table or a partition */ + if (!perform_parent_refresh(vague_rel)) + { + PartParentSearch search; + Oid parent; + + parent = get_parent_of_partition(vague_rel, &search); + + switch (search) + { + case PPS_ENTRY_PART_PARENT: + perform_parent_refresh(parent); + break; + + case PPS_ENTRY_PARENT: + remove_pathman_relation_info(parent); + break; + + case PPS_NOT_SURE: + elog(ERROR, "This should never happen"); + break; + + default: + break; + } + } + } + + free_invalidation_list(delayed_invalidation_parent_rels); + free_invalidation_list(delayed_invalidation_vague_rels); + } +} + + /* * cache\forget\get PartParentInfo functions. 
*/ @@ -304,7 +416,7 @@ get_parent_of_partition_internal(Oid partition, if (ppar) { - if (status) *status = PPS_ENTRY_FOUND; + if (status) *status = PPS_ENTRY_PART_PARENT; parent = ppar->parent_rel; } /* Try fetching parent from syscache if 'status' is provided */ @@ -354,13 +466,14 @@ try_syscache_parent_search(Oid partition, PartParentSearch *status) { parent = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent; + if (status) *status = PPS_ENTRY_PARENT; + /* Check that PATHMAN_CONFIG contains this table */ if (pathman_config_contains_relation(parent, NULL, NULL, NULL)) { /* We've found the entry, update status */ - if (status) *status = PPS_ENTRY_FOUND; + if (status) *status = PPS_ENTRY_PART_PARENT; } - else parent = InvalidOid; /* invalidate 'parent' */ break; /* there should be no more rows */ } @@ -373,6 +486,30 @@ try_syscache_parent_search(Oid partition, PartParentSearch *status) } } +static bool +perform_parent_refresh(Oid parent) +{ + Datum values[Natts_pathman_config]; + bool isnull[Natts_pathman_config]; + + if (pathman_config_contains_relation(parent, values, isnull, NULL)) + { + text *attname; + PartType parttype; + + parttype = DatumGetPartType(values[Anum_pathman_config_parttype - 1]); + attname = DatumGetTextP(values[Anum_pathman_config_attname - 1]); + + if (!refresh_pathman_relation_info(parent, parttype, + text_to_cstring(attname))) + return false; + } + else + return false; + + return true; +} + /* * Safe PartType wrapper. 
*/ diff --git a/src/relation_info.h b/src/relation_info.h index be23764a29..ed140a92ba 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -14,6 +14,7 @@ #include "dsm_array.h" #include "postgres.h" +#include "access/attnum.h" #include "port/atomics.h" @@ -51,10 +52,12 @@ typedef struct RangeEntry *ranges; /* per-partition range entry or NULL */ PartType parttype; /* partitioning type (HASH | RANGE) */ - Index attnum; /* partitioned column's index */ + AttrNumber attnum; /* partitioned column's index */ Oid atttype; /* partitioned column's type */ - int32 atttypmod; /* partitioned column's type modifier */ + int32 atttypmod; /* partitioned column type modifier */ bool attbyval; /* is partitioned column stored by value? */ + int16 attlen; /* length of the partitioned column's type */ + int attalign; Oid cmp_proc, /* comparison fuction for 'atttype' */ hash_proc; /* hash function for 'atttype' */ @@ -92,7 +95,8 @@ typedef struct typedef enum { PPS_ENTRY_NOT_FOUND = 0, - PPS_ENTRY_FOUND, /* entry was found in pathman's or system cache */ + PPS_ENTRY_PARENT, /* entry was found, but pg_pathman doesn't know it */ + PPS_ENTRY_PART_PARENT, /* entry is parent and is known by pg_pathman */ PPS_NOT_SURE /* can't determine (not transactional state) */ } PartParentSearch; @@ -103,7 +107,7 @@ typedef enum #define PrelChildrenCount(prel) ( (prel)->children_count ) -#define PrelIsValid(prel) ( (prel)->valid ) +#define PrelIsValid(prel) ( (prel) && (prel)->valid ) PartRelationInfo *refresh_pathman_relation_info(Oid relid, @@ -113,6 +117,10 @@ void invalidate_pathman_relation_info(Oid relid, bool *found); void remove_pathman_relation_info(Oid relid); PartRelationInfo *get_pathman_relation_info(Oid relid, bool *found); +void delay_invalidation_parent_rel(Oid parent); +void delay_invalidation_vague_rel(Oid vague_rel); +void finish_delayed_invalidation(void); + void cache_parent_of_partition(Oid partition, Oid parent); Oid forget_parent_of_partition(Oid partition, 
PartParentSearch *status); Oid get_parent_of_partition(Oid partition, PartParentSearch *status); diff --git a/src/runtimeappend.h b/src/runtimeappend.h index f34c94aa2f..c5a6aa6de4 100644 --- a/src/runtimeappend.h +++ b/src/runtimeappend.h @@ -33,7 +33,6 @@ typedef struct { CustomScanState css; Oid relid; /* relid of the partitioned table */ - PartRelationInfo *prel; /* Restrictions to be checked during ReScan and Exec */ List *custom_exprs; diff --git a/src/utils.c b/src/utils.c index 0217ef3790..7e27cac1d7 100644 --- a/src/utils.c +++ b/src/utils.c @@ -10,25 +10,28 @@ #include "utils.h" +#include "access/htup_details.h" #include "access/nbtree.h" #include "access/sysattr.h" #include "access/xact.h" -#include "access/htup_details.h" #include "catalog/heap.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" #include "catalog/pg_extension.h" #include "commands/extension.h" #include "executor/spi.h" +#include "fmgr.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "nodes/makefuncs.h" #include "optimizer/var.h" #include "optimizer/restrictinfo.h" +#include "parser/parse_oper.h" #include "rewrite/rewriteManip.h" #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #include "utils/fmgroids.h" @@ -149,11 +152,12 @@ lock_rows_visitor(Plan *plan, void *context) foreach (mark_lc, tableoids) { TargetEntry *te = (TargetEntry *) lfirst(mark_lc); - Oid cur_oid; + const char *cur_oid_str = &(te->resname[TABLEOID_STR_BASE_LEN]); + Datum cur_oid_datum; - cur_oid = str_to_oid(&(te->resname[TABLEOID_STR_BASE_LEN])); + cur_oid_datum = DirectFunctionCall1(oidin, CStringGetDatum(cur_oid_str)); - if (cur_oid == parent_oid) + if (DatumGetObjectId(cur_oid_datum) == parent_oid) { char resname[64]; @@ -506,17 +510,6 @@ change_varnos_in_restrinct_info(RestrictInfo *rinfo, change_varno_context *conte } } -/* - * Convert number-as-string to Oid. 
- */ -Oid -str_to_oid(const char *cstr) -{ - Datum result = DirectFunctionCall1(oidin, CStringGetDatum(cstr)); - - return DatumGetObjectId(result); -} - /* * Basic plan tree walker * @@ -619,6 +612,7 @@ rowmark_add_tableoids(Query *parse) TargetEntry *tle; char resname[64]; + /* Check that table is partitioned */ if (!get_pathman_relation_info(parent, NULL)) continue; @@ -695,3 +689,81 @@ get_pathman_schema(void) return result; } + +/* + * Check if this is a "date"-related type. + */ +bool +is_date_type_internal(Oid typid) +{ + return typid == TIMESTAMPOID || + typid == TIMESTAMPTZOID || + typid == DATEOID; +} + +/* + * Check if this is a string type. + */ +bool +is_string_type_internal(Oid typid) +{ + return typid == TEXTOID || + typid == CSTRINGOID; +} + +/* + * Try to find binary operator. + * + * Returns operator function's Oid or throws an ERROR on InvalidOid. + */ +Oid +get_binary_operator_oid(char *oprname, Oid arg1, Oid arg2) +{ + Oid funcid = InvalidOid; + Operator op; + + op = oper(NULL, list_make1(makeString(oprname)), arg1, arg2, true, -1); + if (op) + { + funcid = oprfuncid(op); + ReleaseSysCache(op); + } + else + elog(ERROR, "Cannot find operator \"%s\"(%u, %u)", oprname, arg1, arg2); + + return funcid; +} + +/* + * Get CSTRING representation of Datum using the type Oid. + */ +char * +datum_to_cstring(Datum datum, Oid typid) +{ + char *result; + HeapTuple tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + + if (HeapTupleIsValid(tup)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tup); + FmgrInfo finfo; + + fmgr_info(typtup->typoutput, &finfo); + result = DatumGetCString(FunctionCall1(&finfo, datum)); + ReleaseSysCache(tup); + } + else + result = pstrdup("[error]"); + + return result; +} + +/* + * Try to get relname or at least relid as cstring. 
+ */ +char * +get_rel_name_or_relid(Oid relid) +{ + return DatumGetCString(DirectFunctionCall1(regclassout, + ObjectIdGetDatum(relid))); +} diff --git a/src/utils.h b/src/utils.h index e69a0f219c..179062c47a 100644 --- a/src/utils.h +++ b/src/utils.h @@ -26,42 +26,54 @@ typedef struct } change_varno_context; -void execute_on_xact_mcxt_reset(MemoryContext xact_context, - MemoryContextCallbackFunction cb_proc, - void *arg); - -bool clause_contains_params(Node *clause); - +/* + * Plan tree modification. + */ +void plan_tree_walker(Plan *plan, + void (*visitor) (Plan *plan, void *context), + void *context); List * build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); +void change_varnos(Node *node, Oid old_varno, Oid new_varno); +TriggerDesc * append_trigger_descs(TriggerDesc *src, + TriggerDesc *more, + bool *grown_up); +/* + * Rowmark processing. + */ +void rowmark_add_tableoids(Query *parse); +void postprocess_lock_rows(List *rtable, Plan *plan); + +/* + * Various traits. + */ +bool clause_contains_params(Node *clause); +bool is_date_type_internal(Oid typid); +bool is_string_type_internal(Oid typid); bool check_rinfo_for_partitioned_attr(List *rinfo, Index varno, AttrNumber varattno); -TriggerDesc * append_trigger_descs(TriggerDesc *src, - TriggerDesc *more, - bool *grown_up); +/* + * Misc. + */ +Oid get_pathman_schema(void); +List * list_reverse(List *l); +/* + * Handy execution-stage functions. 
+ */ +char * get_rel_name_or_relid(Oid relid); +Oid get_binary_operator_oid(char *opname, Oid arg1, Oid arg2); void fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, Oid type2); +void execute_on_xact_mcxt_reset(MemoryContext xact_context, + MemoryContextCallbackFunction cb_proc, + void *arg); +char * datum_to_cstring(Datum datum, Oid typid); -List * list_reverse(List *l); - -void change_varnos(Node *node, Oid old_varno, Oid new_varno); - -Oid str_to_oid(const char *cstr); - -void plan_tree_walker(Plan *plan, - void (*visitor) (Plan *plan, void *context), - void *context); - -void rowmark_add_tableoids(Query *parse); - -void postprocess_lock_rows(List *rtable, Plan *plan); - -Oid get_pathman_schema(void); #endif diff --git a/src/worker.c b/src/worker.c index 346b53a6e0..1d37d034e8 100644 --- a/src/worker.c +++ b/src/worker.c @@ -51,30 +51,6 @@ typedef struct } PartitionArgs; -#ifdef USE_ASSERT_CHECKING - - #include "access/htup_details.h" - #include "utils/syscache.h" - - #define PrintUnpackedDatum(datum, typid) \ - do { \ - HeapTuple tup = SearchSysCache1(TYPEOID, \ - ObjectIdGetDatum(typid)); \ - if (HeapTupleIsValid(tup)) \ - { \ - Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tup); \ - FmgrInfo finfo; \ - fmgr_info(typtup->typoutput, &finfo); \ - elog(LOG, "BGW: arg->value is '%s' [%u]", \ - DatumGetCString(FunctionCall1(&finfo, datum)), \ - MyProcPid); \ - ReleaseSysCache(tup); \ - } \ - } while (0) -#elif - #define PrintUnpackedDatum(datum, typid) (true) -#endif - #define PackDatumToByteArray(array, datum, datum_size, typbyval) \ do { \ memcpy((void *) (array), \ @@ -98,7 +74,8 @@ typedef struct (const void *) array, \ datum_size); \ } \ - PrintUnpackedDatum(datum, typid); \ + elog(LOG, "BGW: arg->value is '%s' [%u]", \ + DebugPrintDatum(datum, typid), MyProcPid); \ } while (0) @@ -153,6 +130,8 @@ create_partitions_bg_worker_segment(Oid relid, Datum value, Oid value_type) /* * Starts background worker that will create new partitions, * waits till it 
finishes the job and returns the result (new partition oid) + * + * NB: This function should not be called directly, use create_partitions() instead. */ Oid create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) @@ -238,7 +217,7 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) if (child_oid == InvalidOid) elog(ERROR, "Attempt to append new partitions to relation \"%s\" failed", - get_rel_name(relid)); + get_rel_name_or_relid(relid)); return child_oid; } @@ -286,7 +265,10 @@ bg_worker_main(Datum main_arg) value, /* unpacked Datum */ args->value_type); - CommitTransactionCommand(); + if (args->result == InvalidOid) + AbortCurrentTransaction(); + else + CommitTransactionCommand(); dsm_detach(segment); } From 476acbeba31a160bd5446a6d6e14fa9263bd4f51 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 9 Aug 2016 22:52:11 +0300 Subject: [PATCH 032/184] fix incorrect HASH_REMOVE usages, remove useless params in Prel-related macros and functions --- src/hooks.c | 6 +++--- src/init.c | 3 +-- src/nodes_common.c | 6 +++--- src/partition_filter.c | 4 ++-- src/pg_pathman.c | 13 ++++++------- src/pl_funcs.c | 24 ++++++++++++------------ src/relation_info.c | 22 ++++++++++++++++------ src/relation_info.h | 6 +++--- src/utils.c | 2 +- 9 files changed, 47 insertions(+), 39 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 26a3ef7cb8..41b3b4e8fa 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -61,7 +61,7 @@ pathman_join_pathlist_hook(PlannerInfo *root, /* Check that innerrel is a BASEREL with inheritors & PartRelationInfo */ if (innerrel->reloptkind != RELOPT_BASEREL || !inner_rte->inh || - !(inner_prel = get_pathman_relation_info(inner_rte->relid, NULL))) + !(inner_prel = get_pathman_relation_info(inner_rte->relid))) { return; /* Obviously not our case */ } @@ -177,7 +177,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb return; /* Proceed iff relation 'rel' is partitioned */ - if ((prel = 
get_pathman_relation_info(rte->relid, NULL)) != NULL) + if ((prel = get_pathman_relation_info(rte->relid)) != NULL) { ListCell *lc; Oid *children; @@ -225,7 +225,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb rte->inh = true; /* we must restore 'inh' flag! */ - children = PrelGetChildrenArray(prel, true); + children = PrelGetChildrenArray(prel); ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); /* Make wrappers over restrictions and collect final rangeset */ diff --git a/src/init.c b/src/init.c index 208ffcd850..b6724eecc9 100644 --- a/src/init.c +++ b/src/init.c @@ -125,8 +125,7 @@ init_local_config(void) PART_RELS_SIZE * CHILD_FACTOR, &ctl, HASH_ELEM | HASH_BLOBS); - CacheRegisterRelcacheCallback(pathman_relcache_hook, - PointerGetDatum(NULL)); + CacheRegisterRelcacheCallback(pathman_relcache_hook, PointerGetDatum(NULL)); } /* diff --git a/src/nodes_common.c b/src/nodes_common.c index 5caca19dc5..1a5ffeb56d 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -247,7 +247,7 @@ get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel) uint32 allocated = INITIAL_ALLOC_NUM; uint32 used = 0; Oid *result = (Oid *) palloc(allocated * sizeof(Oid)); - Oid *children = PrelGetChildrenArray(prel, true); + Oid *children = PrelGetChildrenArray(prel); foreach (range_cell, ranges) { @@ -349,7 +349,7 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, { RuntimeAppendPath *rpath = (RuntimeAppendPath *) best_path; CustomScan *cscan; - PartRelationInfo *prel = get_pathman_relation_info(rpath->relid, NULL); + PartRelationInfo *prel = get_pathman_relation_info(rpath->relid); cscan = makeNode(CustomScan); cscan->custom_scan_tlist = NIL; /* initial value (empty list) */ @@ -496,7 +496,7 @@ rescan_append_common(CustomScanState *node) Oid *parts; int nparts; - prel = get_pathman_relation_info(scan_state->relid, NULL); + prel = get_pathman_relation_info(scan_state->relid); Assert(prel); 
ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); diff --git a/src/partition_filter.c b/src/partition_filter.c index 43fe6e24cb..3bf3c710d0 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -167,7 +167,7 @@ partition_filter_exec(CustomScanState *node) Datum value; /* Fetch PartRelationInfo for this partitioned relation */ - prel = get_pathman_relation_info(state->partitioned_table, NULL); + prel = get_pathman_relation_info(state->partitioned_table); if (!prel) { if (!state->warning_triggered) @@ -389,7 +389,7 @@ partition_filter_visitor(Plan *plan, void *context) { Index rindex = lfirst_int(lc2); Oid relid = getrelid(rindex, rtable); - PartRelationInfo *prel = get_pathman_relation_info(relid, NULL); + PartRelationInfo *prel = get_pathman_relation_info(relid); /* Check that table is partitioned */ if (prel) diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 7b3d03b449..0895f80f90 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -190,7 +190,7 @@ disable_inheritance(Query *parse) if (rte->inh) { /* Look up this relation in pathman local cache */ - prel = get_pathman_relation_info(rte->relid, NULL); + prel = get_pathman_relation_info(rte->relid); if (prel) { /* We'll set this flag later */ @@ -300,7 +300,6 @@ handle_modification_query(Query *parse) RangeTblEntry *rte; WrapperNode *wrap; Expr *expr; - bool found; WalkerContext context; Assert(parse->commandType == CMD_UPDATE || @@ -308,9 +307,9 @@ handle_modification_query(Query *parse) Assert(parse->resultRelation > 0); rte = rt_fetch(parse->resultRelation, parse->rtable); - prel = get_pathman_relation_info(rte->relid, &found); + prel = get_pathman_relation_info(rte->relid); - if (!found) + if (!prel) return; /* Parse syntax tree and extract partition ranges */ @@ -331,7 +330,7 @@ handle_modification_query(Query *parse) IndexRange irange = linitial_irange(ranges); if (irange.ir_lower == irange.ir_upper) { - Oid *children = PrelGetChildrenArray(prel, true); + 
Oid *children = PrelGetChildrenArray(prel); rte->relid = children[irange.ir_lower]; rte->inh = false; } @@ -803,7 +802,7 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) bool isnull[Natts_pathman_config]; /* Get both PartRelationInfo & PATHMAN_CONFIG contents for this relation */ - if ((prel = get_pathman_relation_info(relid, NULL)) != NULL && + if ((prel = get_pathman_relation_info(relid)) != NULL && pathman_config_contains_relation(relid, values, isnull, NULL)) { Datum min_rvalue, @@ -1229,7 +1228,7 @@ search_range_partition_eq(const Datum value, int nranges; WrapperNode result; - ranges = PrelGetRangesArray(prel, true); + ranges = PrelGetRangesArray(prel); nranges = PrelChildrenCount(prel); select_range_partitions(value, diff --git a/src/pl_funcs.c b/src/pl_funcs.c index af888684a5..5cb4ed7127 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -119,7 +119,7 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) RangeEntry found_rentry; search_rangerel_result search_state; - prel = get_pathman_relation_info(parent_oid, NULL); + prel = get_pathman_relation_info(parent_oid); if (!prel) PG_RETURN_NULL(); @@ -190,13 +190,13 @@ get_partition_range(PG_FUNCTION_ARGS) TypeCacheEntry *tce; ArrayType *arr; - prel = get_pathman_relation_info(parent_oid, NULL); + prel = get_pathman_relation_info(parent_oid); if (!prel) PG_RETURN_NULL(); - ranges = PrelGetRangesArray(prel, true); - parts = PrelGetChildrenArray(prel, true); + ranges = PrelGetRangesArray(prel); + parts = PrelGetChildrenArray(prel); tce = lookup_type_cache(prel->atttype, 0); /* Looking for specified partition */ @@ -279,7 +279,7 @@ get_range_by_idx(PG_FUNCTION_ARGS) RangeEntry re; Datum *elems; - prel = get_pathman_relation_info(parent_oid, NULL); + prel = get_pathman_relation_info(parent_oid); if (!prel) elog(ERROR, "Cannot get partitioning cache entry for relation %u", parent_oid); @@ -287,7 +287,7 @@ get_range_by_idx(PG_FUNCTION_ARGS) elog(ERROR, "Partition #%d does not exist (max is #%u)", 
idx, PrelChildrenCount(prel) - 1); - ranges = PrelGetRangesArray(prel, true); + ranges = PrelGetRangesArray(prel); if (idx >= 0) re = ranges[idx]; else if(idx == -1) @@ -316,12 +316,12 @@ get_min_range_value(PG_FUNCTION_ARGS) PartRelationInfo *prel; RangeEntry *ranges; - prel = get_pathman_relation_info(parent_oid, NULL); + prel = get_pathman_relation_info(parent_oid); if (!prel || prel->parttype != PT_RANGE || PrelChildrenCount(prel) == 0) PG_RETURN_NULL(); - ranges = PrelGetRangesArray(prel, true); + ranges = PrelGetRangesArray(prel); PG_RETURN_DATUM(ranges[0].min); } @@ -336,12 +336,12 @@ get_max_range_value(PG_FUNCTION_ARGS) PartRelationInfo *prel; RangeEntry *ranges; - prel = get_pathman_relation_info(parent_oid, NULL); + prel = get_pathman_relation_info(parent_oid); if (!prel || prel->parttype != PT_RANGE || PrelChildrenCount(prel) == 0) PG_RETURN_NULL(); - ranges = PrelGetRangesArray(prel, true); + ranges = PrelGetRangesArray(prel); PG_RETURN_DATUM(ranges[PrelChildrenCount(prel) - 1].max); } @@ -368,7 +368,7 @@ check_overlap(PG_FUNCTION_ARGS) RangeEntry *ranges; uint32 i; - prel = get_pathman_relation_info(parent_oid, NULL); + prel = get_pathman_relation_info(parent_oid); if (!prel || prel->parttype != PT_RANGE) PG_RETURN_NULL(); @@ -377,7 +377,7 @@ check_overlap(PG_FUNCTION_ARGS) fill_type_cmp_fmgr_info(&cmp_func_1, p1_type, prel->atttype); fill_type_cmp_fmgr_info(&cmp_func_2, p2_type, prel->atttype); - ranges = PrelGetRangesArray(prel, true); + ranges = PrelGetRangesArray(prel); for (i = 0; i < PrelChildrenCount(prel); i++) { int c1 = FunctionCall2(&cmp_func_1, p1, ranges[i].max); diff --git a/src/relation_info.c b/src/relation_info.c index 3ea02b73ea..5fdc8517fc 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -124,7 +124,7 @@ refresh_pathman_relation_info(Oid relid, /* Initialize PartRelationInfo using syscache & typcache */ prel->attnum = get_attnum(relid, part_column_name); prel->atttype = get_atttype(relid, prel->attnum); - 
prel->atttypmod = get_atttypmod(relid, prel->attnum); + prel->atttypmod = get_atttypmod(relid, prel->attnum); /* Fetch HASH & CMP fuctions for atttype */ typcache = lookup_type_cache(prel->atttype, @@ -196,11 +196,11 @@ invalidate_pathman_relation_info(Oid relid, bool *found) /* Get PartRelationInfo from local cache. */ PartRelationInfo * -get_pathman_relation_info(Oid relid, bool *found) +get_pathman_relation_info(Oid relid) { PartRelationInfo *prel = hash_search(partitioned_rels, (const void *) &relid, - HASH_FIND, found); + HASH_FIND, NULL); /* Refresh PartRelationInfo if needed */ if (prel && !PrelIsValid(prel)) @@ -238,8 +238,7 @@ remove_pathman_relation_info(Oid relid) { PartRelationInfo *prel = hash_search(partitioned_rels, (const void *) &relid, - HASH_REMOVE, NULL); - + HASH_FIND, NULL); if (prel) { /* Free these arrays iff they're not NULL */ @@ -247,6 +246,11 @@ remove_pathman_relation_info(Oid relid) FreeRangesArray(prel); } + /* Now let's remove the entry completely */ + hash_search(partitioned_rels, + (const void *) &relid, + HASH_REMOVE, NULL); + elog(DEBUG2, "Removing record for relation %u in pg_pathman's cache [%u]", relid, MyProcPid); @@ -393,7 +397,7 @@ get_parent_of_partition_internal(Oid partition, Oid parent; PartParentInfo *ppar = hash_search(parent_cache, (const void *) &partition, - action, NULL); + HASH_FIND, NULL); /* Set 'action_str' */ switch (action) @@ -418,6 +422,12 @@ get_parent_of_partition_internal(Oid partition, { if (status) *status = PPS_ENTRY_PART_PARENT; parent = ppar->parent_rel; + + /* Remove entry if necessary */ + if (action == HASH_REMOVE) + hash_search(parent_cache, + (const void *) &partition, + HASH_REMOVE, NULL); } /* Try fetching parent from syscache if 'status' is provided */ else if (status) diff --git a/src/relation_info.h b/src/relation_info.h index ed140a92ba..9806f0c5b6 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -101,9 +101,9 @@ typedef enum } PartParentSearch; -#define 
PrelGetChildrenArray(prel, copy) ( (prel)->children ) +#define PrelGetChildrenArray(prel) ( (prel)->children ) -#define PrelGetRangesArray(prel, copy) ( (prel)->ranges ) +#define PrelGetRangesArray(prel) ( (prel)->ranges ) #define PrelChildrenCount(prel) ( (prel)->children_count ) @@ -115,7 +115,7 @@ PartRelationInfo *refresh_pathman_relation_info(Oid relid, const char *part_column_name); void invalidate_pathman_relation_info(Oid relid, bool *found); void remove_pathman_relation_info(Oid relid); -PartRelationInfo *get_pathman_relation_info(Oid relid, bool *found); +PartRelationInfo *get_pathman_relation_info(Oid relid); void delay_invalidation_parent_rel(Oid parent); void delay_invalidation_vague_rel(Oid vague_rel); diff --git a/src/utils.c b/src/utils.c index 7e27cac1d7..56cf1f067b 100644 --- a/src/utils.c +++ b/src/utils.c @@ -613,7 +613,7 @@ rowmark_add_tableoids(Query *parse) char resname[64]; /* Check that table is partitioned */ - if (!get_pathman_relation_info(parent, NULL)) + if (!get_pathman_relation_info(parent)) continue; var = makeVar(rc->rti, From 834db2d4f4a1ff0cfa1c83f6dd59ff7725cd611e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 9 Aug 2016 23:33:51 +0300 Subject: [PATCH 033/184] move HASH & RANGE function declarations from init.sql to their corresponding files, get_partition_range() -> +get_range_by_part_oid() --- hash.sql | 14 +++++++++++ init.sql | 61 --------------------------------------------- range.sql | 56 ++++++++++++++++++++++++++++++++++++++--- src/hooks.c | 2 +- src/pl_funcs.c | 10 +++----- src/relation_info.c | 2 ++ 6 files changed, 73 insertions(+), 72 deletions(-) diff --git a/hash.sql b/hash.sql index 2accbe1d29..ed3883c3de 100644 --- a/hash.sql +++ b/hash.sql @@ -176,3 +176,17 @@ BEGIN END LOOP; END $$ LANGUAGE plpgsql; + +/* + * Returns hash function OID for specified type + */ +CREATE OR REPLACE FUNCTION @extschema@.get_type_hash_func(OID) +RETURNS OID AS 'pg_pathman', 'get_type_hash_func' +LANGUAGE C STRICT; + +/* + * 
Calculates hash for integer value + */ +CREATE OR REPLACE FUNCTION @extschema@.get_hash(INTEGER, INTEGER) +RETURNS INTEGER AS 'pg_pathman', 'get_hash' +LANGUAGE C STRICT; diff --git a/init.sql b/init.sql index b5aae811d2..d5145df365 100644 --- a/init.sql +++ b/init.sql @@ -312,15 +312,6 @@ CREATE OR REPLACE FUNCTION @extschema@.is_date_type(cls REGTYPE) RETURNS BOOLEAN AS 'pg_pathman', 'is_date_type' LANGUAGE C STRICT; -/* - * Checks if range overlaps with existing partitions. - * Returns TRUE if overlaps and FALSE otherwise. - */ -CREATE OR REPLACE FUNCTION @extschema@.check_overlap( - parent_relid OID, range_min ANYELEMENT, range_max ANYELEMENT) -RETURNS BOOLEAN AS 'pg_pathman', 'check_overlap' -LANGUAGE C STRICT; - CREATE OR REPLACE FUNCTION @extschema@.on_create_partitions(relid OID) RETURNS VOID AS 'pg_pathman', 'on_partitions_created' @@ -335,58 +326,6 @@ RETURNS VOID AS 'pg_pathman', 'on_partitions_removed' LANGUAGE C STRICT; -CREATE OR REPLACE FUNCTION @extschema@.find_or_create_range_partition(relid OID, value ANYELEMENT) -RETURNS OID AS 'pg_pathman', 'find_or_create_range_partition' -LANGUAGE C STRICT; - - -/* - * Returns min and max values for specified RANGE partition. 
- */ -CREATE OR REPLACE FUNCTION @extschema@.get_partition_range( - parent_relid OID, partition_relid OID, dummy ANYELEMENT) -RETURNS ANYARRAY AS 'pg_pathman', 'get_partition_range' -LANGUAGE C STRICT; - - -/* - * Returns N-th range (in form of array) - */ -CREATE OR REPLACE FUNCTION @extschema@.get_range_by_idx( - parent_relid OID, idx INTEGER, dummy ANYELEMENT) -RETURNS ANYARRAY AS 'pg_pathman', 'get_range_by_idx' -LANGUAGE C STRICT; - -/* - * Returns min value of the first range for relation - */ -CREATE OR REPLACE FUNCTION @extschema@.get_min_range_value( - parent_relid OID, dummy ANYELEMENT) -RETURNS ANYELEMENT AS 'pg_pathman', 'get_min_range_value' -LANGUAGE C STRICT; - -/* - * Returns max value of the last range for relation - */ -CREATE OR REPLACE FUNCTION @extschema@.get_max_range_value( - parent_relid OID, dummy ANYELEMENT) -RETURNS ANYELEMENT AS 'pg_pathman', 'get_max_range_value' -LANGUAGE C STRICT; - -/* - * Returns hash function OID for specified type - */ -CREATE OR REPLACE FUNCTION @extschema@.get_type_hash_func(OID) -RETURNS OID AS 'pg_pathman', 'get_type_hash_func' -LANGUAGE C STRICT; - -/* - * Calculates hash for integer value - */ -CREATE OR REPLACE FUNCTION @extschema@.get_hash(INTEGER, INTEGER) -RETURNS INTEGER AS 'pg_pathman', 'get_hash' -LANGUAGE C STRICT; - /* * Checks if attribute is nullable */ diff --git a/range.sql b/range.sql index 3f344ce753..e9a01b27c5 100644 --- a/range.sql +++ b/range.sql @@ -465,7 +465,7 @@ BEGIN END IF; /* Get partition values range */ - p_range := @extschema@.get_partition_range(v_parent_relid, v_child_relid, 0); + p_range := @extschema@.get_range_by_part_oid(v_parent_relid, v_child_relid, 0); IF p_range IS NULL THEN RAISE EXCEPTION 'Could not find specified partition'; END IF; @@ -602,8 +602,8 @@ BEGIN * first and second elements of array are MIN and MAX of partition1 * third and forth elements are MIN and MAX of partition2 */ - p_range := @extschema@.get_partition_range(p_parent_relid, p_part1::oid, 0) || - 
@extschema@.get_partition_range(p_parent_relid, p_part2::oid, 0); + p_range := @extschema@.get_range_by_part_oid(p_parent_relid, p_part1, 0) || + @extschema@.get_range_by_part_oid(p_parent_relid, p_part2, 0); /* Check if ranges are adjacent */ IF p_range[1] != p_range[4] AND p_range[2] != p_range[3] THEN @@ -1080,6 +1080,7 @@ BEGIN END $$ LANGUAGE plpgsql; + /* * Construct CHECK constraint condition for a range partition. */ @@ -1089,3 +1090,52 @@ CREATE OR REPLACE FUNCTION @extschema@.get_range_condition( p_end_value ANYELEMENT) RETURNS TEXT AS 'pg_pathman', 'get_range_condition' LANGUAGE C STRICT; + +/* + * Returns N-th range (as an array of two elements). + */ +CREATE OR REPLACE FUNCTION @extschema@.get_range_by_idx( + parent_relid OID, idx INTEGER, dummy ANYELEMENT) +RETURNS ANYARRAY AS 'pg_pathman', 'get_range_by_idx' +LANGUAGE C STRICT; + +/* + * Returns min and max values for specified RANGE partition. + */ +CREATE OR REPLACE FUNCTION @extschema@.get_range_by_part_oid( + parent_relid OID, partition_relid OID, dummy ANYELEMENT) +RETURNS ANYARRAY AS 'pg_pathman', 'get_range_by_part_oid' +LANGUAGE C STRICT; + +/* + * Returns min value of the first partition's RangeEntry. + */ +CREATE OR REPLACE FUNCTION @extschema@.get_min_range_value( + parent_relid OID, dummy ANYELEMENT) +RETURNS ANYELEMENT AS 'pg_pathman', 'get_min_range_value' +LANGUAGE C STRICT; + +/* + * Returns max value of the last partition's RangeEntry. + */ +CREATE OR REPLACE FUNCTION @extschema@.get_max_range_value( + parent_relid OID, dummy ANYELEMENT) +RETURNS ANYELEMENT AS 'pg_pathman', 'get_max_range_value' +LANGUAGE C STRICT; + +/* + * Checks if range overlaps with existing partitions. + * Returns TRUE if overlaps and FALSE otherwise. + */ +CREATE OR REPLACE FUNCTION @extschema@.check_overlap( + parent_relid OID, range_min ANYELEMENT, range_max ANYELEMENT) +RETURNS BOOLEAN AS 'pg_pathman', 'check_overlap' +LANGUAGE C STRICT; + +/* + * Needed for an UPDATE trigger. 
+ */ +CREATE OR REPLACE FUNCTION @extschema@.find_or_create_range_partition( + relid OID, value ANYELEMENT) +RETURNS OID AS 'pg_pathman', 'find_or_create_range_partition' +LANGUAGE C STRICT; diff --git a/src/hooks.c b/src/hooks.c index 41b3b4e8fa..2efeb21549 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -396,7 +396,7 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) PlannedStmt *result; - /* TODO: fix these commands (traverse whole query tree) */ + /* FIXME: fix these commands (traverse whole query tree) */ if (IsPathmanReady()) { switch(parse->commandType) diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 5cb4ed7127..42932739b9 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -30,7 +30,7 @@ PG_FUNCTION_INFO_V1( on_partitions_removed ); PG_FUNCTION_INFO_V1( find_or_create_range_partition); PG_FUNCTION_INFO_V1( get_range_condition ); PG_FUNCTION_INFO_V1( get_range_by_idx ); -PG_FUNCTION_INFO_V1( get_partition_range ); +PG_FUNCTION_INFO_V1( get_range_by_part_oid ); PG_FUNCTION_INFO_V1( acquire_partitions_lock ); PG_FUNCTION_INFO_V1( release_partitions_lock ); PG_FUNCTION_INFO_V1( check_overlap ); @@ -169,14 +169,10 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) } /* - * Returns range (min, max) as output parameters - * - * first argument is the parent relid - * second is the partition relid - * third and forth are MIN and MAX output parameters + * Returns range (min, max) as output parameters. 
*/ Datum -get_partition_range(PG_FUNCTION_ARGS) +get_range_by_part_oid(PG_FUNCTION_ARGS) { Oid parent_oid = PG_GETARG_OID(0); Oid child_oid = PG_GETARG_OID(1); diff --git a/src/relation_info.c b/src/relation_info.c index 5fdc8517fc..6d25c8652e 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -219,6 +219,7 @@ get_pathman_relation_info(Oid relid) attname = TextDatumGetCString(values[Anum_pathman_config_attname - 1]); /* Refresh partitioned table cache entry */ + /* TODO: possible refactoring, pass found 'prel' instead of searching */ refresh_pathman_relation_info(relid, part_type, attname); } /* Else clear remaining cache entry */ @@ -347,6 +348,7 @@ finish_delayed_invalidation(void) * cache\forget\get PartParentInfo functions. */ +/* Create "partition+parent" pair in local cache */ void cache_parent_of_partition(Oid partition, Oid parent) { From 7d31d32908c619a3d02f989384a6798a4b49811e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 10 Aug 2016 00:14:57 +0300 Subject: [PATCH 034/184] print relation name instead of Oid whenever possible, new TODOs --- range.sql | 9 +++++-- src/init.c | 16 +++++++------ src/pg_pathman.c | 8 ++++--- src/pl_funcs.c | 61 +++++++++++++++++++++++++----------------------- 4 files changed, 53 insertions(+), 41 deletions(-) diff --git a/range.sql b/range.sql index e9a01b27c5..44a4e5f4ce 100644 --- a/range.sql +++ b/range.sql @@ -8,7 +8,9 @@ * ------------------------------------------------------------------------ */ -CREATE OR REPLACE FUNCTION @extschema@.get_sequence_name(plain_schema TEXT, plain_relname TEXT) +CREATE OR REPLACE FUNCTION @extschema@.get_sequence_name( + plain_schema TEXT, + plain_relname TEXT) RETURNS TEXT AS $$ BEGIN @@ -17,7 +19,10 @@ END $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION @extschema@.create_or_replace_sequence(plain_schema TEXT, plain_relname TEXT, OUT seq_name TEXT) +CREATE OR REPLACE FUNCTION @extschema@.create_or_replace_sequence( + plain_schema TEXT, + plain_relname TEXT, + OUT 
seq_name TEXT) AS $$ BEGIN seq_name := @extschema@.get_sequence_name(plain_schema, plain_relname); diff --git a/src/init.c b/src/init.c index b6724eecc9..08b93ed60d 100644 --- a/src/init.c +++ b/src/init.c @@ -187,8 +187,8 @@ fill_prel_with_partitions(const Oid *partitions, prel->children[hash] = partitions[i]; else elog(ERROR, - "Wrong constraint format for HASH partition %u", - partitions[i]); + "Wrong constraint format for HASH partition \"%s\"", + get_rel_name_or_relid(partitions[i])); } break; @@ -205,13 +205,14 @@ fill_prel_with_partitions(const Oid *partitions, } else elog(ERROR, - "Wrong constraint format for RANGE partition %u", - partitions[i]); + "Wrong constraint format for RANGE partition \"%s\"", + get_rel_name_or_relid(partitions[i])); } break; default: - elog(ERROR, "Unknown partitioning type for relation %u", prel->key); + elog(ERROR, "Unknown partitioning type for relation \"%s\"", + get_rel_name_or_relid(prel->key)); } } @@ -237,8 +238,9 @@ fill_prel_with_partitions(const Oid *partitions, for (i = 0; i < PrelChildrenCount(prel); i++) { if (prel->children[i] == InvalidOid) - elog(ERROR, "pg_pathman's cache for relation %u " - "has not been properly initialized", prel->key); + elog(ERROR, "pg_pathman's cache for relation \"%s\" " + "has not been properly initialized", + get_rel_name_or_relid(prel->key)); } #endif } diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 0895f80f90..e876f053f7 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -754,7 +754,7 @@ spawn_partitions(const PartRelationInfo *prel, /* ...and create partition */ ret = SPI_execute_with_args(query, 3, types, values, nulls, false, 0); if (ret != SPI_OK_SELECT) - elog(ERROR, "Could not create partition"); + elog(ERROR, "Could not spawn a partition"); /* Set 'last_partition' if necessary */ if (last_partition) @@ -877,7 +877,8 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) SPI_finish(); /* close SPI connection */ } else - elog(ERROR, "Relation %u is not 
partitioned by pg_pathman", relid); + elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", + get_rel_name_or_relid(relid)); } PG_CATCH(); { @@ -928,7 +929,8 @@ create_partitions(Oid relid, Datum value, Oid value_type) } } else - elog(ERROR, "Relation %u is not partitioned by pg_pathman", relid); + elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", + get_rel_name_or_relid(relid)); /* Check that 'last_partition' is valid */ if (last_partition == InvalidOid) diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 42932739b9..075de1be5e 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -277,7 +277,8 @@ get_range_by_idx(PG_FUNCTION_ARGS) prel = get_pathman_relation_info(parent_oid); if (!prel) - elog(ERROR, "Cannot get partitioning cache entry for relation %u", parent_oid); + elog(ERROR, "Cannot get partitioning cache entry for relation \"%s\"", + get_rel_name_or_relid(parent_oid)); if (((uint32) abs(idx)) >= PrelChildrenCount(prel)) elog(ERROR, "Partition #%d does not exist (max is #%u)", @@ -334,6 +335,7 @@ get_max_range_value(PG_FUNCTION_ARGS) prel = get_pathman_relation_info(parent_oid); + /* TODO: separate all these checks, they look ugly together */ if (!prel || prel->parttype != PT_RANGE || PrelChildrenCount(prel) == 0) PG_RETURN_NULL(); @@ -392,6 +394,7 @@ check_overlap(PG_FUNCTION_ARGS) Datum acquire_partitions_lock(PG_FUNCTION_ARGS) { + /* FIXME: have to find another way (shmem maybe?) 
*/ LWLockAcquire(pmstate->edit_partitions_lock, LW_EXCLUSIVE); PG_RETURN_NULL(); } @@ -426,6 +429,32 @@ get_hash(PG_FUNCTION_ARGS) PG_RETURN_UINT32(make_hash(value, part_count)); } +Datum +get_attribute_type_name(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + text *attname = PG_GETARG_TEXT_P(1); + char *result; + HeapTuple tp; + + /* NOTE: for now it's the most efficient way */ + tp = SearchSysCacheAttName(relid, text_to_cstring(attname)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + result = format_type_be(att_tup->atttypid); + ReleaseSysCache(tp); + + PG_RETURN_TEXT_P(cstring_to_text(result)); + } + else + elog(ERROR, "Cannot find type name for attribute \"%s\" " + "of relation \"%s\"", + text_to_cstring(attname), get_rel_name_or_relid(relid)); + + PG_RETURN_NULL(); /* keep compiler happy */ +} + Datum build_check_constraint_name_attnum(PG_FUNCTION_ARGS) { @@ -459,8 +488,7 @@ build_check_constraint_name_attname(PG_FUNCTION_ARGS) if (attnum == InvalidAttrNumber) elog(ERROR, "Relation \"%s\" has no column '%s'", - get_rel_name_or_relid(relid), - text_to_cstring(attname)); + get_rel_name_or_relid(relid), text_to_cstring(attname)); result = build_check_constraint_name_internal(relid, attnum); @@ -473,31 +501,6 @@ is_date_type(PG_FUNCTION_ARGS) PG_RETURN_BOOL(is_date_type_internal(PG_GETARG_OID(0))); } -Datum -get_attribute_type_name(PG_FUNCTION_ARGS) -{ - Oid relid = PG_GETARG_OID(0); - text *attname = PG_GETARG_TEXT_P(1); - char *result; - HeapTuple tp; - - tp = SearchSysCacheAttName(relid, text_to_cstring(attname)); - if (HeapTupleIsValid(tp)) - { - Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); - result = format_type_be(att_tup->atttypid); - ReleaseSysCache(tp); - - PG_RETURN_TEXT_P(cstring_to_text(result)); - } - else - elog(ERROR, "Cannot find type name for attribute \"%s\" " - "of relation \"%s\"", - text_to_cstring(attname), get_rel_name_or_relid(relid)); - - PG_RETURN_NULL(); /* 
keep compiler happy */ -} - Datum is_attribute_nullable(PG_FUNCTION_ARGS) { @@ -522,7 +525,7 @@ is_attribute_nullable(PG_FUNCTION_ARGS) } /* - * DEBUG: set breakpoint here. + * NOTE: used for DEBUG, set breakpoint here. */ Datum debug_capture(PG_FUNCTION_ARGS) From f9fa69a15fe1cf560939595b4baaad93a9f2d5d9 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 10 Aug 2016 11:44:12 +0300 Subject: [PATCH 035/184] Rewrite pathman_ddl_trigger_func(), store RangeEntry's min & max in TopMemoryContext if needed, update FreeChildrenArray & FreeRangesArray --- init.sql | 25 ++++++++----- src/init.c | 54 +++++++++++++++++++++++---- src/pathman.h | 2 - src/relation_info.c | 89 ++++++++++++++++++++++++++++++--------------- 4 files changed, 120 insertions(+), 50 deletions(-) diff --git a/init.sql b/init.sql index d5145df365..809fe20c74 100644 --- a/init.sql +++ b/init.sql @@ -197,17 +197,22 @@ CREATE OR REPLACE FUNCTION @extschema@.pathman_ddl_trigger_func() RETURNS event_trigger AS $$ DECLARE - obj record; + obj record; + pg_class_oid oid; + BEGIN - FOR obj IN SELECT * FROM pg_event_trigger_dropped_objects() as events - JOIN @extschema@.pathman_config as cfg - ON partrel::oid = events.objid - LOOP - IF obj.object_type = 'table' THEN - EXECUTE 'DELETE FROM @extschema@.pathman_config WHERE partrel = $1' - USING obj.objid; - END IF; - END LOOP; + pg_class_oid = 'pg_class'::regclass; + + /* Handle 'DROP TABLE' events */ + WITH to_be_deleted AS ( + SELECT cfg.partrel AS rel + FROM pg_event_trigger_dropped_objects() AS events + JOIN @extschema@.pathman_config AS cfg + ON cfg.partrel::oid = events.objid + WHERE events.classid = pg_class_oid + ) + DELETE FROM @extschema@.pathman_config + WHERE partrel IN (SELECT rel FROM to_be_deleted); END $$ LANGUAGE plpgsql; diff --git a/src/init.c b/src/init.c index 08b93ed60d..3145c11565 100644 --- a/src/init.c +++ b/src/init.c @@ -27,6 +27,7 @@ #include "executor/spi.h" #include "miscadmin.h" #include "optimizer/clauses.h" +#include 
"utils/datum.h" #include "utils/inval.h" #include "utils/fmgroids.h" #include "utils/syscache.h" @@ -66,7 +67,9 @@ static bool validate_hash_constraint(const Expr *expr, const PartRelationInfo *prel, uint32 *part_hash); -static bool read_opexpr_const(const OpExpr *opexpr, AttrNumber varattno, Datum *val); +static bool read_opexpr_const(const OpExpr *opexpr, + const PartRelationInfo *prel, + Datum *val); static int oid_cmp(const void *p1, const void *p2); @@ -219,6 +222,7 @@ fill_prel_with_partitions(const Oid *partitions, /* Finalize 'prel' for a RANGE-partitioned table */ if (prel->parttype == PT_RANGE) { + MemoryContext old_mcxt; TypeCacheEntry *tce = lookup_type_cache(prel->atttype, TYPECACHE_CMP_PROC_FINFO); @@ -230,6 +234,21 @@ fill_prel_with_partitions(const Oid *partitions, /* Initialize 'prel->children' array */ for (i = 0; i < PrelChildrenCount(prel); i++) prel->children[i] = prel->ranges[i].child_oid; + + /* Copy all min & max Datums to the persistent mcxt */ + old_mcxt = MemoryContextSwitchTo(TopMemoryContext); + for (i = 0; i < PrelChildrenCount(prel); i++) + { + prel->ranges[i].max = datumCopy(prel->ranges[i].max, + prel->attbyval, + prel->attlen); + + prel->ranges[i].min = datumCopy(prel->ranges[i].min, + prel->attbyval, + prel->attlen); + } + MemoryContextSwitchTo(old_mcxt); + } #ifdef USE_ASSERT_CHECKING @@ -596,7 +615,7 @@ validate_range_constraint(const Expr *expr, if (BTGreaterEqualStrategyNumber == get_op_opfamily_strategy(opexpr->opno, tce->btree_opf)) { - if (!read_opexpr_const(opexpr, prel->attnum, min)) + if (!read_opexpr_const(opexpr, prel, min)) return false; } else @@ -607,7 +626,7 @@ validate_range_constraint(const Expr *expr, if (BTLessStrategyNumber == get_op_opfamily_strategy(opexpr->opno, tce->btree_opf)) { - if (!read_opexpr_const(opexpr, prel->attnum, max)) + if (!read_opexpr_const(opexpr, prel, max)) return false; } else @@ -620,19 +639,38 @@ validate_range_constraint(const Expr *expr, * Reads const value from expressions of kind: 
VAR >= CONST or VAR < CONST */ static bool -read_opexpr_const(const OpExpr *opexpr, AttrNumber varattno, Datum *val) +read_opexpr_const(const OpExpr *opexpr, + const PartRelationInfo *prel, + Datum *val) { - const Node *left = linitial(opexpr->args); - const Node *right = lsecond(opexpr->args); + const Node *left; + const Node *right; + const Const *constant; + + if (list_length(opexpr->args) != 2) + return false; + + left = linitial(opexpr->args); + right = lsecond(opexpr->args); if (!IsA(left, Var) || !IsA(right, Const)) return false; - if (((Var *) left)->varoattno != varattno) + if (((Var *) left)->varoattno != prel->attnum) return false; if (((Const *) right)->constisnull) return false; - *val = ((Const *) right)->constvalue; + constant = (Const *) right; + + /* Check that types match */ + if (prel->atttype != constant->consttype) + { + elog(WARNING, "Constant type in some check constraint does " + "not match the partitioned column's type"); + return false; + } + + *val = constant->constvalue; return true; } diff --git a/src/pathman.h b/src/pathman.h index eccfb77e4d..1a9158d721 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -56,8 +56,6 @@ /* type modifier (typmod) for 'range_interval' */ #define PATHMAN_CONFIG_interval_typmod -1 -#define PATHMAN_CONFIG_partrel_idx "pathman_config_partrel_idx" - /* * pg_pathman's global state. 
diff --git a/src/relation_info.c b/src/relation_info.c index 6d25c8652e..55fbc450c0 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -51,29 +51,58 @@ static Oid get_parent_of_partition_internal(Oid partition, static bool perform_parent_refresh(Oid parent); -#define FreeChildrenArray(prel) \ - do { \ - uint32 i; \ - /* Remove relevant PartParentInfos */ \ - if ((prel)->children) \ - { \ - for (i = 0; i < PrelChildrenCount(prel); i++) \ - { \ - Oid child = (prel)->children[i]; \ - /* If it's *always been* relid's partition, free cache */ \ - if (relid == get_parent_of_partition(child, NULL)) \ - forget_parent_of_partition(child, NULL); \ - } \ - pfree((prel)->children); \ - (prel)->children = NULL; \ - } \ - } while (0) +/* + * Useful static functions for freeing memory. + */ -#define FreeRangesArray(prel) \ - do { \ - if ((prel)->ranges) pfree((prel)->ranges); \ - (prel)->ranges = NULL; \ - } while (0) +static inline void +FreeChildrenArray(PartRelationInfo *prel) +{ + uint32 i; + + Assert(PrelIsValid(prel)); + + /* Remove relevant PartParentInfos */ + if ((prel)->children) + { + for (i = 0; i < PrelChildrenCount(prel); i++) + { + Oid child = (prel)->children[i]; + + /* If it's *always been* relid's partition, free cache */ + if (prel->key == get_parent_of_partition(child, NULL)) + forget_parent_of_partition(child, NULL); + } + + pfree((prel)->children); + (prel)->children = NULL; + } +} + +static inline void +FreeRangesArray(PartRelationInfo *prel) +{ + uint32 i; + + Assert(PrelIsValid(prel)); + + /* Remove RangeEntries array */ + if ((prel)->ranges) + { + /* Remove persistent entries if not byVal */ + if (!(prel)->attbyval) + { + for (i = 0; i < PrelChildrenCount(prel); i++) + { + pfree(DatumGetPointer((prel)->ranges[i].min)); + pfree(DatumGetPointer((prel)->ranges[i].max)); + } + } + + pfree((prel)->ranges); + (prel)->ranges = NULL; + } +} /* @@ -103,17 +132,17 @@ refresh_pathman_relation_info(Oid relid, "Creating new record for relation %u in 
pg_pathman's cache [%u]", relid, MyProcPid); - /* First we assume that this entry is invalid */ - prel->valid = false; - /* Clear outdated resources */ - if (found) + if (found && PrelIsValid(prel)) { /* Free these arrays iff they're not NULL */ FreeChildrenArray(prel); FreeRangesArray(prel); } + /* First we assume that this entry is invalid */ + prel->valid = false; + /* Make both arrays point to NULL */ prel->children = NULL; prel->ranges = NULL; @@ -180,14 +209,14 @@ invalidate_pathman_relation_info(Oid relid, bool *found) FreeChildrenArray(prel); FreeRangesArray(prel); } - else + /* not found => we create a new one */ + else if (!found) { prel->children = NULL; prel->ranges = NULL; } - if (prel) - prel->valid = false; /* now cache entry is invalid */ + prel->valid = false; /* now cache entry is invalid */ elog(DEBUG2, "Invalidating record for relation %u in pg_pathman's cache [%u]", @@ -240,7 +269,7 @@ remove_pathman_relation_info(Oid relid) PartRelationInfo *prel = hash_search(partitioned_rels, (const void *) &relid, HASH_FIND, NULL); - if (prel) + if (prel && PrelIsValid(prel)) { /* Free these arrays iff they're not NULL */ FreeChildrenArray(prel); From 79466c6d6c2439a7d0e2a97a44db3b41335eb50c Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 10 Aug 2016 17:32:30 +0300 Subject: [PATCH 036/184] PartitionFilter: invalidate PartRelationInfo on partition creation, fix invalidate_pathman_relation_info() --- src/hooks.c | 14 ++++-- src/init.c | 55 +++++++++++++------- src/init.h | 1 + src/partition_filter.c | 3 ++ src/pathman.h | 4 ++ src/pg_pathman.c | 7 +++ src/relation_info.c | 112 ++++++++++++++++------------------------- src/relation_info.h | 59 ++++++++++++++++++++++ 8 files changed, 163 insertions(+), 92 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 2efeb21549..7effb629bc 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -452,6 +452,10 @@ pathman_post_parse_analysis_hook(ParseState *pstate, Query *query) if (post_parse_analyze_hook_next) 
post_parse_analyze_hook_next(pstate, query); + /* Finish delayed invalidation jobs */ + if (IsPathmanReady()) + finish_delayed_invalidation(); + /* Load config if pg_pathman exists & it's still necessary */ if (IsPathmanEnabled() && initialization_needed && @@ -461,10 +465,6 @@ pathman_post_parse_analysis_hook(ParseState *pstate, Query *query) load_config(); /* perform main cache initialization */ } - /* Finish delayed invalidation jobs */ - if (IsPathmanReady()) - finish_delayed_invalidation(); - inheritance_disabled_relids = NIL; inheritance_enabled_relids = NIL; } @@ -495,6 +495,9 @@ pathman_relcache_hook(Datum arg, Oid relid) PartParentSearch search; Oid partitioned_table; + if (!IsPathmanReady()) + return; + /* Invalidate PartParentInfo cache if needed */ partitioned_table = forget_parent_of_partition(relid, &search); @@ -516,6 +519,9 @@ pathman_relcache_hook(Datum arg, Oid relid) { elog(DEBUG2, "Invalidation message for relation %u [%u]", relid, MyProcPid); + + if (relid == get_pathman_config_relid()) + delay_pathman_shutdown(); } break; diff --git a/src/init.c b/src/init.c index 3145c11565..7f546f5bca 100644 --- a/src/init.c +++ b/src/init.c @@ -78,8 +78,11 @@ static int oid_cmp(const void *p1, const void *p2); * Create local PartRelationInfo cache & load pg_pathman's config. */ void -load_config() +load_config(void) { + /* cache PATHMAN_CONFIG relation Oid */ + pathman_config_relid = get_relname_relid(PATHMAN_CONFIG, get_pathman_schema()); + init_local_config(); /* create 'relations' hash table */ read_pathman_config(); /* read PATHMAN_CONFIG table & fill cache */ @@ -88,6 +91,34 @@ load_config() elog(DEBUG2, "pg_pathman's config has been loaded successfully"); } +/* + * Destroy local caches & free memory. 
+ */ +void +unload_config(void) +{ + HASH_SEQ_STATUS status; + PartRelationInfo *prel; + + hash_seq_init(&status, partitioned_rels); + while((prel = (PartRelationInfo *) hash_seq_search(&status)) != NULL) + { + if (PrelIsValid(prel)) + { + FreeChildrenArray(prel); + FreeRangesArray(prel); + } + } + + /* Now we can safely destroy hash tables */ + hash_destroy(partitioned_rels); + hash_destroy(parent_cache); + partitioned_rels = NULL; + parent_cache = NULL; + + initialization_needed = true; +} + /* * Estimate shmem amount needed for pg_pathman to run. */ @@ -105,12 +136,6 @@ init_local_config(void) { HASHCTL ctl; - if (partitioned_rels) - { - elog(DEBUG2, "pg_pathman's partitioned relations table already exists"); - return; - } - memset(&ctl, 0, sizeof(ctl)); ctl.keysize = sizeof(Oid); ctl.entrysize = sizeof(PartRelationInfo); @@ -392,7 +417,6 @@ bool pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, TransactionId *xmin) { - Oid pathman_config; Relation rel; HeapScanDesc scan; ScanKeyData key[1]; @@ -400,16 +424,13 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, HeapTuple htup; bool contains_rel = false; - /* Get PATHMAN_CONFIG table Oid */ - pathman_config = get_relname_relid(PATHMAN_CONFIG, get_pathman_schema()); - ScanKeyInit(&key[0], Anum_pathman_config_partrel, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relid)); - /* Open relation with latest snapshot available */ - rel = heap_open(pathman_config, AccessShareLock); + /* Open PATHMAN_CONFIG with latest snapshot available */ + rel = heap_open(get_pathman_config_relid(), AccessShareLock); /* Check that 'partrel' column is if regclass type */ Assert(RelationGetDescr(rel)-> @@ -471,17 +492,13 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, static void read_pathman_config(void) { - Oid pathman_config; Relation rel; HeapScanDesc scan; Snapshot snapshot; HeapTuple htup; - /* Get PATHMAN_CONFIG table Oid */ - pathman_config = 
get_relname_relid(PATHMAN_CONFIG, get_pathman_schema()); - - /* Open relation with latest snapshot available */ - rel = heap_open(pathman_config, AccessShareLock); + /* Open PATHMAN_CONFIG with latest snapshot available */ + rel = heap_open(get_pathman_config_relid(), AccessShareLock); /* Check that 'partrel' column is if regclass type */ Assert(RelationGetDescr(rel)-> diff --git a/src/init.h b/src/init.h index 6f0a6a645b..9576cd0cbf 100644 --- a/src/init.h +++ b/src/init.h @@ -28,6 +28,7 @@ Size estimate_pathman_shmem_size(void); void init_local_config(void); void init_shmem_config(void); void load_config(void); +void unload_config(void); void fill_prel_with_partitions(const Oid *partitions, const uint32 parts_count, diff --git a/src/partition_filter.c b/src/partition_filter.c index 3bf3c710d0..5cfa608afa 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -207,6 +207,9 @@ partition_filter_exec(CustomScanState *node) selected_partid = create_partitions(state->partitioned_table, state->temp_const.constvalue, state->temp_const.consttype); + + /* get_pathman_relation_info() will refresh this entry */ + invalidate_pathman_relation_info(state->partitioned_table, NULL); } else selected_partid = parts[0]; diff --git a/src/pathman.h b/src/pathman.h index 1a9158d721..4c345c6f33 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -56,6 +56,10 @@ /* type modifier (typmod) for 'range_interval' */ #define PATHMAN_CONFIG_interval_typmod -1 +extern Oid pathman_config_relid; + +Oid get_pathman_config_relid(void); + /* * pg_pathman's global state. 
diff --git a/src/pg_pathman.c b/src/pg_pathman.c index e876f053f7..c4c88fb656 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -50,6 +50,7 @@ List *inheritance_disabled_relids = NIL; List *inheritance_enabled_relids = NIL; bool pg_pathman_enable = true; PathmanState *pmstate; +Oid pathman_config_relid = InvalidOid; /* pg module functions */ @@ -2075,3 +2076,9 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, } } } + +Oid +get_pathman_config_relid(void) +{ + return pathman_config_relid; +} diff --git a/src/relation_info.c b/src/relation_info.c index 55fbc450c0..dce192183b 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -26,8 +26,9 @@ #include "utils/snapmgr.h" -static List *delayed_invalidation_parent_rels = NIL; -static List *delayed_invalidation_vague_rels = NIL; +static List *delayed_invalidation_parent_rels = NIL; +static List *delayed_invalidation_vague_rels = NIL; +static bool delayed_shutdown = false; /* Add unique Oid to list, allocate in TopMemoryContext */ #define list_add_unique(list, oid) \ @@ -51,60 +52,6 @@ static Oid get_parent_of_partition_internal(Oid partition, static bool perform_parent_refresh(Oid parent); -/* - * Useful static functions for freeing memory. 
- */ - -static inline void -FreeChildrenArray(PartRelationInfo *prel) -{ - uint32 i; - - Assert(PrelIsValid(prel)); - - /* Remove relevant PartParentInfos */ - if ((prel)->children) - { - for (i = 0; i < PrelChildrenCount(prel); i++) - { - Oid child = (prel)->children[i]; - - /* If it's *always been* relid's partition, free cache */ - if (prel->key == get_parent_of_partition(child, NULL)) - forget_parent_of_partition(child, NULL); - } - - pfree((prel)->children); - (prel)->children = NULL; - } -} - -static inline void -FreeRangesArray(PartRelationInfo *prel) -{ - uint32 i; - - Assert(PrelIsValid(prel)); - - /* Remove RangeEntries array */ - if ((prel)->ranges) - { - /* Remove persistent entries if not byVal */ - if (!(prel)->attbyval) - { - for (i = 0; i < PrelChildrenCount(prel); i++) - { - pfree(DatumGetPointer((prel)->ranges[i].min)); - pfree(DatumGetPointer((prel)->ranges[i].max)); - } - } - - pfree((prel)->ranges); - (prel)->ranges = NULL; - } -} - - /* * refresh\invalidate\get\remove PartRelationInfo functions. */ @@ -132,6 +79,12 @@ refresh_pathman_relation_info(Oid relid, "Creating new record for relation %u in pg_pathman's cache [%u]", relid, MyProcPid); + /* + * NOTE: Trick clang analyzer (first access without NULL pointer check). + * Access to field 'valid' results in a dereference of a null pointer. + */ + prel->cmp_proc = InvalidOid; + /* Clear outdated resources */ if (found && PrelIsValid(prel)) { @@ -199,24 +152,33 @@ refresh_pathman_relation_info(Oid relid, void invalidate_pathman_relation_info(Oid relid, bool *found) { - PartRelationInfo *prel = hash_search(partitioned_rels, - (const void *) &relid, - (found ? HASH_FIND : HASH_ENTER), - found); + bool prel_found; + HASHACTION action = found ? 
HASH_FIND : HASH_ENTER; + PartRelationInfo *prel; - if(found && PrelIsValid(prel)) + prel = hash_search(partitioned_rels, + (const void *) &relid, + action, &prel_found); + + if ((action == HASH_FIND || + (action == HASH_ENTER && prel_found)) && PrelIsValid(prel)) { FreeChildrenArray(prel); FreeRangesArray(prel); + + prel->valid = false; /* now cache entry is invalid */ } - /* not found => we create a new one */ - else if (!found) + /* Handle invalid PartRelationInfo */ + else if (prel) { prel->children = NULL; prel->ranges = NULL; + + prel->valid = false; /* now cache entry is invalid */ } - prel->valid = false; /* now cache entry is invalid */ + /* Set 'found' if necessary */ + if (found) *found = prel_found; elog(DEBUG2, "Invalidating record for relation %u in pg_pathman's cache [%u]", @@ -291,6 +253,13 @@ remove_pathman_relation_info(Oid relid) * Functions for delayed invalidation. */ +/* Add new delayed pathman shutdown job (DROP EXTENSION) */ +void +delay_pathman_shutdown(void) +{ + delayed_shutdown = true; +} + /* Add new delayed invalidation job for a [ex-]parent relation */ void delay_invalidation_parent_rel(Oid parent) @@ -311,7 +280,8 @@ finish_delayed_invalidation(void) { /* Exit early if there's nothing to do */ if (delayed_invalidation_parent_rels == NIL && - delayed_invalidation_vague_rels == NIL) + delayed_invalidation_vague_rels == NIL && + delayed_shutdown == false) { return; } @@ -319,9 +289,14 @@ finish_delayed_invalidation(void) /* Check that current state is transactional */ if (IsTransactionState()) { - ListCell *lc; + ListCell *lc; - //elog(WARNING, "invalidating..."); + if (delayed_shutdown) + { + delayed_shutdown = false; + unload_config(); + return; + } /* Process relations that are (or were) definitely partitioned */ foreach (lc, delayed_invalidation_parent_rels) @@ -541,8 +516,7 @@ perform_parent_refresh(Oid parent) parttype = DatumGetPartType(values[Anum_pathman_config_parttype - 1]); attname = 
DatumGetTextP(values[Anum_pathman_config_attname - 1]); - if (!refresh_pathman_relation_info(parent, parttype, - text_to_cstring(attname))) + if (!refresh_pathman_relation_info(parent, parttype, text_to_cstring(attname))) return false; } else diff --git a/src/relation_info.h b/src/relation_info.h index 9806f0c5b6..7724121f5a 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -101,6 +101,10 @@ typedef enum } PartParentSearch; +/* + * PartRelationInfo field access macros. + */ + #define PrelGetChildrenArray(prel) ( (prel)->children ) #define PrelGetRangesArray(prel) ( (prel)->ranges ) @@ -117,6 +121,7 @@ void invalidate_pathman_relation_info(Oid relid, bool *found); void remove_pathman_relation_info(Oid relid); PartRelationInfo *get_pathman_relation_info(Oid relid); +void delay_pathman_shutdown(void); void delay_invalidation_parent_rel(Oid parent); void delay_invalidation_vague_rel(Oid vague_rel); void finish_delayed_invalidation(void); @@ -127,4 +132,58 @@ Oid get_parent_of_partition(Oid partition, PartParentSearch *status); PartType DatumGetPartType(Datum datum); + +/* + * Useful static functions for freeing memory. 
+ */ + +static inline void +FreeChildrenArray(PartRelationInfo *prel) +{ + uint32 i; + + Assert(PrelIsValid(prel)); + + /* Remove relevant PartParentInfos */ + if ((prel)->children) + { + for (i = 0; i < PrelChildrenCount(prel); i++) + { + Oid child = (prel)->children[i]; + + /* If it's *always been* relid's partition, free cache */ + if (prel->key == get_parent_of_partition(child, NULL)) + forget_parent_of_partition(child, NULL); + } + + pfree((prel)->children); + (prel)->children = NULL; + } +} + +static inline void +FreeRangesArray(PartRelationInfo *prel) +{ + uint32 i; + + Assert(PrelIsValid(prel)); + + /* Remove RangeEntries array */ + if ((prel)->ranges) + { + /* Remove persistent entries if not byVal */ + if (!(prel)->attbyval) + { + for (i = 0; i < PrelChildrenCount(prel); i++) + { + pfree(DatumGetPointer((prel)->ranges[i].min)); + pfree(DatumGetPointer((prel)->ranges[i].max)); + } + } + + pfree((prel)->ranges); + (prel)->ranges = NULL; + } +} + #endif From 7d45123e43017f8bbe7bad1ec51ecedf88963ba4 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 10 Aug 2016 18:50:19 +0300 Subject: [PATCH 037/184] passing regression tests, minor code fix --- expected/pg_pathman.out | 76 ++++++----------------------------------- src/pathman.h | 2 +- 2 files changed, 11 insertions(+), 67 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 5bbaf24fda..aa23e4ec1c 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -12,7 +12,6 @@ SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); ERROR: Partitioning key 'value' must be NOT NULL ALTER TABLE test.hash_rel ALTER COLUMN value SET NOT NULL; SELECT pathman.create_hash_partitions('test.hash_rel', 'Value', 3); -NOTICE: Copying data to partitions... 
create_hash_partitions ------------------------ 3 @@ -549,7 +548,7 @@ WHERE j1.dt < '2015-03-01' AND j2.dt >= '2015-02-01' ORDER BY j2.dt; * Test CTE query */ EXPLAIN (COSTS OFF) - WITH ttt AS (SELECT * FROM test.range_rel WHERE dt >= '2015-02-01' AND dt < '2015-03-15') + WITH ttt AS (SELECT * FROM test.range_rel WHERE dt >= '2015-02-01' AND dt < '2015-03-15') SELECT * FROM ttt; QUERY PLAN -------------------------------------------------------------------------------------------- @@ -562,7 +561,7 @@ SELECT * FROM ttt; (6 rows) EXPLAIN (COSTS OFF) - WITH ttt AS (SELECT * FROM test.hash_rel WHERE value = 2) + WITH ttt AS (SELECT * FROM test.hash_rel WHERE value = 2) SELECT * FROM ttt; QUERY PLAN -------------------------------------- @@ -782,7 +781,6 @@ create table test.run_values as select generate_series(1, 10000) val; create table test.runtime_test_1(id serial primary key, val real); insert into test.runtime_test_1 select generate_series(1, 10000), random(); select pathman.create_hash_partitions('test.runtime_test_1', 'id', 6); -NOTICE: Copying data to partitions... create_hash_partitions ------------------------ 6 @@ -793,7 +791,6 @@ create table test.runtime_test_2 (id serial, category_id int not null, name text insert into test.runtime_test_2 (select id, (id % 6) + 1 as category_id, 'good' || id::text as name, random() as rating from generate_series(1, 100000) id); create index on test.runtime_test_2 (category_id, rating); select pathman.create_hash_partitions('test.runtime_test_2', 'category_id', 6); -NOTICE: Copying data to partitions... create_hash_partitions ------------------------ 6 @@ -803,7 +800,6 @@ create table test.vals as (select generate_series(1, 10000) as val); create table test.runtime_test_3(val text, id serial not null); insert into test.runtime_test_3(id, val) select * from generate_series(1, 10000) k, format('k = %s', k); select pathman.create_hash_partitions('test.runtime_test_3', 'id', 4); -NOTICE: Copying data to partitions... 
create_hash_partitions ------------------------ 4 @@ -861,10 +857,6 @@ NOTICE: drop cascades to 16 other objects */ /* Split first partition in half */ SELECT pathman.split_range_partition('test.num_range_rel_1', 500); -NOTICE: Creating new partition... -NOTICE: Copying data to new partition... -NOTICE: Altering original partition... -NOTICE: Done! split_range_partition ----------------------- {0,1000} @@ -881,10 +873,6 @@ EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id BETWEEN 100 AND 70 (5 rows) SELECT pathman.split_range_partition('test.range_rel_1', '2015-01-15'::DATE); -NOTICE: Creating new partition... -NOTICE: Copying data to new partition... -NOTICE: Altering original partition... -NOTICE: Done! split_range_partition ------------------------- {01-01-2015,02-01-2015} @@ -892,10 +880,6 @@ NOTICE: Done! /* Merge two partitions into one */ SELECT pathman.merge_range_partitions('test.num_range_rel_1', 'test.num_range_rel_' || currval('test.num_range_rel_seq')); -NOTICE: Altering first partition... -NOTICE: Copying data... -NOTICE: Dropping second partition... -NOTICE: Done! merge_range_partitions ------------------------ @@ -910,10 +894,6 @@ EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id BETWEEN 100 AND 70 (3 rows) SELECT pathman.merge_range_partitions('test.range_rel_1', 'test.range_rel_' || currval('test.range_rel_seq')); -NOTICE: Altering first partition... -NOTICE: Copying data... -NOTICE: Dropping second partition... -NOTICE: Done! merge_range_partitions ------------------------ @@ -921,8 +901,6 @@ NOTICE: Done! /* Append and prepend partitions */ SELECT pathman.append_range_partition('test.num_range_rel'); -NOTICE: Appending new partition... -NOTICE: Done! append_range_partition ------------------------ test.num_range_rel_6 @@ -936,8 +914,6 @@ EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 4000; (2 rows) SELECT pathman.prepend_range_partition('test.num_range_rel'); -NOTICE: Prepending new partition... 
-NOTICE: Done! prepend_range_partition ------------------------- test.num_range_rel_7 @@ -957,16 +933,12 @@ SELECT pathman.drop_range_partition('test.num_range_rel_7'); (1 row) SELECT pathman.append_range_partition('test.range_rel'); -NOTICE: Appending new partition... -NOTICE: Done! append_range_partition ------------------------ test.range_rel_6 (1 row) SELECT pathman.prepend_range_partition('test.range_rel'); -NOTICE: Prepending new partition... -NOTICE: Done! prepend_range_partition ------------------------- test.range_rel_7 @@ -999,7 +971,6 @@ EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-12-15' A SELECT pathman.add_range_partition('test.range_rel', '2014-12-01'::DATE, '2015-01-02'::DATE); ERROR: Specified range overlaps with existing partitions SELECT pathman.add_range_partition('test.range_rel', '2014-12-01'::DATE, '2015-01-01'::DATE); -NOTICE: Done! add_range_partition --------------------- test.range_rel_8 @@ -1051,15 +1022,15 @@ EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-11-15' A (4 rows) CREATE TABLE test.range_rel_test1 ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP, - txt TEXT, - abc INTEGER); + id SERIAL PRIMARY KEY, + dt TIMESTAMP, + txt TEXT, + abc INTEGER); SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test1', '2013-01-01'::DATE, '2014-01-01'::DATE); ERROR: Partition must have the exact same structure as parent CREATE TABLE test.range_rel_test2 ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP); + id SERIAL PRIMARY KEY, + dt TIMESTAMP); SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test2', '2013-01-01'::DATE, '2014-01-01'::DATE); ERROR: Partition must have the exact same structure as parent /* @@ -1093,7 +1064,6 @@ SELECT COUNT(*) FROM ONLY test.hash_rel; (1 row) SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); -NOTICE: Copying data to partitions... 
create_hash_partitions ------------------------ 3 @@ -1174,7 +1144,7 @@ SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; DROP TABLE test.range_rel CASCADE; NOTICE: drop cascades to 16 other objects SELECT * FROM pathman.pathman_config; - id | relname | attname | parttype | range_interval + id | partrel | attname | parttype | range_interval ----+---------+---------+----------+---------------- (0 rows) @@ -1236,7 +1206,6 @@ CREATE TABLE test."TeSt" (a INT NOT NULL, b INT); SELECT pathman.create_hash_partitions('test.TeSt', 'a', 3); ERROR: relation "test.test" does not exist at character 39 SELECT pathman.create_hash_partitions('test."TeSt"', 'a', 3); -NOTICE: Copying data to partitions... create_hash_partitions ------------------------ 3 @@ -1317,36 +1286,24 @@ NOTICE: Copying data to partitions... (1 row) SELECT pathman.append_range_partition('test."RangeRel"'); -NOTICE: Appending new partition... -NOTICE: Done! append_range_partition ------------------------ test."RangeRel_4" (1 row) SELECT pathman.prepend_range_partition('test."RangeRel"'); -NOTICE: Prepending new partition... -NOTICE: Done! prepend_range_partition ------------------------- test."RangeRel_5" (1 row) SELECT pathman.merge_range_partitions('test."RangeRel_1"', 'test."RangeRel_' || currval('test."RangeRel_seq"') || '"'); -NOTICE: Altering first partition... -NOTICE: Copying data... -NOTICE: Dropping second partition... -NOTICE: Done! merge_range_partitions ------------------------ (1 row) SELECT pathman.split_range_partition('test."RangeRel_1"', '2015-01-01'::DATE); -NOTICE: Creating new partition... -NOTICE: Copying data to new partition... -NOTICE: Altering original partition... -NOTICE: Done! split_range_partition ------------------------- {12-31-2014,01-02-2015} @@ -1374,7 +1331,7 @@ NOTICE: Copying data to partitions... 
DROP TABLE test."RangeRel" CASCADE; NOTICE: drop cascades to 5 other objects SELECT * FROM pathman.pathman_config; - id | relname | attname | parttype | range_interval + id | partrel | attname | parttype | range_interval ----+--------------------+---------+----------+---------------- 9 | test.num_range_rel | id | 2 | 1000 (1 row) @@ -1418,7 +1375,6 @@ CREATE TABLE hash_rel ( value INTEGER NOT NULL); INSERT INTO hash_rel (value) SELECT g FROM generate_series(1, 10000) as g; SELECT create_hash_partitions('hash_rel', 'value', 3); -NOTICE: Copying data to partitions... create_hash_partitions ------------------------ 3 @@ -1451,36 +1407,24 @@ NOTICE: Copying data to partitions... (1 row) SELECT merge_range_partitions('range_rel_1', 'range_rel_2'); -NOTICE: Altering first partition... -NOTICE: Copying data... -NOTICE: Dropping second partition... -NOTICE: Done! merge_range_partitions ------------------------ (1 row) SELECT split_range_partition('range_rel_1', '2010-02-15'::date); -NOTICE: Creating new partition... -NOTICE: Copying data to new partition... -NOTICE: Altering original partition... -NOTICE: Done! split_range_partition ------------------------- {01-01-2010,03-01-2010} (1 row) SELECT append_range_partition('range_rel'); -NOTICE: Appending new partition... -NOTICE: Done! append_range_partition ------------------------ public.range_rel_14 (1 row) SELECT prepend_range_partition('range_rel'); -NOTICE: Prepending new partition... -NOTICE: Done! 
prepend_range_partition ------------------------- public.range_rel_15 diff --git a/src/pathman.h b/src/pathman.h index 4c345c6f33..1964f2fa1b 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -37,7 +37,7 @@ #ifdef USE_ASSERT_CHECKING #include "utils.h" #define DebugPrintDatum(datum, typid) ( datum_to_cstring((datum), (typid)) ) -#elif +#else #define DebugPrintDatum(datum, typid) ( "[use --enable-cassert]" ) #endif From d18ff3e2abee5cdb50780773548398075cd8be21 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 11 Aug 2016 16:46:19 +0300 Subject: [PATCH 038/184] fix PackDatumToByteArray() & UnpackDatumFromByteArray(), install pathman_relcache_hook() only once, introduce fini_local_cache() --- src/init.c | 73 +++++++++++++++++++++++++++++---------------- src/init.h | 1 - src/pg_pathman.c | 3 +- src/worker.c | 77 ++++++++++++++++++++++++++++++++---------------- 4 files changed, 100 insertions(+), 54 deletions(-) diff --git a/src/init.c b/src/init.c index 7f546f5bca..f45186129f 100644 --- a/src/init.c +++ b/src/init.c @@ -44,14 +44,17 @@ /* Storage for PartRelationInfos */ -HTAB *partitioned_rels = NULL; +HTAB *partitioned_rels = NULL; /* Storage for PartParentInfos */ -HTAB *parent_cache = NULL; +HTAB *parent_cache = NULL; -bool initialization_needed = true; +bool initialization_needed = true; +static bool relcache_callback_needed = true; +static void init_local_cache(void); +static void fini_local_cache(void); static void read_pathman_config(void); static Expr *get_partition_constraint_expr(Oid partition, AttrNumber part_attno); @@ -80,12 +83,20 @@ static int oid_cmp(const void *p1, const void *p2); void load_config(void) { - /* cache PATHMAN_CONFIG relation Oid */ + /* Cache PATHMAN_CONFIG relation Oid */ pathman_config_relid = get_relname_relid(PATHMAN_CONFIG, get_pathman_schema()); - init_local_config(); /* create 'relations' hash table */ + init_local_cache(); /* create 'partitioned_rels' hash table */ read_pathman_config(); /* read PATHMAN_CONFIG table & 
fill cache */ + /* Register pathman_relcache_hook(), currently we can't unregister it */ + if (relcache_callback_needed) + { + CacheRegisterRelcacheCallback(pathman_relcache_hook, PointerGetDatum(NULL)); + relcache_callback_needed = false; + } + + /* Mark pg_pathman as initialized */ initialization_needed = false; elog(DEBUG2, "pg_pathman's config has been loaded successfully"); @@ -97,26 +108,12 @@ load_config(void) void unload_config(void) { - HASH_SEQ_STATUS status; - PartRelationInfo *prel; - - hash_seq_init(&status, partitioned_rels); - while((prel = (PartRelationInfo *) hash_seq_search(&status)) != NULL) - { - if (PrelIsValid(prel)) - { - FreeChildrenArray(prel); - FreeRangesArray(prel); - } - } - - /* Now we can safely destroy hash tables */ - hash_destroy(partitioned_rels); - hash_destroy(parent_cache); - partitioned_rels = NULL; - parent_cache = NULL; + fini_local_cache(); /* destroy 'partitioned_rels' hash table */ + /* Mark pg_pathman as uninitialized */ initialization_needed = true; + + elog(DEBUG2, "pg_pathman's config has been unloaded successfully"); } /* @@ -131,8 +128,8 @@ estimate_pathman_shmem_size(void) /* * Initialize per-process resources. */ -void -init_local_config(void) +static void +init_local_cache(void) { HASHCTL ctl; @@ -152,8 +149,32 @@ init_local_config(void) parent_cache = hash_create("pg_pathman's partition parents cache", PART_RELS_SIZE * CHILD_FACTOR, &ctl, HASH_ELEM | HASH_BLOBS); +} + +/* + * Safely free per-process resources. 
+ */ +static void +fini_local_cache(void) +{ + HASH_SEQ_STATUS status; + PartRelationInfo *prel; - CacheRegisterRelcacheCallback(pathman_relcache_hook, PointerGetDatum(NULL)); + hash_seq_init(&status, partitioned_rels); + while((prel = (PartRelationInfo *) hash_seq_search(&status)) != NULL) + { + if (PrelIsValid(prel)) + { + FreeChildrenArray(prel); + FreeRangesArray(prel); + } + } + + /* Now we can safely destroy hash tables */ + hash_destroy(partitioned_rels); + hash_destroy(parent_cache); + partitioned_rels = NULL; + parent_cache = NULL; } /* diff --git a/src/init.h b/src/init.h index 9576cd0cbf..010b302a12 100644 --- a/src/init.h +++ b/src/init.h @@ -25,7 +25,6 @@ extern bool initialization_needed; Size estimate_pathman_shmem_size(void); -void init_local_config(void); void init_shmem_config(void); void load_config(void); void unload_config(void); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index c4c88fb656..1c7799ddc4 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -860,7 +860,8 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) prel->atttype); /* Convert interval from CSTRING to 'prel->atttype' */ - interval_binary = OidFunctionCall1(typein_proc, value); + interval_binary = OidFunctionCall1(typein_proc, + CStringGetDatum(interval_cstring)); interval_type = prel->atttype; } diff --git a/src/worker.c b/src/worker.c index 1d37d034e8..12351968d6 100644 --- a/src/worker.c +++ b/src/worker.c @@ -51,32 +51,51 @@ typedef struct } PartitionArgs; -#define PackDatumToByteArray(array, datum, datum_size, typbyval) \ - do { \ - memcpy((void *) (array), \ - (const void *) ((typbyval) ? \ - (Pointer) (&datum) : \ - DatumGetPointer(datum)), \ - datum_size); \ - } while (0) /* - * 'typid' is not necessary, but it is used by PrintUnpackedDatum(). + * Useful datum packing\unpacking functions for BGW. 
*/ -#define UnpackDatumFromByteArray(array, datum, datum_size, typbyval, typid) \ - do { \ - if (typbyval) \ - memcpy((void *) &datum, (const void *) array, datum_size); \ - else \ - { \ - datum = PointerGetDatum(palloc(datum_size)); \ - memcpy((void *) DatumGetPointer(datum), \ - (const void *) array, \ - datum_size); \ - } \ - elog(LOG, "BGW: arg->value is '%s' [%u]", \ - DebugPrintDatum(datum, typid), MyProcPid); \ - } while (0) + +static void +PackDatumToByteArray(void *byte_array, Datum datum, Size datum_size, bool typbyval) +{ + if (typbyval) + /* We have to copy all Datum's bytes */ + datum_size = Max(sizeof(Datum), datum_size); + + memcpy((void *) byte_array, + (const void *) (typbyval ? + (Pointer) &datum : /* treat Datum as byte array */ + DatumGetPointer(datum)), /* extract pointer to data */ + datum_size); +} + +static void +UnpackDatumFromByteArray(Datum *datum, Size datum_size, bool typbyval, + const void *byte_array) +{ + void *dst; + + if (typbyval) + { + /* Write Data to Datum directly */ + dst = datum; + + /* We have to copy all Datum's bytes */ + datum_size = Max(sizeof(Datum), datum_size); + } + else + { + /* Allocate space for Datum's internals */ + dst = palloc(datum_size); + + /* Save pointer to Datum */ + *datum = PointerGetDatum(dst); + } + + memcpy(dst, byte_array, datum_size); +} + /* @@ -121,7 +140,7 @@ create_partitions_bg_worker_segment(Oid relid, Datum value, Oid value_type) args->value_size = datum_size; args->value_byval = typcache->typbyval; - PackDatumToByteArray(&args->value, value, + PackDatumToByteArray((void *) args->value, value, datum_size, args->value_byval); return segment; @@ -255,10 +274,16 @@ bg_worker_main(Datum main_arg) bg_worker_load_config(create_partitions_bgw); /* Upack Datum from segment to 'value' */ - UnpackDatumFromByteArray(&args->value, value, + UnpackDatumFromByteArray(&value, args->value_size, args->value_byval, - args->value_type); + (const void *) args->value); + +#ifdef USE_ASSERT_CHECKING + elog(LOG, 
"%s: arg->value is '%s' [%u]", + create_partitions_bgw, + DebugPrintDatum(value, args->value_type), MyProcPid); +#endif /* Create partitions */ args->result = create_partitions_internal(args->partitioned_table, From b80945e89f74faa15dcea13ff8fbd3e8b63439b1 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 11 Aug 2016 17:58:54 +0300 Subject: [PATCH 039/184] add new TODO regarding the postgres version --- src/pathman.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/pathman.h b/src/pathman.h index 1964f2fa1b..566fe1aa08 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -25,12 +25,15 @@ #include "nodes/execnodes.h" #include "optimizer/planner.h" #include "parser/parsetree.h" -#include "storage/lwlock.h" /* Check PostgreSQL version */ -#if PG_VERSION_NUM < 90500 - #error "You are trying to build pg_pathman with PostgreSQL version lower than 9.5. Please, check your environment." +/* + * TODO: a fix for WaitForBackgroundWorkerShutdown() + * has been accepted, so we have to update this number. + */ +#if PG_VERSION_NUM < 90503 + #error "Cannot build pg_pathman with PostgreSQL version lower than 9.5.3" #endif /* Print Datum as CString to server log */ From de0525297883bb4b2906ab13c8e1a22894abc6dd Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 11 Aug 2016 18:49:51 +0300 Subject: [PATCH 040/184] improve comments, better delayed_shutdown checking --- src/relation_info.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/relation_info.c b/src/relation_info.c index dce192183b..a6fd71c101 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -26,9 +26,13 @@ #include "utils/snapmgr.h" +/* + * We delay all invalidation jobs received in relcache hook. 
+ */ static List *delayed_invalidation_parent_rels = NIL; static List *delayed_invalidation_vague_rels = NIL; -static bool delayed_shutdown = false; +static bool delayed_shutdown = false; /* pathman was dropped */ + /* Add unique Oid to list, allocate in TopMemoryContext */ #define list_add_unique(list, oid) \ @@ -291,11 +295,24 @@ finish_delayed_invalidation(void) { ListCell *lc; + /* Handle the probable 'DROP EXTENSION' case */ if (delayed_shutdown) { delayed_shutdown = false; - unload_config(); - return; + + /* Check that PATHMAN_CONFIG table has indeed been dropped */ + if (InvalidOid == get_relname_relid(PATHMAN_CONFIG, get_pathman_schema())) + { + /* Ok, let's unload pg_pathman's config */ + unload_config(); + + /* Disregard all remaining invalidation jobs */ + free_invalidation_list(delayed_invalidation_parent_rels); + free_invalidation_list(delayed_invalidation_vague_rels); + + /* No need to continue, exit */ + return; + } } /* Process relations that are (or were) definitely partitioned */ @@ -324,16 +341,19 @@ finish_delayed_invalidation(void) switch (search) { + /* It's still parent */ case PPS_ENTRY_PART_PARENT: perform_parent_refresh(parent); break; + /* It *might have been* parent before (not in PATHMAN_CONFIG) */ case PPS_ENTRY_PARENT: remove_pathman_relation_info(parent); break; + /* How come we still don't know?? */ case PPS_NOT_SURE: - elog(ERROR, "This should never happen"); + elog(ERROR, "Unknown table status, this should never happen"); break; default: @@ -482,6 +502,11 @@ try_syscache_parent_search(Oid partition, PartParentSearch *status) { parent = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent; + /* + * NB: don't forget that 'inh' flag does not immediately + * mean that this is a pg_pathman's partition. It might + * be just a casual inheriting table. 
+ */ if (status) *status = PPS_ENTRY_PARENT; /* Check that PATHMAN_CONFIG contains this table */ From 0a1e171645c7ddb2ec518cf7546785e3d6f6f15c Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 11 Aug 2016 19:00:05 +0300 Subject: [PATCH 041/184] comments for try_perform_parent_refresh() --- src/relation_info.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/relation_info.c b/src/relation_info.c index a6fd71c101..83a3e44667 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -49,11 +49,11 @@ static bool delayed_shutdown = false; /* pathman was dropped */ } while (0) +static bool try_perform_parent_refresh(Oid parent); static Oid try_syscache_parent_search(Oid partition, PartParentSearch *status); static Oid get_parent_of_partition_internal(Oid partition, PartParentSearch *status, HASHACTION action); -static bool perform_parent_refresh(Oid parent); /* @@ -332,7 +332,7 @@ finish_delayed_invalidation(void) Oid vague_rel = lfirst_oid(lc); /* It might be a partitioned table or a partition */ - if (!perform_parent_refresh(vague_rel)) + if (!try_perform_parent_refresh(vague_rel)) { PartParentSearch search; Oid parent; @@ -343,7 +343,7 @@ finish_delayed_invalidation(void) { /* It's still parent */ case PPS_ENTRY_PART_PARENT: - perform_parent_refresh(parent); + try_perform_parent_refresh(parent); break; /* It *might have been* parent before (not in PATHMAN_CONFIG) */ @@ -527,8 +527,13 @@ try_syscache_parent_search(Oid partition, PartParentSearch *status) } } +/* + * Try to refresh cache entry for relation 'parent'. + * + * Return true on success. 
+ */ static bool -perform_parent_refresh(Oid parent) +try_perform_parent_refresh(Oid parent) { Datum values[Natts_pathman_config]; bool isnull[Natts_pathman_config]; From 2d493986a523fc4095fba6501f862cfaab51f7e1 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 12 Aug 2016 15:00:29 +0300 Subject: [PATCH 042/184] check for delay_pathman_shutdown() every time hook is called, refactoring, more comments, fix TODO regarding the collation Oid --- src/hooks.c | 10 +++++++--- src/init.c | 9 ++++----- src/nodes_common.c | 2 +- src/pathman.h | 10 +++++----- src/relation_info.c | 36 +++++++++++++++++++++++------------- src/relation_info.h | 3 ++- 6 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 7effb629bc..8c52e1bd23 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -498,6 +498,10 @@ pathman_relcache_hook(Datum arg, Oid relid) if (!IsPathmanReady()) return; + /* Invalidation event for PATHMAN_CONFIG table (probably DROP) */ + if (relid == get_pathman_config_relid()) + delay_pathman_shutdown(); + /* Invalidate PartParentInfo cache if needed */ partitioned_table = forget_parent_of_partition(relid, &search); @@ -517,11 +521,11 @@ pathman_relcache_hook(Datum arg, Oid relid) /* Both syscache and pathman's cache say it isn't a partition */ case PPS_ENTRY_NOT_FOUND: { + /* NOTE: Remove NOT_USED when it's time */ +#ifdef NOT_USED elog(DEBUG2, "Invalidation message for relation %u [%u]", relid, MyProcPid); - - if (relid == get_pathman_config_relid()) - delay_pathman_shutdown(); +#endif } break; diff --git a/src/init.c b/src/init.c index f45186129f..22b8633903 100644 --- a/src/init.c +++ b/src/init.c @@ -269,13 +269,11 @@ fill_prel_with_partitions(const Oid *partitions, if (prel->parttype == PT_RANGE) { MemoryContext old_mcxt; - TypeCacheEntry *tce = lookup_type_cache(prel->atttype, - TYPECACHE_CMP_PROC_FINFO); /* Sort partitions by RangeEntry->min asc */ qsort_arg((void *) prel->ranges, PrelChildrenCount(prel), sizeof(RangeEntry), 
cmp_range_entries, - (void *) &tce->cmp_proc_finfo); + (void *) &prel->cmp_proc); /* Initialize 'prel->children' array */ for (i = 0; i < PrelChildrenCount(prel); i++) @@ -620,9 +618,10 @@ cmp_range_entries(const void *p1, const void *p2, void *arg) { const RangeEntry *v1 = (const RangeEntry *) p1; const RangeEntry *v2 = (const RangeEntry *) p2; - FmgrInfo *cmp_proc = (FmgrInfo *) arg; - return FunctionCall2(cmp_proc, v1->min, v2->min); + Oid cmp_proc_oid = *(Oid *) arg; + + return OidFunctionCall2(cmp_proc_oid, v1->min, v2->min); } /* diff --git a/src/nodes_common.c b/src/nodes_common.c index 1a5ffeb56d..da4c66fa47 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -160,7 +160,7 @@ append_part_attr_to_tlist(List *tlist, Index relno, PartRelationInfo *prel) prel->attnum, prel->atttype, prel->atttypmod, - InvalidOid, + prel->attcollid, 0); Index last_item = list_length(tlist) + 1; diff --git a/src/pathman.h b/src/pathman.h index 566fe1aa08..4fa5c765c7 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -50,11 +50,11 @@ */ #define PATHMAN_CONFIG "pathman_config" #define Natts_pathman_config 5 -#define Anum_pathman_config_id 1 -#define Anum_pathman_config_partrel 2 -#define Anum_pathman_config_attname 3 -#define Anum_pathman_config_parttype 4 -#define Anum_pathman_config_range_interval 5 +#define Anum_pathman_config_id 1 /* primary key */ +#define Anum_pathman_config_partrel 2 /* partitioned relation (regclass) */ +#define Anum_pathman_config_attname 3 /* partitioned column (text) */ +#define Anum_pathman_config_parttype 4 /* partitioning type (1|2) */ +#define Anum_pathman_config_range_interval 5 /* interval for RANGE pt. 
(text) */ /* type modifier (typmod) for 'range_interval' */ #define PATHMAN_CONFIG_interval_typmod -1 diff --git a/src/relation_info.c b/src/relation_info.c index 83a3e44667..537070d0f1 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -60,7 +60,7 @@ static Oid get_parent_of_partition_internal(Oid partition, * refresh\invalidate\get\remove PartRelationInfo functions. */ -/* Create or update PartRelationInfo in local cache. */ +/* Create or update PartRelationInfo in local cache. Might emit ERROR. */ PartRelationInfo * refresh_pathman_relation_info(Oid relid, PartType partitioning_type, @@ -109,16 +109,23 @@ refresh_pathman_relation_info(Oid relid, /* Initialize PartRelationInfo using syscache & typcache */ prel->attnum = get_attnum(relid, part_column_name); - prel->atttype = get_atttype(relid, prel->attnum); - prel->atttypmod = get_atttypmod(relid, prel->attnum); - /* Fetch HASH & CMP fuctions for atttype */ + /* Attribute number sanity check */ + if (prel->attnum == InvalidAttrNumber) + elog(ERROR, "Relation \"%s\" has no column \"%s\"", + get_rel_name_or_relid(relid), part_column_name); + + /* Fetch atttypid, atttypmod, and attcollation in a single cache lookup */ + get_atttypetypmodcoll(relid, prel->attnum, + &prel->atttype, &prel->atttypmod, &prel->attcollid); + + /* Fetch HASH & CMP fuctions and other stuff from type cache */ typcache = lookup_type_cache(prel->atttype, TYPECACHE_CMP_PROC | TYPECACHE_HASH_PROC); - prel->attbyval = typcache->typbyval; - prel->attlen = typcache->typlen; - prel->attalign = typcache->typalign; + prel->attbyval = typcache->typbyval; + prel->attlen = typcache->typlen; + prel->attalign = typcache->typalign; prel->cmp_proc = typcache->cmp_proc; prel->hash_proc = typcache->hash_proc; @@ -152,7 +159,7 @@ refresh_pathman_relation_info(Oid relid, return prel; } -/* Invalidate PartRelationInfo cache entry. Create new entry if 'found' is NULL */ +/* Invalidate PartRelationInfo cache entry. Create new entry if 'found' is NULL. 
*/ void invalidate_pathman_relation_info(Oid relid, bool *found) { @@ -215,7 +222,8 @@ get_pathman_relation_info(Oid relid) /* Refresh partitioned table cache entry */ /* TODO: possible refactoring, pass found 'prel' instead of searching */ - refresh_pathman_relation_info(relid, part_type, attname); + prel = refresh_pathman_relation_info(relid, part_type, attname); + Assert(PrelIsValid(prel)); /* it MUST be valid if we got here */ } /* Else clear remaining cache entry */ else remove_pathman_relation_info(relid); @@ -400,7 +408,7 @@ forget_parent_of_partition(Oid partition, PartParentSearch *status) return get_parent_of_partition_internal(partition, status, HASH_REMOVE); } -/* Peturn partition parent's Oid */ +/* Return partition parent's Oid */ Oid get_parent_of_partition(Oid partition, PartParentSearch *status) { @@ -546,11 +554,13 @@ try_perform_parent_refresh(Oid parent) parttype = DatumGetPartType(values[Anum_pathman_config_parttype - 1]); attname = DatumGetTextP(values[Anum_pathman_config_attname - 1]); - if (!refresh_pathman_relation_info(parent, parttype, text_to_cstring(attname))) + /* If anything went wrong, return false (actually, it might throw ERROR) */ + if (!PrelIsValid(refresh_pathman_relation_info(parent, parttype, + text_to_cstring(attname)))) return false; } - else - return false; + /* Not a partitioned relation */ + else return false; return true; } diff --git a/src/relation_info.h b/src/relation_info.h index 7724121f5a..2437ba093f 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -57,7 +57,8 @@ typedef struct int32 atttypmod; /* partitioned column type modifier */ bool attbyval; /* is partitioned column stored by value? 
*/ int16 attlen; /* length of the partitioned column's type */ - int attalign; + int attalign; /* alignment of the part column's type */ + Oid attcollid; /* collation of the partitioned column */ Oid cmp_proc, /* comparison fuction for 'atttype' */ hash_proc; /* hash function for 'atttype' */ From f83386318af73699c2334f167f3dd82d3d5fc64f Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 12 Aug 2016 16:29:44 +0300 Subject: [PATCH 043/184] several bugfixes, detect that pathman has been dropped more reliably --- src/init.c | 9 ++++++--- src/pg_pathman.c | 3 +++ src/relation_info.c | 13 ++++++++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/init.c b/src/init.c index 22b8633903..13f7300c8c 100644 --- a/src/init.c +++ b/src/init.c @@ -83,7 +83,7 @@ static int oid_cmp(const void *p1, const void *p2); void load_config(void) { - /* Cache PATHMAN_CONFIG relation Oid */ + /* Cache PATHMAN_CONFIG relation's Oid */ pathman_config_relid = get_relname_relid(PATHMAN_CONFIG, get_pathman_schema()); init_local_cache(); /* create 'partitioned_rels' hash table */ @@ -99,7 +99,7 @@ load_config(void) /* Mark pg_pathman as initialized */ initialization_needed = false; - elog(DEBUG2, "pg_pathman's config has been loaded successfully"); + elog(DEBUG2, "pg_pathman's config has been loaded successfully [%u]", MyProcPid); } /* @@ -108,12 +108,15 @@ load_config(void) void unload_config(void) { + /* Don't forget to reset cached PATHMAN_CONFIG relation's Oid */ + pathman_config_relid = InvalidOid; + fini_local_cache(); /* destroy 'partitioned_rels' hash table */ /* Mark pg_pathman as uninitialized */ initialization_needed = true; - elog(DEBUG2, "pg_pathman's config has been unloaded successfully"); + elog(DEBUG2, "pg_pathman's config has been unloaded successfully [%u]", MyProcPid); } /* diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 1c7799ddc4..a6212bb15f 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -2078,6 +2078,9 @@ 
generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, } } +/* + * Get cached PATHMAN_CONFIG relation Oid. + */ Oid get_pathman_config_relid(void) { diff --git a/src/relation_info.c b/src/relation_info.c index 537070d0f1..55ebe65fd9 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -137,7 +137,10 @@ refresh_pathman_relation_info(Oid relid, /* If there's no children at all, remove this entry */ if (prel_children_count == 0) + { remove_pathman_relation_info(relid); + return NULL; + } /* * Fill 'prel' with partition info, raise ERROR if anything is wrong. @@ -306,10 +309,18 @@ finish_delayed_invalidation(void) /* Handle the probable 'DROP EXTENSION' case */ if (delayed_shutdown) { + Oid cur_pathman_config_relid; + + /* Unset 'shutdown' flag */ delayed_shutdown = false; + /* Get current PATHMAN_CONFIG relid */ + cur_pathman_config_relid = get_relname_relid(PATHMAN_CONFIG, + get_pathman_schema()); + /* Check that PATHMAN_CONFIG table has indeed been dropped */ - if (InvalidOid == get_relname_relid(PATHMAN_CONFIG, get_pathman_schema())) + if (cur_pathman_config_relid == InvalidOid || + cur_pathman_config_relid != get_pathman_config_relid()) { /* Ok, let's unload pg_pathman's config */ unload_config(); From f6fe29c1ea9dc34235bdd5685f85022ec6ed820b Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 12 Aug 2016 16:48:14 +0300 Subject: [PATCH 044/184] fix isolation tests (rollback_on_create_partitions) --- expected/rollback_on_create_partitions.out | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/expected/rollback_on_create_partitions.out b/expected/rollback_on_create_partitions.out index 8d314634fc..3531107db8 100644 --- a/expected/rollback_on_create_partitions.out +++ b/expected/rollback_on_create_partitions.out @@ -21,7 +21,6 @@ Append -> Seq Scan on range_rel_8 -> Seq Scan on range_rel_9 -> Seq Scan on range_rel_10 -WARNING: Partitioning of table 'range_rel' has been aborted, removing partitions from pg_pathman's cache step 
rollback: ROLLBACK; step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; QUERY PLAN @@ -83,8 +82,6 @@ QUERY PLAN Seq Scan on range_rel step savepoint_c: SAVEPOINT c; -WARNING: All changes in partitioned table 'range_rel' will be discarded -WARNING: Partitioning of table 'range_rel' has been aborted, removing partitions from pg_pathman's cache step rollback: ROLLBACK; step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; QUERY PLAN @@ -129,7 +126,6 @@ drop_partitions 10 step savepoint_c: SAVEPOINT c; -WARNING: All changes in partitioned table 'range_rel' will be discarded step rollback_b: ROLLBACK TO SAVEPOINT b; step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; QUERY PLAN @@ -145,7 +141,6 @@ Append -> Seq Scan on range_rel_8 -> Seq Scan on range_rel_9 -> Seq Scan on range_rel_10 -WARNING: Partitioning of table 'range_rel' has been aborted, removing partitions from pg_pathman's cache step rollback: ROLLBACK; step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; QUERY PLAN @@ -166,7 +161,6 @@ drop_partitions 10 step savepoint_c: SAVEPOINT c; -WARNING: All changes in partitioned table 'range_rel' will be discarded step rollback_b: ROLLBACK TO SAVEPOINT b; step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; QUERY PLAN @@ -216,8 +210,6 @@ QUERY PLAN Seq Scan on range_rel step savepoint_c: SAVEPOINT c; -WARNING: All changes in partitioned table 'range_rel' will be discarded -WARNING: Partitioning of table 'range_rel' has been aborted, removing partitions from pg_pathman's cache step rollback_a: ROLLBACK TO SAVEPOINT a; step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; QUERY PLAN @@ -247,8 +239,6 @@ QUERY PLAN Seq Scan on range_rel step savepoint_c: SAVEPOINT c; -WARNING: All changes in partitioned table 'range_rel' will be discarded -WARNING: Partitioning of table 'range_rel' has been aborted, removing partitions from pg_pathman's cache step rollback_a: ROLLBACK TO SAVEPOINT a; step show_rel: EXPLAIN (COSTS OFF) SELECT * 
FROM range_rel; QUERY PLAN @@ -278,7 +268,6 @@ QUERY PLAN Seq Scan on range_rel step savepoint_c: SAVEPOINT c; -WARNING: All changes in partitioned table 'range_rel' will be discarded step rollback_b: ROLLBACK TO SAVEPOINT b; step drop_partitions: SELECT drop_partitions('range_rel'); drop_partitions @@ -288,8 +277,6 @@ step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; QUERY PLAN Seq Scan on range_rel -WARNING: All changes in partitioned table 'range_rel' will be discarded -WARNING: Partitioning of table 'range_rel' has been aborted, removing partitions from pg_pathman's cache step rollback: ROLLBACK; step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; QUERY PLAN @@ -314,7 +301,6 @@ QUERY PLAN Seq Scan on range_rel step savepoint_c: SAVEPOINT c; -WARNING: All changes in partitioned table 'range_rel' will be discarded step rollback_b: ROLLBACK TO SAVEPOINT b; step drop_partitions: SELECT drop_partitions('range_rel'); drop_partitions @@ -343,8 +329,6 @@ step drop_partitions: SELECT drop_partitions('range_rel'); drop_partitions 10 -WARNING: All changes in partitioned table 'range_rel' will be discarded -WARNING: Partitioning of table 'range_rel' has been aborted, removing partitions from pg_pathman's cache step rollback_a: ROLLBACK TO SAVEPOINT a; step create_partitions: SELECT create_range_partitions('range_rel', 'id', 1, 1000); create_range_partitions @@ -364,7 +348,6 @@ Append -> Seq Scan on range_rel_8 -> Seq Scan on range_rel_9 -> Seq Scan on range_rel_10 -WARNING: Partitioning of table 'range_rel' has been aborted, removing partitions from pg_pathman's cache step rollback: ROLLBACK; step show_rel: EXPLAIN (COSTS OFF) SELECT * FROM range_rel; QUERY PLAN @@ -384,8 +367,6 @@ step drop_partitions: SELECT drop_partitions('range_rel'); drop_partitions 10 -WARNING: All changes in partitioned table 'range_rel' will be discarded -WARNING: Partitioning of table 'range_rel' has been aborted, removing partitions from pg_pathman's cache step rollback_a: 
ROLLBACK TO SAVEPOINT a; step create_partitions: SELECT create_range_partitions('range_rel', 'id', 1, 1000); create_range_partitions From 1a0f23ef30b2f4e5b211dd58d2b347d21969c42d Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 17 Aug 2016 11:59:11 +0300 Subject: [PATCH 045/184] plpgsql code cleanup, introduce parent_of_partition() function, remove acquire_partitions_lock() & release_partitions_lock(), fix spawn_partitions() --- expected/pg_pathman.out | 12 - hash.sql | 130 ++++---- init.sql | 159 +++++----- range.sql | 662 +++++++++++++++++++--------------------- src/nodes_common.c | 1 - src/pg_pathman.c | 64 ++-- src/pl_funcs.c | 53 ++-- 7 files changed, 543 insertions(+), 538 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index aa23e4ec1c..0c4b889324 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -58,7 +58,6 @@ SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DAT ERROR: Not enough partitions to fit all the values of 'dt' SELECT pathman.create_range_partitions('test.range_rel', 'DT', '2015-01-01'::DATE, '1 month'::INTERVAL); NOTICE: sequence "range_rel_seq" does not exist, skipping -NOTICE: Copying data to partitions... create_range_partitions ------------------------- 4 @@ -81,7 +80,6 @@ CREATE TABLE test.num_range_rel ( txt TEXT); SELECT pathman.create_range_partitions('test.num_range_rel', 'id', 0, 1000, 4); NOTICE: sequence "num_range_rel_seq" does not exist, skipping -NOTICE: Copying data to partitions... create_range_partitions ------------------------- 4 @@ -1103,7 +1101,6 @@ CREATE TABLE test.range_rel ( id SERIAL PRIMARY KEY, dt TIMESTAMP NOT NULL); SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DATE, '10 days'::INTERVAL, 1); -NOTICE: Copying data to partitions... 
create_range_partitions ------------------------- 1 @@ -1153,7 +1150,6 @@ CREATE TABLE test.num_range_rel ( id SERIAL PRIMARY KEY, txt TEXT); SELECT pathman.create_range_partitions('test.num_range_rel', 'id', 1000, 1000, 4); -NOTICE: Copying data to partitions... create_range_partitions ------------------------- 4 @@ -1279,7 +1275,6 @@ INSERT INTO test."RangeRel" (dt, txt) SELECT g, md5(g::TEXT) FROM generate_series('2015-01-01', '2015-01-03', '1 day'::interval) as g; SELECT pathman.create_range_partitions('test."RangeRel"', 'dt', '2015-01-01'::DATE, '1 day'::INTERVAL); NOTICE: sequence "RangeRel_seq" does not exist, skipping -NOTICE: Copying data to partitions... create_range_partitions ------------------------- 3 @@ -1322,7 +1317,6 @@ NOTICE: 0 rows copied from test."RangeRel_1" (1 row) SELECT pathman.create_partitions_from_range('test."RangeRel"', 'dt', '2015-01-01'::DATE, '2015-01-05'::DATE, '1 day'::INTERVAL); -NOTICE: Copying data to partitions... create_partitions_from_range ------------------------------ 5 @@ -1341,7 +1335,6 @@ CREATE TABLE test."RangeRel" ( dt TIMESTAMP NOT NULL, txt TEXT); SELECT pathman.create_range_partitions('test."RangeRel"', 'id', 1, 100, 3); -NOTICE: Copying data to partitions... create_range_partitions ------------------------- 3 @@ -1358,7 +1351,6 @@ NOTICE: 0 rows copied from test."RangeRel_1" (1 row) SELECT pathman.create_partitions_from_range('test."RangeRel"', 'id', 1, 300, 100); -NOTICE: Copying data to partitions... create_partitions_from_range ------------------------------ 3 @@ -1400,7 +1392,6 @@ CREATE TABLE range_rel ( INSERT INTO range_rel (dt, value) SELECT g, extract(day from g) FROM generate_series('2010-01-01'::date, '2010-12-31'::date, '1 day') as g; SELECT create_range_partitions('range_rel', 'dt', '2010-01-01'::date, '1 month'::interval, 12); NOTICE: sequence "range_rel_seq" does not exist, skipping -NOTICE: Copying data to partitions... 
create_range_partitions ------------------------- 12 @@ -1530,7 +1521,6 @@ NOTICE: 44 rows copied from range_rel_1 (1 row) SELECT create_partitions_from_range('range_rel', 'id', 1, 1000, 100); -NOTICE: Copying data to partitions... create_partitions_from_range ------------------------------ 10 @@ -1544,7 +1534,6 @@ NOTICE: function public.range_rel_update_trigger_func() does not exist, skippin (1 row) SELECT create_partitions_from_range('range_rel', 'dt', '2015-01-01'::date, '2015-12-01'::date, '1 month'::interval); -NOTICE: Copying data to partitions... create_partitions_from_range ------------------------------ 12 @@ -1568,7 +1557,6 @@ ERROR: Relation 'messages' is referenced from other relations ALTER TABLE replies DROP CONSTRAINT replies_message_id_fkey; SELECT create_range_partitions('messages', 'id', 1, 100, 2); NOTICE: sequence "messages_seq" does not exist, skipping -NOTICE: Copying data to partitions... create_range_partitions ------------------------- 2 diff --git a/hash.sql b/hash.sql index ed3883c3de..fece5499d5 100644 --- a/hash.sql +++ b/hash.sql @@ -12,59 +12,60 @@ * Creates hash partitions for specified relation */ CREATE OR REPLACE FUNCTION @extschema@.create_hash_partitions( - relation REGCLASS - , attribute TEXT - , partitions_count INTEGER + parent_relid REGCLASS, + attribute TEXT, + partitions_count INTEGER ) RETURNS INTEGER AS $$ DECLARE - v_relname TEXT; - v_child_relname TEXT; - v_type TEXT; - v_plain_schema TEXT; - v_plain_relname TEXT; - v_hashfunc TEXT; + v_child_relname TEXT; + v_type TEXT; + v_plain_schema TEXT; + v_plain_relname TEXT; + v_hashfunc TEXT; + BEGIN - v_relname := @extschema@.validate_relname(relation); + PERFORM @extschema@.validate_relname(parent_relid); attribute := lower(attribute); - PERFORM @extschema@.common_relation_checks(relation, attribute); + PERFORM @extschema@.common_relation_checks(parent_relid, attribute); - v_type := @extschema@.get_attribute_type_name(v_relname, attribute); + v_type := 
@extschema@.get_attribute_type_name(parent_relid, attribute); SELECT * INTO v_plain_schema, v_plain_relname - FROM @extschema@.get_plain_schema_and_relname(relation); + FROM @extschema@.get_plain_schema_and_relname(parent_relid); - v_hashfunc := @extschema@.get_type_hash_func(v_type::regtype::oid)::regproc; + v_hashfunc := @extschema@.get_type_hash_func(v_type::regtype)::regproc; /* Insert new entry to pathman config */ INSERT INTO @extschema@.pathman_config (partrel, attname, parttype) - VALUES (relation, attribute, 1); + VALUES (parent_relid, attribute, 1); /* Create partitions and update pg_pathman configuration */ FOR partnum IN 0..partitions_count-1 LOOP v_child_relname := format('%s.%s', - v_plain_schema, + quote_ident(v_plain_schema), quote_ident(v_plain_relname || '_' || partnum)); - EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)' - , v_child_relname - , v_relname); - - EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (@extschema@.get_hash(%s(%s), %s) = %s)' - , v_child_relname - , @extschema@.build_check_constraint_name(v_child_relname::regclass, attribute) - , v_hashfunc - , attribute - , partitions_count - , partnum); + EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', + v_child_relname, + parent_relid::text); + + EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (@extschema@.get_hash(%s(%s), %s) = %s)', + v_child_relname, + @extschema@.build_check_constraint_name(v_child_relname::regclass, + attribute), + v_hashfunc, + attribute, + partitions_count, + partnum); END LOOP; /* Notify backend about changes */ - PERFORM @extschema@.on_create_partitions(relation::oid); + PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - PERFORM @extschema@.partition_data(relation); + PERFORM @extschema@.partition_data(parent_relid); RETURN partitions_count; END @@ -75,7 +76,7 @@ SET client_min_messages = WARNING; * Creates an update trigger */ CREATE OR REPLACE FUNCTION 
@extschema@.create_hash_update_trigger( - IN relation REGCLASS) + parent_relid REGCLASS) RETURNS VOID AS $$ DECLARE @@ -105,52 +106,53 @@ DECLARE END $body$ LANGUAGE plpgsql'; - trigger TEXT := 'CREATE TRIGGER %s - BEFORE UPDATE ON %s - FOR EACH ROW EXECUTE PROCEDURE %s()'; - - att_names TEXT; - old_fields TEXT; - new_fields TEXT; - att_val_fmt TEXT; - att_fmt TEXT; - relid INTEGER; - partitions_count INTEGER; - attr TEXT; - plain_schema TEXT; - plain_relname TEXT; - funcname TEXT; - triggername TEXT; - child_relname_format TEXT; - atttype TEXT; - hashfunc TEXT; + trigger TEXT := 'CREATE TRIGGER %s + BEFORE UPDATE ON %s + FOR EACH ROW EXECUTE PROCEDURE %s()'; + + att_names TEXT; + old_fields TEXT; + new_fields TEXT; + att_val_fmt TEXT; + att_fmt TEXT; + attr TEXT; + plain_schema TEXT; + plain_relname TEXT; + funcname TEXT; + triggername TEXT; + child_relname_format TEXT; + atttype TEXT; + hashfunc TEXT; + partitions_count INTEGER; BEGIN SELECT * INTO plain_schema, plain_relname - FROM @extschema@.get_plain_schema_and_relname(relation); + FROM @extschema@.get_plain_schema_and_relname(parent_relid); - relid := relation::oid; SELECT string_agg(attname, ', '), string_agg('OLD.' || attname, ', '), string_agg('NEW.' 
|| attname, ', '), - string_agg('CASE WHEN NOT $' || attnum || ' IS NULL THEN ' || attname || ' = $' || attnum || - ' ELSE ' || attname || ' IS NULL END', ' AND '), + string_agg('CASE WHEN NOT $' || attnum || ' IS NULL THEN ' || + attname || ' = $' || attnum || ' ' || + 'ELSE ' || + attname || ' IS NULL END', + ' AND '), string_agg('$' || attnum, ', ') FROM pg_attribute - WHERE attrelid=relid AND attnum>0 + WHERE attrelid = parent_relid AND attnum > 0 INTO att_names, old_fields, new_fields, att_val_fmt, att_fmt; - attr := attname FROM @extschema@.pathman_config WHERE partrel = relation; + attr := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; IF attr IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(relation::TEXT); + RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); END IF; - partitions_count := COUNT(*) FROM pg_inherits WHERE inhparent = relation::oid; + partitions_count := COUNT(*) FROM pg_inherits WHERE inhparent = parent_relid::oid; /* Function name, trigger name and child relname template */ funcname := plain_schema || '.' 
|| quote_ident(format('%s_update_trigger_func', plain_relname)); @@ -158,8 +160,8 @@ BEGIN triggername := quote_ident(format('%s_%s_update_trigger', plain_schema, plain_relname)); /* base hash function for type */ - atttype := @extschema@.get_attribute_type_name(relation, attr); - hashfunc := @extschema@.get_type_hash_func(atttype::regtype::oid)::regproc; + atttype := @extschema@.get_attribute_type_name(parent_relid, attr); + hashfunc := @extschema@.get_type_hash_func(atttype::regtype)::regproc; /* Format function definition and execute it */ func := format(func, funcname, attr, partitions_count, att_val_fmt, @@ -169,10 +171,10 @@ BEGIN /* Create triggers on child relations */ FOR num IN 0..partitions_count-1 LOOP - EXECUTE format(trigger - , triggername - , format(child_relname_format, num) - , funcname); + EXECUTE format(trigger, + triggername, + format(child_relname_format, num), + funcname); END LOOP; END $$ LANGUAGE plpgsql; @@ -180,7 +182,7 @@ $$ LANGUAGE plpgsql; /* * Returns hash function OID for specified type */ -CREATE OR REPLACE FUNCTION @extschema@.get_type_hash_func(OID) +CREATE OR REPLACE FUNCTION @extschema@.get_type_hash_func(REGTYPE) RETURNS OID AS 'pg_pathman', 'get_type_hash_func' LANGUAGE C STRICT; diff --git a/init.sql b/init.sql index 809fe20c74..fe7496ca86 100644 --- a/init.sql +++ b/init.sql @@ -35,28 +35,27 @@ SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config', ''); * Copy rows to partitions */ CREATE OR REPLACE FUNCTION @extschema@.partition_data( - p_parent regclass - , p_invalidate_cache_on_error BOOLEAN DEFAULT FALSE - , OUT p_total BIGINT) + p_parent REGCLASS, + p_invalidate_cache_on_error BOOLEAN DEFAULT FALSE, + OUT p_total BIGINT) AS $$ DECLARE - relname TEXT; - rec RECORD; - cnt BIGINT := 0; + relname TEXT; + rec RECORD; + cnt BIGINT := 0; + BEGIN relname := @extschema@.validate_relname(p_parent); p_total := 0; /* Create partitions and copy rest of the data */ - RAISE NOTICE 'Copying data to partitions...'; - 
EXECUTE format(' - WITH part_data AS ( - DELETE FROM ONLY %s RETURNING *) - INSERT INTO %s SELECT * FROM part_data' - , relname - , relname); + EXECUTE format('WITH part_data AS (DELETE FROM ONLY %1$s RETURNING *) + INSERT INTO %1$s SELECT * FROM part_data', + relname); + + /* Get number of inserted rows */ GET DIAGNOSTICS p_total = ROW_COUNT; RETURN; END @@ -66,7 +65,8 @@ LANGUAGE plpgsql; /* * Disable pathman partitioning for specified relation */ -CREATE OR REPLACE FUNCTION @extschema@.disable_partitioning(relation regclass) +CREATE OR REPLACE FUNCTION @extschema@.disable_partitioning( + relation REGCLASS) RETURNS VOID AS $$ BEGIN @@ -85,15 +85,17 @@ LANGUAGE plpgsql; * Aggregates several common relation checks before partitioning. Suitable for every partitioning type. */ CREATE OR REPLACE FUNCTION @extschema@.common_relation_checks( - p_relation REGCLASS - , p_attribute TEXT) + p_relation REGCLASS, + p_attribute TEXT) RETURNS BOOLEAN AS $$ DECLARE - v_rec RECORD; - is_referenced BOOLEAN; + v_rec RECORD; + is_referenced BOOLEAN; + BEGIN - IF EXISTS (SELECT * FROM @extschema@.pathman_config WHERE partrel = p_relation) THEN + IF EXISTS (SELECT * FROM @extschema@.pathman_config + WHERE partrel = p_relation) THEN RAISE EXCEPTION 'Relation "%" has already been partitioned', p_relation; END IF; @@ -106,7 +108,8 @@ BEGIN FROM pg_constraint WHERE confrelid = p_relation::regclass::oid) LOOP is_referenced := TRUE; - RAISE WARNING 'Foreign key ''%'' references to the relation ''%''', v_rec.conname, p_relation; + RAISE WARNING 'Foreign key ''%'' references to the relation ''%''', + v_rec.conname, p_relation; END LOOP; IF is_referenced THEN @@ -121,11 +124,15 @@ LANGUAGE plpgsql; /* * Returns relname without quotes or something */ -CREATE OR REPLACE FUNCTION @extschema@.get_plain_schema_and_relname(cls regclass, OUT schema TEXT, OUT relname TEXT) +CREATE OR REPLACE FUNCTION @extschema@.get_plain_schema_and_relname( + cls REGCLASS, + OUT schema TEXT, + OUT relname TEXT) AS $$ 
BEGIN - SELECT relnamespace::regnamespace, pg_class.relname FROM pg_class WHERE oid = cls::oid + SELECT relnamespace::regnamespace, pg_class.relname + FROM pg_class WHERE oid = cls::oid INTO schema, relname; END $$ @@ -134,7 +141,8 @@ LANGUAGE plpgsql; /* * Validates relation name. It must be schema qualified */ -CREATE OR REPLACE FUNCTION @extschema@.validate_relname(cls regclass) +CREATE OR REPLACE FUNCTION @extschema@.validate_relname( + cls REGCLASS) RETURNS TEXT AS $$ BEGIN @@ -147,9 +155,9 @@ LANGUAGE plpgsql; * Returns schema-qualified name for table */ CREATE OR REPLACE FUNCTION @extschema@.get_schema_qualified_name( - cls REGCLASS - , delimiter TEXT DEFAULT '_' - , suffix TEXT DEFAULT '') + cls REGCLASS, + delimiter TEXT DEFAULT '_', + suffix TEXT DEFAULT '') RETURNS TEXT AS $$ BEGIN @@ -165,11 +173,13 @@ LANGUAGE plpgsql; /* * Check if two relations have equal structures */ -CREATE OR REPLACE FUNCTION @extschema@.validate_relations_equality(relation1 OID, relation2 OID) +CREATE OR REPLACE FUNCTION @extschema@.validate_relations_equality( + relation1 OID, relation2 OID) RETURNS BOOLEAN AS $$ DECLARE - rec RECORD; + rec RECORD; + BEGIN FOR rec IN ( WITH @@ -181,11 +191,11 @@ BEGIN ) LOOP IF rec.name1 IS NULL OR rec.name2 IS NULL OR rec.name1 != rec.name2 THEN - RETURN False; + RETURN false; END IF; END LOOP; - RETURN True; + RETURN true; END $$ LANGUAGE plpgsql; @@ -197,8 +207,8 @@ CREATE OR REPLACE FUNCTION @extschema@.pathman_ddl_trigger_func() RETURNS event_trigger AS $$ DECLARE - obj record; - pg_class_oid oid; + obj record; + pg_class_oid oid; BEGIN pg_class_oid = 'pg_class'::regclass; @@ -220,13 +230,15 @@ LANGUAGE plpgsql; /* * Drop trigger */ -CREATE OR REPLACE FUNCTION @extschema@.drop_triggers(IN relation REGCLASS) +CREATE OR REPLACE FUNCTION @extschema@.drop_triggers( + relation REGCLASS) RETURNS VOID AS $$ DECLARE relname TEXT; schema TEXT; funcname TEXT; + BEGIN SELECT * INTO schema, relname FROM 
@extschema@.get_plain_schema_and_relname(relation); @@ -241,48 +253,52 @@ $$ LANGUAGE plpgsql; * If delete_data set to TRUE then partitions will be dropped with all the data */ CREATE OR REPLACE FUNCTION @extschema@.drop_partitions( - relation REGCLASS - , delete_data BOOLEAN DEFAULT FALSE) + parent_relid REGCLASS, + delete_data BOOLEAN DEFAULT FALSE) RETURNS INTEGER AS $$ DECLARE - v_rec RECORD; - v_rows INTEGER; - v_part_count INTEGER := 0; - v_relname TEXT; - conf_num_del INTEGER; + v_rec RECORD; + v_rows INTEGER; + v_part_count INTEGER := 0; + v_relname TEXT; + conf_num_del INTEGER; + BEGIN - v_relname := @extschema@.validate_relname(relation); + v_relname := @extschema@.validate_relname(parent_relid); /* Drop trigger first */ - PERFORM @extschema@.drop_triggers(relation); + PERFORM @extschema@.drop_triggers(parent_relid); WITH config_num_deleted AS (DELETE FROM @extschema@.pathman_config - WHERE partrel = relation + WHERE partrel = parent_relid RETURNING *) SELECT count(*) from config_num_deleted INTO conf_num_del; IF conf_num_del = 0 THEN - RAISE EXCEPTION 'table % has no partitions', relation::text; + RAISE EXCEPTION 'table % has no partitions', parent_relid::text; END IF; FOR v_rec IN (SELECT inhrelid::regclass::text AS tbl - FROM pg_inherits WHERE inhparent::regclass = relation) + FROM pg_inherits WHERE inhparent::regclass = parent_relid) LOOP IF NOT delete_data THEN EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) - INSERT INTO %s SELECT * FROM part_data' - , v_rec.tbl - , relation::text); + INSERT INTO %s SELECT * FROM part_data', + v_rec.tbl, + parent_relid::text); GET DIAGNOSTICS v_rows = ROW_COUNT; + + /* Show number of copied rows */ RAISE NOTICE '% rows copied from %', v_rows, v_rec.tbl; END IF; + EXECUTE format('DROP TABLE %s', v_rec.tbl); v_part_count := v_part_count + 1; END LOOP; /* Notify backend about changes */ - PERFORM @extschema@.on_remove_partitions(relation::oid); + PERFORM @extschema@.on_remove_partitions(parent_relid); 
RETURN v_part_count; END @@ -296,37 +312,27 @@ ON sql_drop EXECUTE PROCEDURE @extschema@.pathman_ddl_trigger_func(); -/* - * Acquire partitions lock to prevent concurrent partitions creation - */ -CREATE OR REPLACE FUNCTION @extschema@.acquire_partitions_lock() -RETURNS VOID AS 'pg_pathman', 'acquire_partitions_lock' -LANGUAGE C STRICT; - -/* - * Release partitions lock - */ -CREATE OR REPLACE FUNCTION @extschema@.release_partitions_lock() -RETURNS VOID AS 'pg_pathman', 'release_partitions_lock' -LANGUAGE C STRICT; - /* * Check if regclass is date or timestamp */ -CREATE OR REPLACE FUNCTION @extschema@.is_date_type(cls REGTYPE) +CREATE OR REPLACE FUNCTION @extschema@.is_date_type( + typid REGTYPE) RETURNS BOOLEAN AS 'pg_pathman', 'is_date_type' LANGUAGE C STRICT; -CREATE OR REPLACE FUNCTION @extschema@.on_create_partitions(relid OID) +CREATE OR REPLACE FUNCTION @extschema@.on_create_partitions( + relid REGCLASS) RETURNS VOID AS 'pg_pathman', 'on_partitions_created' LANGUAGE C STRICT; -CREATE OR REPLACE FUNCTION @extschema@.on_update_partitions(relid OID) +CREATE OR REPLACE FUNCTION @extschema@.on_update_partitions( + relid REGCLASS) RETURNS VOID AS 'pg_pathman', 'on_partitions_updated' LANGUAGE C STRICT; -CREATE OR REPLACE FUNCTION @extschema@.on_remove_partitions(relid OID) +CREATE OR REPLACE FUNCTION @extschema@.on_remove_partitions( + relid REGCLASS) RETURNS VOID AS 'pg_pathman', 'on_partitions_removed' LANGUAGE C STRICT; @@ -334,25 +340,29 @@ LANGUAGE C STRICT; /* * Checks if attribute is nullable */ -CREATE OR REPLACE FUNCTION @extschema@.is_attribute_nullable(REGCLASS, TEXT) +CREATE OR REPLACE FUNCTION @extschema@.is_attribute_nullable( + REGCLASS, TEXT) RETURNS BOOLEAN AS 'pg_pathman', 'is_attribute_nullable' LANGUAGE C STRICT; /* * Returns attribute type name for relation */ -CREATE OR REPLACE FUNCTION @extschema@.get_attribute_type_name(REGCLASS, TEXT) +CREATE OR REPLACE FUNCTION @extschema@.get_attribute_type_name( + REGCLASS, TEXT) RETURNS TEXT AS 
'pg_pathman', 'get_attribute_type_name' LANGUAGE C STRICT; /* * Build check constraint name for a specified relation's column */ -CREATE OR REPLACE FUNCTION @extschema@.build_check_constraint_name(REGCLASS, INT2) +CREATE OR REPLACE FUNCTION @extschema@.build_check_constraint_name( + REGCLASS, INT2) RETURNS TEXT AS 'pg_pathman', 'build_check_constraint_name_attnum' LANGUAGE C STRICT; -CREATE OR REPLACE FUNCTION @extschema@.build_check_constraint_name(REGCLASS, TEXT) +CREATE OR REPLACE FUNCTION @extschema@.build_check_constraint_name( + REGCLASS, TEXT) RETURNS TEXT AS 'pg_pathman', 'build_check_constraint_name_attname' LANGUAGE C STRICT; @@ -362,3 +372,10 @@ LANGUAGE C STRICT; CREATE OR REPLACE FUNCTION @extschema@.debug_capture() RETURNS VOID AS 'pg_pathman', 'debug_capture' LANGUAGE C STRICT; + +/* + * Get parent of pg_pathman's partition. + */ +CREATE OR REPLACE FUNCTION @extschema@.parent_of_partition(REGCLASS) +RETURNS REGCLASS AS 'pg_pathman', 'parent_of_partition' +LANGUAGE C STRICT; diff --git a/range.sql b/range.sql index 44a4e5f4ce..203205231a 100644 --- a/range.sql +++ b/range.sql @@ -9,8 +9,8 @@ */ CREATE OR REPLACE FUNCTION @extschema@.get_sequence_name( - plain_schema TEXT, - plain_relname TEXT) + plain_schema TEXT, + plain_relname TEXT) RETURNS TEXT AS $$ BEGIN @@ -20,9 +20,9 @@ $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION @extschema@.create_or_replace_sequence( - plain_schema TEXT, - plain_relname TEXT, - OUT seq_name TEXT) + plain_schema TEXT, + plain_relname TEXT, + OUT seq_name TEXT) AS $$ BEGIN seq_name := @extschema@.get_sequence_name(plain_schema, plain_relname); @@ -36,15 +36,14 @@ LANGUAGE plpgsql; * Creates RANGE partitions for specified relation based on datetime attribute */ CREATE OR REPLACE FUNCTION @extschema@.create_range_partitions( - p_relation REGCLASS - , p_attribute TEXT - , p_start_value ANYELEMENT - , p_interval INTERVAL - , p_count INTEGER DEFAULT NULL) + parent_relid REGCLASS, + p_attribute TEXT, + p_start_value ANYELEMENT, 
+ p_interval INTERVAL, + p_count INTEGER DEFAULT NULL) RETURNS INTEGER AS $$ DECLARE - v_relname TEXT; v_rows_count INTEGER; v_max p_start_value%TYPE; v_cur_value p_start_value%TYPE := p_start_value; @@ -53,14 +52,13 @@ DECLARE i INTEGER; BEGIN - v_relname := @extschema@.validate_relname(p_relation); + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); - PERFORM @extschema@.common_relation_checks(v_relname, p_attribute); + PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); /* Try to determine partitions count if not set */ IF p_count IS NULL THEN - EXECUTE format('SELECT count(*), max(%s) FROM %s' - , p_attribute, p_relation) + EXECUTE format('SELECT count(*), max(%s) FROM %s', p_attribute, parent_relid) INTO v_rows_count, v_max; IF v_rows_count = 0 THEN @@ -76,35 +74,38 @@ BEGIN END IF; /* Check boundaries */ - EXECUTE format('SELECT @extschema@.check_boundaries(''%s'', ''%s'', ''%s'', ''%s''::%s)' - , v_relname - , p_attribute - , p_start_value - , p_start_value + p_interval*p_count - , pg_typeof(p_start_value)); + EXECUTE format('SELECT @extschema@.check_boundaries(''%s'', ''%s'', ''%s'', ''%s''::%s)', + parent_relid, + p_attribute, + p_start_value, + p_start_value + p_interval * p_count, + pg_typeof(p_start_value)); + + SELECT * INTO v_plain_schema, v_plain_relname + FROM @extschema@.get_plain_schema_and_relname(parent_relid); /* Create sequence for child partitions names */ - SELECT * INTO v_plain_schema, v_plain_relname FROM @extschema@.get_plain_schema_and_relname(p_relation); PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); /* Insert new entry to pathman config */ INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) - VALUES (p_relation, p_attribute, 2, p_interval::text); + VALUES (parent_relid, p_attribute, 2, p_interval::text); /* create first partition */ FOR i IN 1..p_count LOOP - EXECUTE format('SELECT 
@extschema@.create_single_range_partition($1, $2, $3::%s);', pg_typeof(p_start_value)) - USING v_relname, p_start_value, p_start_value + p_interval; + EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s)', + pg_typeof(p_start_value)) + USING parent_relid, p_start_value, p_start_value + p_interval; p_start_value := p_start_value + p_interval; END LOOP; /* Notify backend about changes */ - PERFORM @extschema@.on_create_partitions(p_relation::oid); + PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - PERFORM @extschema@.partition_data(p_relation); + PERFORM @extschema@.partition_data(parent_relid); RETURN p_count; @@ -117,11 +118,11 @@ $$ LANGUAGE plpgsql; * Creates RANGE partitions for specified relation based on numerical attribute */ CREATE OR REPLACE FUNCTION @extschema@.create_range_partitions( - p_relation REGCLASS - , p_attribute TEXT - , p_start_value ANYELEMENT - , p_interval ANYELEMENT - , p_count INTEGER DEFAULT NULL) + parent_relid REGCLASS, + p_attribute TEXT, + p_start_value ANYELEMENT, + p_interval ANYELEMENT, + p_count INTEGER DEFAULT NULL) RETURNS INTEGER AS $$ DECLARE @@ -133,9 +134,9 @@ DECLARE i INTEGER; BEGIN - PERFORM @extschema@.validate_relname(p_relation); + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); - PERFORM @extschema@.common_relation_checks(p_relation, p_attribute); + PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); IF p_count <= 0 THEN RAISE EXCEPTION 'Partitions count must be greater than zero'; @@ -143,8 +144,7 @@ BEGIN /* Try to determine partitions count if not set */ IF p_count IS NULL THEN - EXECUTE format('SELECT count(*), max(%s) FROM %s' - , p_attribute, p_relation) + EXECUTE format('SELECT count(*), max(%s) FROM %s', p_attribute, parent_relid) INTO v_rows_count, v_max; IF v_rows_count = 0 THEN @@ -164,33 +164,35 @@ BEGIN END IF; /* check boundaries */ - PERFORM @extschema@.check_boundaries(p_relation - , 
p_attribute - , p_start_value - , p_start_value + p_interval*p_count); + PERFORM @extschema@.check_boundaries(parent_relid, + p_attribute, + p_start_value, + p_start_value + p_interval * p_count); + + SELECT * INTO v_plain_schema, v_plain_relname + FROM @extschema@.get_plain_schema_and_relname(parent_relid); /* Create sequence for child partitions names */ - SELECT * INTO v_plain_schema, v_plain_relname FROM @extschema@.get_plain_schema_and_relname(p_relation); PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); /* Insert new entry to pathman config */ INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) - VALUES (p_relation, p_attribute, 2, p_interval::text); + VALUES (parent_relid, p_attribute, 2, p_interval::text); /* create first partition */ FOR i IN 1..p_count LOOP - PERFORM @extschema@.create_single_range_partition(p_relation - , p_start_value - , p_start_value + p_interval); + PERFORM @extschema@.create_single_range_partition(parent_relid, + p_start_value, + p_start_value + p_interval); p_start_value := p_start_value + p_interval; END LOOP; /* Notify backend about changes */ - PERFORM @extschema@.on_create_partitions(p_relation::regclass::oid); + PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - PERFORM @extschema@.partition_data(p_relation); + PERFORM @extschema@.partition_data(parent_relid); RETURN p_count; @@ -203,59 +205,59 @@ $$ LANGUAGE plpgsql; * Creates RANGE partitions for specified range */ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( - p_relation REGCLASS - , p_attribute TEXT - , p_start_value ANYELEMENT - , p_end_value ANYELEMENT - , p_interval ANYELEMENT) + parent_relid REGCLASS, + p_attribute TEXT, + p_start_value ANYELEMENT, + p_end_value ANYELEMENT, + p_interval ANYELEMENT) RETURNS INTEGER AS $$ DECLARE v_plain_schema TEXT; v_plain_relname TEXT; - i INTEGER := 0; + part_count INTEGER := 0; BEGIN - PERFORM 
@extschema@.validate_relname(p_relation); + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); - PERFORM @extschema@.common_relation_checks(p_relation, p_attribute); + PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); IF p_interval <= 0 THEN RAISE EXCEPTION 'Interval must be positive'; END IF; + SELECT * INTO v_plain_schema, v_plain_relname + FROM @extschema@.get_plain_schema_and_relname(parent_relid); + /* Create sequence for child partitions names */ - SELECT * INTO v_plain_schema, v_plain_relname FROM @extschema@.get_plain_schema_and_relname(p_relation); PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); - /* check boundaries */ - PERFORM @extschema@.check_boundaries(p_relation - , p_attribute - , p_start_value - , p_end_value); + /* Check boundaries */ + PERFORM @extschema@.check_boundaries(parent_relid, + p_attribute, + p_start_value, + p_end_value); /* Insert new entry to pathman config */ INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) - VALUES (p_relation, p_attribute, 2, p_interval::text); + VALUES (parent_relid, p_attribute, 2, p_interval::text); WHILE p_start_value <= p_end_value LOOP - PERFORM @extschema@.create_single_range_partition(p_relation - , p_start_value - , p_start_value + p_interval); + PERFORM @extschema@.create_single_range_partition(parent_relid, + p_start_value, + p_start_value + p_interval); p_start_value := p_start_value + p_interval; - i := i + 1; + part_count := part_count + 1; END LOOP; - /* Create triggers */ - /* Notify backend about changes */ - PERFORM @extschema@.on_create_partitions(p_relation::regclass::oid); + PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - PERFORM @extschema@.partition_data(p_relation); + PERFORM @extschema@.partition_data(parent_relid); - RETURN i; + RETURN part_count; /* number of created partitions */ EXCEPTION WHEN others THEN RAISE EXCEPTION '%', SQLERRM; 
@@ -266,52 +268,56 @@ $$ LANGUAGE plpgsql; * Creates RANGE partitions for specified range based on datetime attribute */ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( - p_relation REGCLASS - , p_attribute TEXT - , p_start_value ANYELEMENT - , p_end_value ANYELEMENT - , p_interval INTERVAL) + parent_relid REGCLASS, + p_attribute TEXT, + p_start_value ANYELEMENT, + p_end_value ANYELEMENT, + p_interval INTERVAL) RETURNS INTEGER AS $$ DECLARE v_plain_schema TEXT; v_plain_relname TEXT; - i INTEGER := 0; + part_count INTEGER := 0; BEGIN - PERFORM @extschema@.validate_relname(p_relation); + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); - PERFORM @extschema@.common_relation_checks(p_relation, p_attribute); + PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); + + SELECT * INTO v_plain_schema, v_plain_relname + FROM @extschema@.get_plain_schema_and_relname(parent_relid); /* Create sequence for child partitions names */ - SELECT * INTO v_plain_schema, v_plain_relname FROM @extschema@.get_plain_schema_and_relname(p_relation); PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); /* Check boundaries */ - PERFORM @extschema@.check_boundaries(p_relation - , p_attribute - , p_start_value - , p_end_value); + PERFORM @extschema@.check_boundaries(parent_relid, + p_attribute, + p_start_value, + p_end_value); /* Insert new entry to pathman config */ INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) - VALUES (p_relation, p_attribute, 2, p_interval::text); + VALUES (parent_relid, p_attribute, 2, p_interval::text); WHILE p_start_value <= p_end_value LOOP - EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s);', pg_typeof(p_start_value)) - USING p_relation, p_start_value, p_start_value + p_interval; + EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s);', + pg_typeof(p_start_value)) + USING 
parent_relid, p_start_value, p_start_value + p_interval; + p_start_value := p_start_value + p_interval; - i := i + 1; + part_count := part_count + 1; END LOOP; /* Notify backend about changes */ - PERFORM @extschema@.on_create_partitions(p_relation::regclass::oid); + PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - PERFORM @extschema@.partition_data(p_relation); + PERFORM @extschema@.partition_data(parent_relid); - RETURN i; /* number of created partitions */ + RETURN part_count; /* number of created partitions */ EXCEPTION WHEN others THEN RAISE EXCEPTION '%', SQLERRM; @@ -322,10 +328,10 @@ $$ LANGUAGE plpgsql; * Check RANGE partition boundaries. */ CREATE OR REPLACE FUNCTION @extschema@.check_boundaries( - p_relation REGCLASS - , p_attribute TEXT - , p_start_value ANYELEMENT - , p_end_value ANYELEMENT) + parent_relid REGCLASS, + p_attribute TEXT, + p_start_value ANYELEMENT, + p_end_value ANYELEMENT) RETURNS VOID AS $$ DECLARE @@ -336,7 +342,7 @@ DECLARE BEGIN /* Get min and max values */ EXECUTE format('SELECT count(*), min(%s), max(%s) FROM %s WHERE NOT %s IS NULL', - p_attribute, p_attribute, p_relation::text, p_attribute) + p_attribute, p_attribute, parent_relid::text, p_attribute) INTO v_count, v_min, v_max; /* check that column has NULL values */ @@ -344,16 +350,16 @@ BEGIN RAISE EXCEPTION '''%'' column contains NULL values', p_attribute; END IF; - /* check lower boundary */ + /* Check lower boundary */ IF p_start_value > v_min THEN - RAISE EXCEPTION 'Start value is less than minimum value of ''%''' - , p_attribute; + RAISE EXCEPTION 'Start value is less than minimum value of ''%''', + p_attribute; END IF; - /* check upper boundary */ + /* Check upper boundary */ IF p_end_value <= v_max THEN - RAISE EXCEPTION 'Not enough partitions to fit all the values of ''%''' - , p_attribute; + RAISE EXCEPTION 'Not enough partitions to fit all the values of ''%''', + p_attribute; END IF; END $$ LANGUAGE plpgsql; @@ -362,9 +368,9 @@ $$ LANGUAGE 
plpgsql; * Creates new RANGE partition. Returns partition name */ CREATE OR REPLACE FUNCTION @extschema@.create_single_range_partition( - p_parent REGCLASS - , p_start_value ANYELEMENT - , p_end_value ANYELEMENT) + parent_relid REGCLASS, + p_start_value ANYELEMENT, + p_end_value ANYELEMENT) RETURNS TEXT AS $$ DECLARE @@ -373,7 +379,6 @@ DECLARE v_plain_child_relname TEXT; v_attname TEXT; v_sql TEXT; - v_cond TEXT; v_plain_schema TEXT; v_plain_relname TEXT; v_child_relname_exists BOOL; @@ -381,43 +386,46 @@ DECLARE BEGIN v_attname := attname FROM @extschema@.pathman_config - WHERE partrel = p_parent; + WHERE partrel = parent_relid; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(p_parent::TEXT); + RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::text); END IF; SELECT * INTO v_plain_schema, v_plain_relname - FROM @extschema@.get_plain_schema_and_relname(p_parent); + FROM @extschema@.get_plain_schema_and_relname(parent_relid); v_seq_name := @extschema@.get_sequence_name(v_plain_schema, v_plain_relname); - /* get next value from sequence */ + /* Get next value from sequence */ LOOP v_part_num := nextval(v_seq_name); v_plain_child_relname := format('%s_%s', v_plain_relname, v_part_num); v_child_relname := format('%s.%s', quote_ident(v_plain_schema), quote_ident(v_plain_child_relname)); + v_child_relname_exists := count(*) > 0 FROM pg_class - WHERE v_child_relname = quote_ident(relnamespace::regnamespace::text) || - '.' 
|| quote_ident(relname) + WHERE relname = v_plain_child_relname AND + relnamespace = v_plain_schema::regnamespace LIMIT 1; + EXIT WHEN v_child_relname_exists = false; END LOOP; - EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)' - , v_child_relname - , @extschema@.get_schema_qualified_name(p_parent, '.')); + EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', + v_child_relname, + @extschema@.get_schema_qualified_name(parent_relid, '.')); - v_cond := @extschema@.get_range_condition(v_attname, p_start_value, p_end_value); - v_sql := format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)' - , v_child_relname - , @extschema@.build_check_constraint_name(v_child_relname::regclass, v_attname) - , v_cond); + EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', + v_child_relname, + @extschema@.build_check_constraint_name(v_child_relname::regclass, + v_attname), + @extschema@.get_range_condition(v_attname, + p_start_value, + p_end_value)); - EXECUTE v_sql; RETURN v_child_relname; END $$ LANGUAGE plpgsql @@ -427,29 +435,23 @@ SET client_min_messages = WARNING; * Split RANGE partition */ CREATE OR REPLACE FUNCTION @extschema@.split_range_partition( - p_partition REGCLASS - , p_value ANYELEMENT - , OUT p_range ANYARRAY) + p_partition REGCLASS, + p_value ANYELEMENT, + OUT p_range ANYARRAY) RETURNS ANYARRAY AS $$ DECLARE - v_parent_relid OID; - v_child_relid OID := p_partition::oid; + v_parent_relid REGCLASS; v_attname TEXT; v_cond TEXT; v_new_partition TEXT; v_part_type INTEGER; v_part_relname TEXT; - v_plain_schema TEXT; - v_plain_relname TEXT; v_check_name TEXT; BEGIN v_part_relname := @extschema@.validate_relname(p_partition); - - v_parent_relid := inhparent - FROM pg_inherits - WHERE inhrelid = v_child_relid; + v_parent_relid = @extschema@.parent_of_partition(p_partition); SELECT attname, parttype FROM @extschema@.pathman_config @@ -458,19 +460,16 @@ BEGIN IF v_attname IS NULL THEN RAISE EXCEPTION 'Table % is not 
partitioned', - quote_ident(v_parent_relid::regclass::text); + quote_ident(v_parent_relid::text); END IF; - SELECT * INTO v_plain_schema, v_plain_relname - FROM @extschema@.get_plain_schema_and_relname(p_partition); - - /* Check if this is RANGE partition */ + /* Check if this is a RANGE partition */ IF v_part_type != 2 THEN RAISE EXCEPTION 'Specified partition isn''t RANGE partition'; END IF; /* Get partition values range */ - p_range := @extschema@.get_range_by_part_oid(v_parent_relid, v_child_relid, 0); + p_range := @extschema@.get_range_by_part_oid(v_parent_relid, p_partition, 0); IF p_range IS NULL THEN RAISE EXCEPTION 'Could not find specified partition'; END IF; @@ -484,33 +483,33 @@ BEGIN /* Create new partition */ v_new_partition := @extschema@.create_single_range_partition( - @extschema@.get_schema_qualified_name(v_parent_relid::regclass, '.'), + @extschema@.get_schema_qualified_name(v_parent_relid, '.'), p_value, p_range[2]); /* Copy data */ v_cond := @extschema@.get_range_condition(v_attname, p_value, p_range[2]); - EXECUTE format(' - WITH part_data AS ( - DELETE FROM %s WHERE %s RETURNING *) - INSERT INTO %s SELECT * FROM part_data' - , p_partition - , v_cond - , v_new_partition); + EXECUTE format('WITH part_data AS (DELETE FROM %s WHERE %s RETURNING *) + INSERT INTO %s SELECT * FROM part_data', + p_partition, + v_cond, + v_new_partition); /* Alter original partition */ v_cond := @extschema@.get_range_condition(v_attname, p_range[1], p_value); v_check_name := @extschema@.build_check_constraint_name(p_partition, v_attname); - EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s' - , p_partition::text - , v_check_name); - EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)' - , p_partition::text - , v_check_name - , v_cond); + + EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s', + p_partition::text, + v_check_name); + + EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', + p_partition::text, + v_check_name, + v_cond); /* Tell backend to 
reload configuration */ - PERFORM @extschema@.on_update_partitions(v_parent_relid::oid); + PERFORM @extschema@.on_update_partitions(v_parent_relid); END $$ LANGUAGE plpgsql; @@ -520,29 +519,27 @@ LANGUAGE plpgsql; * Merge RANGE partitions */ CREATE OR REPLACE FUNCTION @extschema@.merge_range_partitions( - p_partition1 REGCLASS - , p_partition2 REGCLASS) + partition1 REGCLASS, + partition2 REGCLASS) RETURNS VOID AS $$ DECLARE v_parent_relid1 OID; v_parent_relid2 OID; - v_part1_relid OID := p_partition1::oid; - v_part2_relid OID := p_partition2::oid; v_attname TEXT; v_part_type INTEGER; v_atttype TEXT; BEGIN - IF v_part1_relid = v_part2_relid THEN - RAISE EXCEPTION 'Cannot merge partition to itself'; + IF partition1 = partition2 THEN + RAISE EXCEPTION 'Cannot merge partition with itself'; END IF; - v_parent_relid1 := inhparent FROM pg_inherits WHERE inhrelid = v_part1_relid; - v_parent_relid2 := inhparent FROM pg_inherits WHERE inhrelid = v_part2_relid; + v_parent_relid1 := @extschema@.parent_of_partition(partition1); + v_parent_relid2 := @extschema@.parent_of_partition(partition2); IF v_parent_relid1 != v_parent_relid2 THEN - RAISE EXCEPTION 'Cannot merge partitions having different parents'; + RAISE EXCEPTION 'Cannot merge partitions with different parents'; END IF; SELECT attname, parttype @@ -555,18 +552,19 @@ BEGIN quote_ident(v_parent_relid1::regclass::text); END IF; - /* Check if this is RANGE partition */ + /* Check if this is a RANGE partition */ IF v_part_type != 2 THEN RAISE EXCEPTION 'Specified partitions aren''t RANGE partitions'; END IF; - v_atttype := @extschema@.get_attribute_type_name(p_partition1, v_attname); + v_atttype := @extschema@.get_attribute_type_name(partition1, v_attname); - EXECUTE format('SELECT @extschema@.merge_range_partitions_internal($1, $2 , $3, NULL::%s)', v_atttype) - USING v_parent_relid1, p_partition1 , p_partition2; + EXECUTE format('SELECT @extschema@.merge_range_partitions_internal($1, $2, $3, NULL::%s)', + v_atttype) + 
USING v_parent_relid1, partition1, partition2; /* Tell backend to reload configuration */ - PERFORM @extschema@.on_update_partitions(v_parent_relid1::oid); + PERFORM @extschema@.on_update_partitions(v_parent_relid1); END $$ LANGUAGE plpgsql; @@ -580,11 +578,11 @@ LANGUAGE plpgsql; * (it is necessary because of pseudo-types used in function) */ CREATE OR REPLACE FUNCTION @extschema@.merge_range_partitions_internal( - p_parent_relid OID - , p_part1 REGCLASS - , p_part2 REGCLASS - , dummy ANYELEMENT - , OUT p_range ANYARRAY) + parent_relid REGCLASS, + partition1 REGCLASS, + partition2 REGCLASS, + dummy ANYELEMENT, + OUT p_range ANYARRAY) RETURNS ANYARRAY AS $$ DECLARE @@ -594,12 +592,12 @@ DECLARE BEGIN SELECT attname FROM @extschema@.pathman_config - WHERE partrel = p_parent_relid + WHERE partrel = parent_relid INTO v_attname; IF v_attname IS NULL THEN RAISE EXCEPTION 'Table % is not partitioned', - quote_ident(p_parent_relid::regclass::text); + quote_ident(parent_relid::text); END IF; /* @@ -607,37 +605,36 @@ BEGIN * first and second elements of array are MIN and MAX of partition1 * third and forth elements are MIN and MAX of partition2 */ - p_range := @extschema@.get_range_by_part_oid(p_parent_relid, p_part1, 0) || - @extschema@.get_range_by_part_oid(p_parent_relid, p_part2, 0); + p_range := @extschema@.get_range_by_part_oid(parent_relid, partition1, 0) || + @extschema@.get_range_by_part_oid(parent_relid, partition2, 0); /* Check if ranges are adjacent */ IF p_range[1] != p_range[4] AND p_range[2] != p_range[3] THEN RAISE EXCEPTION 'Merge failed. 
Partitions must be adjacent'; END IF; - /* Extend first partition */ - v_cond := @extschema@.get_range_condition(v_attname - , least(p_range[1], p_range[3]) - , greatest(p_range[2], p_range[4])); - - /* Alter first partition */ - v_check_name := @extschema@.build_check_constraint_name(p_part1, v_attname); - EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s' - , p_part1::text - , v_check_name); - EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)' - , p_part1::text - , v_check_name - , v_cond); + /* Drop constraint on first partition... */ + v_check_name := @extschema@.build_check_constraint_name(partition1, v_attname); + EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s', + partition1::text, + v_check_name); + + /* and create a new one */ + EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', + partition1::text, + v_check_name, + @extschema@.get_range_condition(v_attname, + least(p_range[1], p_range[3]), + greatest(p_range[2], p_range[4]))); /* Copy data from second partition to the first one */ EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) - INSERT INTO %s SELECT * FROM part_data' - , p_part2::text - , p_part1::text); + INSERT INTO %s SELECT * FROM part_data', + partition2::text, + partition1::text); /* Remove second partition */ - EXECUTE format('DROP TABLE %s', p_part2::text); + EXECUTE format('DROP TABLE %s', partition2::text); END $$ LANGUAGE plpgsql; @@ -646,7 +643,7 @@ $$ LANGUAGE plpgsql; * Append new partition */ CREATE OR REPLACE FUNCTION @extschema@.append_range_partition( - p_relation REGCLASS) + parent_relid REGCLASS) RETURNS TEXT AS $$ DECLARE @@ -656,29 +653,24 @@ DECLARE v_interval TEXT; BEGIN - /* Prevent concurrent partition creation */ - PERFORM @extschema@.acquire_partitions_lock(); - SELECT attname, range_interval FROM @extschema@.pathman_config - WHERE partrel = p_relation + WHERE partrel = parent_relid INTO v_attname, v_interval; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', 
quote_ident(p_relation::TEXT); + RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); END IF; - v_atttype := @extschema@.get_attribute_type_name(p_relation, v_attname); + v_atttype := @extschema@.get_attribute_type_name(parent_relid, v_attname); - EXECUTE format('SELECT @extschema@.append_partition_internal($1, $2, $3, ARRAY[]::%s[])', v_atttype) + EXECUTE format('SELECT @extschema@.append_partition_internal($1, $2, $3, ARRAY[]::%s[])', + v_atttype) INTO v_part_name - USING p_relation, v_atttype, v_interval; + USING parent_relid, v_atttype, v_interval; /* Invalidate cache */ - PERFORM @extschema@.on_update_partitions(p_relation::oid); - - /* Release lock */ - PERFORM @extschema@.release_partitions_lock(); + PERFORM @extschema@.on_update_partitions(parent_relid); RETURN v_part_name; @@ -690,25 +682,26 @@ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION @extschema@.append_partition_internal( - p_relation REGCLASS - , p_atttype TEXT - , p_interval TEXT - , p_range ANYARRAY DEFAULT NULL) + parent_relid REGCLASS, + p_atttype TEXT, + p_interval TEXT, + p_range ANYARRAY DEFAULT NULL) RETURNS TEXT AS $$ DECLARE v_part_name TEXT; BEGIN - p_range := @extschema@.get_range_by_idx(p_relation::oid, -1, 0); + p_range := @extschema@.get_range_by_idx(parent_relid, -1, 0); IF @extschema@.is_date_type(p_atttype::regtype) THEN - v_part_name := @extschema@.create_single_range_partition(p_relation + v_part_name := @extschema@.create_single_range_partition(parent_relid , p_range[2] , p_range[2] + p_interval::interval); ELSE - EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $2 + $3::%s)', p_atttype) - USING p_relation, p_range[2], p_interval + EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $2 + $3::%s)', + p_atttype) + USING parent_relid, p_range[2], p_interval INTO v_part_name; END IF; @@ -721,7 +714,8 @@ LANGUAGE plpgsql; /* * Prepend new partition */ -CREATE OR REPLACE FUNCTION 
@extschema@.prepend_range_partition(p_relation REGCLASS) +CREATE OR REPLACE FUNCTION @extschema@.prepend_range_partition( + parent_relid REGCLASS) RETURNS TEXT AS $$ DECLARE @@ -731,29 +725,24 @@ DECLARE v_interval TEXT; BEGIN - /* Prevent concurrent partition creation */ - PERFORM @extschema@.acquire_partitions_lock(); - SELECT attname, range_interval FROM @extschema@.pathman_config - WHERE partrel = p_relation + WHERE partrel = parent_relid INTO v_attname, v_interval; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(p_relation::TEXT); + RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); END IF; - v_atttype := @extschema@.get_attribute_type_name(p_relation, v_attname); + v_atttype := @extschema@.get_attribute_type_name(parent_relid, v_attname); - EXECUTE format('SELECT @extschema@.prepend_partition_internal($1, $2, $3, ARRAY[]::%s[])', v_atttype) + EXECUTE format('SELECT @extschema@.prepend_partition_internal($1, $2, $3, ARRAY[]::%s[])', + v_atttype) INTO v_part_name - USING p_relation, v_atttype, v_interval; + USING parent_relid, v_atttype, v_interval; /* Invalidate cache */ - PERFORM @extschema@.on_update_partitions(p_relation::oid); - - /* Release lock */ - PERFORM @extschema@.release_partitions_lock(); + PERFORM @extschema@.on_update_partitions(parent_relid); RETURN v_part_name; @@ -765,25 +754,26 @@ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION @extschema@.prepend_partition_internal( - p_relation REGCLASS - , p_atttype TEXT - , p_interval TEXT - , p_range ANYARRAY DEFAULT NULL) + parent_relid REGCLASS, + p_atttype TEXT, + p_interval TEXT, + p_range ANYARRAY DEFAULT NULL) RETURNS TEXT AS $$ DECLARE v_part_name TEXT; BEGIN - p_range := @extschema@.get_range_by_idx(p_relation::oid, 0, 0); + p_range := @extschema@.get_range_by_idx(parent_relid, 0, 0); IF @extschema@.is_date_type(p_atttype::regtype) THEN - v_part_name := @extschema@.create_single_range_partition(p_relation - , p_range[1] - 
p_interval::interval - , p_range[1]); + v_part_name := @extschema@.create_single_range_partition(parent_relid, + p_range[1] - p_interval::interval, + p_range[1]); ELSE - EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2 - $3::%s, $2)', p_atttype) - USING p_relation, p_range[1], p_interval + EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2 - $3::%s, $2)', + p_atttype) + USING parent_relid, p_range[1], p_interval INTO v_part_name; END IF; @@ -797,20 +787,17 @@ LANGUAGE plpgsql; * Add new partition */ CREATE OR REPLACE FUNCTION @extschema@.add_range_partition( - p_relation REGCLASS - , p_start_value ANYELEMENT - , p_end_value ANYELEMENT) + parent_relid REGCLASS, + p_start_value ANYELEMENT, + p_end_value ANYELEMENT) RETURNS TEXT AS $$ DECLARE v_part_name TEXT; BEGIN - /* Prevent concurrent partition creation */ - PERFORM @extschema@.acquire_partitions_lock(); - /* check range overlap */ - IF @extschema@.check_overlap(p_relation::oid, p_start_value, p_end_value) != FALSE THEN + IF @extschema@.check_overlap(parent_relid, p_start_value, p_end_value) THEN RAISE EXCEPTION 'Specified range overlaps with existing partitions'; END IF; @@ -819,11 +806,10 @@ BEGIN END IF; /* Create new partition */ - v_part_name := @extschema@.create_single_range_partition(p_relation, p_start_value, p_end_value); - PERFORM @extschema@.on_update_partitions(p_relation::oid); - - /* Release lock */ - PERFORM @extschema@.release_partitions_lock(); + v_part_name := @extschema@.create_single_range_partition(parent_relid, + p_start_value, + p_end_value); + PERFORM @extschema@.on_update_partitions(parent_relid); RETURN v_part_name; @@ -838,32 +824,22 @@ LANGUAGE plpgsql; * Drop range partition */ CREATE OR REPLACE FUNCTION @extschema@.drop_range_partition( - p_partition REGCLASS) + p_partition REGCLASS) RETURNS TEXT AS $$ DECLARE + v_part_relid REGCLASS; v_part_name TEXT := p_partition::TEXT; - v_parent TEXT; v_count INTEGER; BEGIN - /* Prevent concurrent 
partition management */ - PERFORM @extschema@.acquire_partitions_lock(); - - /* Parent table name */ - SELECT inhparent::regclass INTO v_parent - FROM pg_inherits WHERE inhrelid::regclass = p_partition; - - IF v_parent IS NULL THEN - RAISE EXCEPTION 'Partition ''%'' not found', p_partition; - END IF; + v_part_relid = @extschema@.parent_of_partition(p_partition); - /* Drop table and update cache */ + /* Drop table */ EXECUTE format('DROP TABLE %s', p_partition::TEXT); - PERFORM @extschema@.on_update_partitions(v_parent::regclass::oid); - /* Release lock */ - PERFORM @extschema@.release_partitions_lock(); + /* Invalidate cache */ + PERFORM @extschema@.on_update_partitions(v_part_relid); RETURN v_part_name; @@ -878,10 +854,10 @@ LANGUAGE plpgsql; * Attach range partition */ CREATE OR REPLACE FUNCTION @extschema@.attach_range_partition( - p_relation REGCLASS - , p_partition REGCLASS - , p_start_value ANYELEMENT - , p_end_value ANYELEMENT) + parent_relid REGCLASS, + p_partition REGCLASS, + p_start_value ANYELEMENT, + p_end_value ANYELEMENT) RETURNS TEXT AS $$ DECLARE @@ -893,52 +869,46 @@ DECLARE BEGIN /* Ignore temporary tables */ - SELECT relpersistence FROM pg_catalog.pg_class WHERE oid = p_partition INTO rel_persistence; + SELECT relpersistence FROM pg_catalog.pg_class + WHERE oid = p_partition INTO rel_persistence; + IF rel_persistence = 't'::CHAR THEN - RAISE EXCEPTION 'Temporary table % cannot be used as a partition', + RAISE EXCEPTION 'Temporary table \"%\" cannot be used as a partition', quote_ident(p_partition::TEXT); END IF; - /* Prevent concurrent partition management */ - PERFORM @extschema@.acquire_partitions_lock(); - - IF @extschema@.check_overlap(p_relation::oid, p_start_value, p_end_value) != FALSE THEN + IF @extschema@.check_overlap(parent_relid, p_start_value, p_end_value) THEN RAISE EXCEPTION 'Specified range overlaps with existing partitions'; END IF; - IF NOT @extschema@.validate_relations_equality(p_relation, p_partition) THEN + IF NOT 
@extschema@.validate_relations_equality(parent_relid, p_partition) THEN RAISE EXCEPTION 'Partition must have the exact same structure as parent'; END IF; /* Set inheritance */ - EXECUTE format('ALTER TABLE %s INHERIT %s' - , p_partition - , p_relation); + EXECUTE format('ALTER TABLE %s INHERIT %s', p_partition, parent_relid); /* Set check constraint */ - v_attname := attname - FROM @extschema@.pathman_config - WHERE partrel = p_relation; + v_attname := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(p_relation::TEXT); + RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); END IF; - v_cond := @extschema@.get_range_condition(v_attname, p_start_value, p_end_value); - /* Plain partition name and schema */ - SELECT * INTO v_plain_schema, v_plain_partname FROM @extschema@.get_plain_schema_and_relname(p_partition); + SELECT * INTO v_plain_schema, v_plain_partname + FROM @extschema@.get_plain_schema_and_relname(p_partition); - EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)' - , p_partition - , @extschema@.build_check_constraint_name(p_partition, v_attname) - , v_cond); + EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', + p_partition, + @extschema@.build_check_constraint_name(p_partition, v_attname), + @extschema@.get_range_condition(v_attname, + p_start_value, + p_end_value)); /* Invalidate cache */ - PERFORM @extschema@.on_update_partitions(p_relation::oid); + PERFORM @extschema@.on_update_partitions(parent_relid); - /* Release lock */ - PERFORM @extschema@.release_partitions_lock(); RETURN p_partition; EXCEPTION WHEN others THEN @@ -952,20 +922,15 @@ LANGUAGE plpgsql; * Detach range partition */ CREATE OR REPLACE FUNCTION @extschema@.detach_range_partition( - p_partition TEXT) + p_partition REGCLASS) RETURNS TEXT AS $$ DECLARE - v_attname text; - v_parent regclass; + v_attname text; + v_parent regclass; BEGIN - /* 
Prevent concurrent partition management */ - PERFORM @extschema@.acquire_partitions_lock(); - - /* Parent table */ - SELECT inhparent::regclass INTO v_parent - FROM pg_inherits WHERE inhrelid = p_partition::regclass::oid; + v_parent = @extschema@.parent_of_partition(p_partition); v_attname := attname FROM @extschema@.pathman_config @@ -976,20 +941,18 @@ BEGIN END IF; /* Remove inheritance */ - EXECUTE format('ALTER TABLE %s NO INHERIT %s' - , p_partition - , v_parent); + EXECUTE format('ALTER TABLE %s NO INHERIT %s', + p_partition, + v_parent); /* Remove check constraint */ - EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s' - , p_partition - , @extschema@.build_check_constraint_name(p_partition, v_attname)); + EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s', + p_partition, + @extschema@.build_check_constraint_name(p_partition, v_attname)); /* Invalidate cache */ - PERFORM @extschema@.on_update_partitions(v_parent::regclass::oid); + PERFORM @extschema@.on_update_partitions(v_parent); - /* Release lock */ - PERFORM @extschema@.release_partitions_lock(); RETURN p_partition; EXCEPTION WHEN others THEN @@ -1003,7 +966,7 @@ LANGUAGE plpgsql; * Creates an update trigger */ CREATE OR REPLACE FUNCTION @extschema@.create_range_update_trigger( - IN relation REGCLASS) + IN parent_relid REGCLASS) RETURNS TEXT AS $$ DECLARE @@ -1018,7 +981,7 @@ DECLARE BEGIN old_oid := TG_RELID; new_oid := @extschema@.find_or_create_range_partition( - ''%1$s''::regclass::oid, NEW.%2$s); + ''%1$s''::regclass, NEW.%2$s); IF old_oid = new_oid THEN RETURN NEW; @@ -1052,36 +1015,42 @@ BEGIN SELECT string_agg(attname, ', '), string_agg('OLD.' || attname, ', '), string_agg('NEW.' 
|| attname, ', '), - string_agg('CASE WHEN NOT $' || attnum || ' IS NULL THEN ' || attname || ' = $' || attnum || - ' ELSE ' || attname || ' IS NULL END', ' AND '), + string_agg('CASE WHEN NOT $' || attnum || ' IS NULL THEN ' || + attname || ' = $' || attnum || ' ' || + 'ELSE ' || + attname || ' IS NULL END', + ' AND '), string_agg('$' || attnum, ', ') FROM pg_attribute - WHERE attrelid::regclass = relation AND attnum > 0 - INTO att_names, - old_fields, - new_fields, - att_val_fmt, - att_fmt; + WHERE attrelid::regclass = parent_relid AND attnum > 0 + INTO att_names, + old_fields, + new_fields, + att_val_fmt, + att_fmt; attr := attname FROM @extschema@.pathman_config - WHERE partrel = relation; + WHERE partrel = parent_relid; IF attr IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(relation::TEXT); + RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); END IF; - EXECUTE format(func, relation, attr, 0, att_val_fmt, + /* Create function for trigger */ + EXECUTE format(func, parent_relid, attr, 0, att_val_fmt, old_fields, att_fmt, new_fields); - FOR rec in (SELECT * FROM pg_inherits WHERE inhparent = relation::oid) + + /* Create trigger on every partition */ + FOR rec in (SELECT * FROM pg_inherits WHERE inhparent = parent_relid) LOOP - EXECUTE format(trigger - , @extschema@.get_schema_qualified_name(relation) - , rec.inhrelid::regclass - , relation); + EXECUTE format(trigger, + @extschema@.get_schema_qualified_name(parent_relid), + rec.inhrelid::regclass, + parent_relid); END LOOP; - RETURN format('%s_update_trigger_func()', relation); + RETURN format('%s_update_trigger_func()', parent_relid); END $$ LANGUAGE plpgsql; @@ -1090,9 +1059,9 @@ $$ LANGUAGE plpgsql; * Construct CHECK constraint condition for a range partition. 
*/ CREATE OR REPLACE FUNCTION @extschema@.get_range_condition( - p_attname TEXT, - p_start_value ANYELEMENT, - p_end_value ANYELEMENT) + p_attname TEXT, + p_start_value ANYELEMENT, + p_end_value ANYELEMENT) RETURNS TEXT AS 'pg_pathman', 'get_range_condition' LANGUAGE C STRICT; @@ -1100,7 +1069,9 @@ LANGUAGE C STRICT; * Returns N-th range (as an array of two elements). */ CREATE OR REPLACE FUNCTION @extschema@.get_range_by_idx( - parent_relid OID, idx INTEGER, dummy ANYELEMENT) + parent_relid REGCLASS, + idx INTEGER, + dummy ANYELEMENT) RETURNS ANYARRAY AS 'pg_pathman', 'get_range_by_idx' LANGUAGE C STRICT; @@ -1108,7 +1079,9 @@ LANGUAGE C STRICT; * Returns min and max values for specified RANGE partition. */ CREATE OR REPLACE FUNCTION @extschema@.get_range_by_part_oid( - parent_relid OID, partition_relid OID, dummy ANYELEMENT) + parent_relid REGCLASS, + partition_relid REGCLASS, + dummy ANYELEMENT) RETURNS ANYARRAY AS 'pg_pathman', 'get_range_by_part_oid' LANGUAGE C STRICT; @@ -1116,7 +1089,8 @@ LANGUAGE C STRICT; * Returns min value of the first partition's RangeEntry. */ CREATE OR REPLACE FUNCTION @extschema@.get_min_range_value( - parent_relid OID, dummy ANYELEMENT) + parent_relid REGCLASS, + dummy ANYELEMENT) RETURNS ANYELEMENT AS 'pg_pathman', 'get_min_range_value' LANGUAGE C STRICT; @@ -1124,7 +1098,8 @@ LANGUAGE C STRICT; * Returns max value of the last partition's RangeEntry. */ CREATE OR REPLACE FUNCTION @extschema@.get_max_range_value( - parent_relid OID, dummy ANYELEMENT) + parent_relid REGCLASS, + dummy ANYELEMENT) RETURNS ANYELEMENT AS 'pg_pathman', 'get_max_range_value' LANGUAGE C STRICT; @@ -1133,7 +1108,9 @@ LANGUAGE C STRICT; * Returns TRUE if overlaps and FALSE otherwise. 
*/ CREATE OR REPLACE FUNCTION @extschema@.check_overlap( - parent_relid OID, range_min ANYELEMENT, range_max ANYELEMENT) + parent_relid REGCLASS, + range_min ANYELEMENT, + range_max ANYELEMENT) RETURNS BOOLEAN AS 'pg_pathman', 'check_overlap' LANGUAGE C STRICT; @@ -1141,6 +1118,7 @@ LANGUAGE C STRICT; * Needed for an UPDATE trigger. */ CREATE OR REPLACE FUNCTION @extschema@.find_or_create_range_partition( - relid OID, value ANYELEMENT) -RETURNS OID AS 'pg_pathman', 'find_or_create_range_partition' + parent_relid REGCLASS, + value ANYELEMENT) +RETURNS REGCLASS AS 'pg_pathman', 'find_or_create_range_partition' LANGUAGE C STRICT; diff --git a/src/nodes_common.c b/src/nodes_common.c index da4c66fa47..06593fe7ae 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -155,7 +155,6 @@ append_part_attr_to_tlist(List *tlist, Index relno, PartRelationInfo *prel) if (!part_attr_found) { - /* TODO: how about collation support? */ Var *newvar = makeVar(relno, prel->attnum, prel->atttype, diff --git a/src/pg_pathman.c b/src/pg_pathman.c index a6212bb15f..e730b486c6 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -61,9 +61,14 @@ static Node *wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysT static bool disable_inheritance_subselect_walker(Node *node, void *context); /* "Partition creation"-related functions */ -static bool spawn_partitions(const PartRelationInfo *prel, FmgrInfo *cmp_proc, - Datum interval_binary, Oid interval_type, - Datum leading_bound, Datum value, bool forward, +static bool spawn_partitions(Oid partitioned_rel, + Datum value, + Datum leading_bound, + Oid leading_bound_type, + FmgrInfo *cmp_proc, + Datum interval_binary, + Oid interval_type, + bool forward, Oid *last_partition); /* Expression tree handlers */ @@ -576,6 +581,7 @@ wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysTrue) * sequntially. */ found = irange_list_find(wrap->rangeset, index, &lossy); + /* Return NULL for always true and always false. 
*/ if (!found) return NULL; @@ -588,7 +594,7 @@ wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysTrue) if (IsA(wrap->orig, BoolExpr)) { const BoolExpr *expr = (const BoolExpr *) wrap->orig; - BoolExpr *result; + BoolExpr *result; if (expr->boolop == OR_EXPR || expr->boolop == AND_EXPR) { @@ -600,7 +606,8 @@ wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysTrue) Node *arg; bool childAlwaysTrue; - arg = wrapper_make_expression((WrapperNode *)lfirst(lc), index, &childAlwaysTrue); + arg = wrapper_make_expression((WrapperNode *) lfirst(lc), + index, &childAlwaysTrue); #ifdef USE_ASSERT_CHECKING /* * We shouldn't get there for always true clause under OR and @@ -626,7 +633,7 @@ wrapper_make_expression(WrapperNode *wrap, int index, bool *alwaysTrue) result->args = args; result->boolop = expr->boolop; result->location = expr->location; - return (Node *)result; + return (Node *) result; } else return copyObject(wrap->orig); @@ -687,14 +694,15 @@ walk_expr_tree(Expr *expr, WalkerContext *context) * it into account while searching for the 'cmp_proc'. 
*/ static bool -spawn_partitions(const PartRelationInfo *prel, - FmgrInfo *cmp_proc, /* cmp(value, leading_bound) */ - Datum interval_binary, /* interval in binary form */ - Oid interval_type, /* INTERVALOID or prel->atttype */ - Datum leading_bound, /* current global min\max */ - Datum value, /* type isn't needed */ - bool forward, - Oid *last_partition) /* append\prepend */ +spawn_partitions(Oid partitioned_rel, /* parent's Oid */ + Datum value, /* value to be INSERTed */ + Datum leading_bound, /* current global min\max */ + Oid leading_bound_type, /* type of the boundary */ + FmgrInfo *cmp_proc, /* cmp(value, leading_bound) */ + Datum interval_binary, /* interval in binary form */ + Oid interval_type, /* INTERVALOID or prel->atttype */ + bool forward, /* append\prepend */ + Oid *last_partition) /* result (Oid of the last partition) */ { /* Cache "+"(leading_bound, interval) or "-"(leading_bound, interval) operator */ #define CacheOperator(finfo, opname, arg1, arg2, is_cached) \ @@ -715,8 +723,8 @@ spawn_partitions(const PartRelationInfo *prel, check_lt((compar), (a), (b)) \ ) - FmgrInfo interval_move_bound; /* move upper\lower boundary */ - bool interval_move_bound_cached = false; + FmgrInfo interval_move_bound; /* function to move upper\lower boundary */ + bool interval_move_bound_cached = false; /* is it cached already? */ bool done = false; Datum cur_part_leading = leading_bound; @@ -732,7 +740,7 @@ spawn_partitions(const PartRelationInfo *prel, while ((done = do_compare(cmp_proc, value, cur_part_leading, forward))) { char *nulls = NULL; /* no params are NULL */ - Oid types[3] = { REGCLASSOID, prel->atttype, prel->atttype }; + Oid types[3] = { REGCLASSOID, leading_bound_type, leading_bound_type }; Datum values[3]; int ret; @@ -740,7 +748,7 @@ spawn_partitions(const PartRelationInfo *prel, Datum cur_part_following = cur_part_leading; CacheOperator(&interval_move_bound, (forward ? 
"+" : "-"), - prel->atttype, interval_type, interval_move_bound_cached); + leading_bound_type, interval_type, interval_move_bound_cached); /* Move leading bound by interval (leading +\- INTERVAL) */ cur_part_leading = FunctionCall2(&interval_move_bound, @@ -748,7 +756,7 @@ spawn_partitions(const PartRelationInfo *prel, interval_binary); /* Fill in 'values' with parent's Oid and correct boundaries... */ - values[0] = prel->key; /* partitioned table's Oid */ + values[0] = partitioned_rel; /* partitioned table's Oid */ values[1] = forward ? cur_part_following : cur_part_leading; /* value #1 */ values[2] = forward ? cur_part_leading : cur_part_following; /* value #2 */ @@ -774,8 +782,8 @@ spawn_partitions(const PartRelationInfo *prel, #ifdef USE_ASSERT_CHECKING elog(DEBUG2, "%s partition with following='%s' & leading='%s' [%u]", (forward ? "Appending" : "Prepending"), - DebugPrintDatum(cur_part_following, prel->atttype), - DebugPrintDatum(cur_part_leading, prel->atttype), + DebugPrintDatum(cur_part_following, leading_bound_type), + DebugPrintDatum(cur_part_leading, leading_bound_type), MyProcPid); #endif } @@ -794,7 +802,7 @@ Oid create_partitions_internal(Oid relid, Datum value, Oid value_type) { MemoryContext old_mcxt = CurrentMemoryContext; - Oid partid = InvalidOid; /* default value */ + Oid partid = InvalidOid; /* last created partition (or InvalidOid) */ PG_TRY(); { @@ -869,12 +877,14 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) elog(ERROR, "Could not connect using SPI"); /* while (value >= MAX) ... */ - spawn_partitions(prel, &interval_type_cmp, interval_binary, - interval_type, max_rvalue, value, true, &partid); + spawn_partitions(prel->key, value, max_rvalue, prel->atttype, + &interval_type_cmp, interval_binary, + interval_type, true, &partid); /* while (value < MIN) ... 
*/ - spawn_partitions(prel, &interval_type_cmp, interval_binary, - interval_type, min_rvalue, value, false, &partid); + spawn_partitions(prel->key, value, min_rvalue, prel->atttype, + &interval_type_cmp, interval_binary, + interval_type, false, &partid); SPI_finish(); /* close SPI connection */ } @@ -923,7 +933,7 @@ create_partitions(Oid relid, Datum value, Oid value_type) elog(DEBUG2, "create_partitions(): chose BGW [%u]", MyProcPid); last_partition = create_partitions_bg_worker(relid, value, value_type); } - /* Else it'd better for the current backend to create partitions */ + /* Else it'd be better for the current backend to create partitions */ else { elog(DEBUG2, "create_partitions(): chose backend [%u]", MyProcPid); diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 075de1be5e..acdbccc766 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -31,8 +31,6 @@ PG_FUNCTION_INFO_V1( find_or_create_range_partition); PG_FUNCTION_INFO_V1( get_range_condition ); PG_FUNCTION_INFO_V1( get_range_by_idx ); PG_FUNCTION_INFO_V1( get_range_by_part_oid ); -PG_FUNCTION_INFO_V1( acquire_partitions_lock ); -PG_FUNCTION_INFO_V1( release_partitions_lock ); PG_FUNCTION_INFO_V1( check_overlap ); PG_FUNCTION_INFO_V1( get_min_range_value ); PG_FUNCTION_INFO_V1( get_max_range_value ); @@ -43,6 +41,7 @@ PG_FUNCTION_INFO_V1( build_check_constraint_name_attname ); PG_FUNCTION_INFO_V1( is_date_type ); PG_FUNCTION_INFO_V1( get_attribute_type_name ); PG_FUNCTION_INFO_V1( is_attribute_nullable ); +PG_FUNCTION_INFO_V1( parent_of_partition ); PG_FUNCTION_INFO_V1( debug_capture ); @@ -66,6 +65,7 @@ on_partitions_created_internal(Oid partitioned_table, bool add_callbacks) static void on_partitions_updated_internal(Oid partitioned_table, bool add_callbacks) { + /* TODO: shall we emit relcache invalidation event here? */ elog(DEBUG2, "on_partitions_updated() [add_callbacks = %s] " "triggered for relation %u", (add_callbacks ? 
"true" : "false"), partitioned_table); @@ -126,6 +126,7 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); + /* FIXME: does this function even work? */ search_state = search_range_partition_eq(value, &cmp_func,prel, &found_rentry); @@ -143,6 +144,7 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) { Oid child_oid = InvalidOid; + /* FIXME: useless double-checked lock (no new data) */ LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); LWLockAcquire(pmstate->edit_partitions_lock, LW_EXCLUSIVE); @@ -388,24 +390,6 @@ check_overlap(PG_FUNCTION_ARGS) PG_RETURN_BOOL(false); } -/* - * Acquire partitions lock - */ -Datum -acquire_partitions_lock(PG_FUNCTION_ARGS) -{ - /* FIXME: have to find another way (shmem maybe?) */ - LWLockAcquire(pmstate->edit_partitions_lock, LW_EXCLUSIVE); - PG_RETURN_NULL(); -} - -Datum -release_partitions_lock(PG_FUNCTION_ARGS) -{ - LWLockRelease(pmstate->edit_partitions_lock); - PG_RETURN_NULL(); -} - /* * Returns hash function OID for specified type */ @@ -524,6 +508,34 @@ is_attribute_nullable(PG_FUNCTION_ARGS) PG_RETURN_BOOL(result); /* keep compiler happy */ } +/* + * Get parent of a specified partition. + */ +Datum +parent_of_partition(PG_FUNCTION_ARGS) +{ + Oid partition = PG_GETARG_OID(0); + PartParentSearch parent_search; + Oid parent; + + /* Fetch parent & write down search status */ + parent = get_parent_of_partition(partition, &parent_search); + + /* We MUST be sure :) */ + Assert(parent_search != PPS_NOT_SURE); + + /* It must be parent known by pg_pathman */ + if (parent_search == PPS_ENTRY_PART_PARENT) + PG_RETURN_OID(parent); + else + { + elog(ERROR, "\%s\" is not pg_pathman's partition", + get_rel_name_or_relid(partition)); + + PG_RETURN_NULL(); + } +} + /* * NOTE: used for DEBUG, set breakpoint here. 
*/ @@ -535,4 +547,3 @@ debug_capture(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } - From bce18a56bf45859a0e9276220fae940214ff07b3 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 17 Aug 2016 15:39:42 +0300 Subject: [PATCH 046/184] rename: get_hash() -> get_hash_part_idx(), make_hash() -> hash_to_part_index(), parent_of_partition() -> get_parent_of_partition(), get_range_condition() -> build_range_condition(); qualify some catalog's relations, change behavior of get_schema_qualified_name() & validate_relname(), fixes --- expected/pg_pathman.out | 6 +- hash.sql | 28 +++-- init.sql | 79 ++++++------ range.sql | 46 +++---- sql/pg_pathman.sql | 6 +- src/init.c | 4 +- src/pathman.h | 2 +- src/pg_pathman.c | 29 +++-- src/pl_funcs.c | 262 +++++++++++++++++++++------------------- 9 files changed, 248 insertions(+), 214 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 0c4b889324..37a10d0f53 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -616,7 +616,7 @@ begin 'wrong plan provider'); perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Relation Name')::text, - format('"runtime_test_1_%s"', pathman.get_hash(hashint4(1), 6)), + format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(1), 6)), 'wrong partition'); select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans') into num; @@ -649,7 +649,7 @@ begin for i in 0..3 loop perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->i->'Relation Name')::text, - format('"runtime_test_1_%s"', pathman.get_hash(hashint4(i + 1), 6)), + format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), 'wrong partition'); num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; @@ -715,7 +715,7 @@ begin for i in 0..3 loop perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Relation Name')::text, - format('"runtime_test_2_%s"', pathman.get_hash(hashint4(i + 1), 6)), + format('"runtime_test_2_%s"', 
pathman.get_hash_part_idx(hashint4(i + 1), 6)), 'wrong partition'); num = plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Actual Loops'; diff --git a/hash.sql b/hash.sql index fece5499d5..8d4228dad6 100644 --- a/hash.sql +++ b/hash.sql @@ -49,9 +49,10 @@ BEGIN EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', v_child_relname, - parent_relid::text); + @extschema@.get_schema_qualified_name(parent_relid)); - EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (@extschema@.get_hash(%s(%s), %s) = %s)', + EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s + CHECK (@extschema@.get_hash_part_idx(%s(%s), %s) = %s)', v_child_relname, @extschema@.build_check_constraint_name(v_child_relname::regclass, attribute), @@ -84,22 +85,22 @@ DECLARE RETURNS TRIGGER AS $body$ DECLARE - old_hash INTEGER; - new_hash INTEGER; + old_idx INTEGER; /* partition indices */ + new_idx INTEGER; q TEXT; BEGIN - old_hash := @extschema@.get_hash(%9$s(OLD.%2$s), %3$s); - new_hash := @extschema@.get_hash(%9$s(NEW.%2$s), %3$s); + old_idx := @extschema@.get_hash_part_idx(%9$s(OLD.%2$s), %3$s); + new_idx := @extschema@.get_hash_part_idx(%9$s(NEW.%2$s), %3$s); - IF old_hash = new_hash THEN + IF old_idx = new_idx THEN RETURN NEW; END IF; - q := format(''DELETE FROM %8$s WHERE %4$s'', old_hash); + q := format(''DELETE FROM %8$s WHERE %4$s'', old_idx); EXECUTE q USING %5$s; - q := format(''INSERT INTO %8$s VALUES (%6$s)'', new_hash); + q := format(''INSERT INTO %8$s VALUES (%6$s)'', new_idx); EXECUTE q USING %7$s; RETURN NULL; @@ -138,7 +139,7 @@ BEGIN attname || ' IS NULL END', ' AND '), string_agg('$' || attnum, ', ') - FROM pg_attribute + FROM pg_catalog.pg_attribute WHERE attrelid = parent_relid AND attnum > 0 INTO att_names, old_fields, @@ -152,7 +153,8 @@ BEGIN RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); END IF; - partitions_count := COUNT(*) FROM pg_inherits WHERE inhparent = parent_relid::oid; + partitions_count := COUNT(*) FROM 
pg_catalog.pg_inherits + WHERE inhparent = parent_relid::oid; /* Function name, trigger name and child relname template */ funcname := plain_schema || '.' || quote_ident(format('%s_update_trigger_func', plain_relname)); @@ -189,6 +191,6 @@ LANGUAGE C STRICT; /* * Calculates hash for integer value */ -CREATE OR REPLACE FUNCTION @extschema@.get_hash(INTEGER, INTEGER) -RETURNS INTEGER AS 'pg_pathman', 'get_hash' +CREATE OR REPLACE FUNCTION @extschema@.get_hash_part_idx(INTEGER, INTEGER) +RETURNS INTEGER AS 'pg_pathman', 'get_hash_part_idx' LANGUAGE C STRICT; diff --git a/init.sql b/init.sql index fe7496ca86..bdf12e5610 100644 --- a/init.sql +++ b/init.sql @@ -35,9 +35,8 @@ SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config', ''); * Copy rows to partitions */ CREATE OR REPLACE FUNCTION @extschema@.partition_data( - p_parent REGCLASS, - p_invalidate_cache_on_error BOOLEAN DEFAULT FALSE, - OUT p_total BIGINT) + parent_relid REGCLASS, + OUT p_total BIGINT) AS $$ DECLARE @@ -46,14 +45,12 @@ DECLARE cnt BIGINT := 0; BEGIN - relname := @extschema@.validate_relname(p_parent); - p_total := 0; /* Create partitions and copy rest of the data */ EXECUTE format('WITH part_data AS (DELETE FROM ONLY %1$s RETURNING *) INSERT INTO %1$s SELECT * FROM part_data', - relname); + @extschema@.get_schema_qualified_name(parent_relid)); /* Get number of inserted rows */ GET DIAGNOSTICS p_total = ROW_COUNT; @@ -66,17 +63,17 @@ LANGUAGE plpgsql; * Disable pathman partitioning for specified relation */ CREATE OR REPLACE FUNCTION @extschema@.disable_partitioning( - relation REGCLASS) + parent_relid REGCLASS) RETURNS VOID AS $$ BEGIN - relation := @extschema@.validate_relname(relation); + PERFORM @extschema@.validate_relname(parent_relid); - DELETE FROM @extschema@.pathman_config WHERE partrel = relation; - PERFORM @extschema@.drop_triggers(relation); + DELETE FROM @extschema@.pathman_config WHERE partrel = parent_relid; + PERFORM @extschema@.drop_triggers(parent_relid); /* 
Notify backend about changes */ - PERFORM on_remove_partitions(relation::regclass::integer); + PERFORM @extschema@.on_remove_partitions(parent_relid); END $$ LANGUAGE plpgsql; @@ -131,41 +128,51 @@ CREATE OR REPLACE FUNCTION @extschema@.get_plain_schema_and_relname( AS $$ BEGIN - SELECT relnamespace::regnamespace, pg_class.relname - FROM pg_class WHERE oid = cls::oid + SELECT pg_catalog.pg_class.relnamespace::regnamespace, + pg_catalog.pg_class.relname + FROM pg_catalog.pg_class WHERE oid = cls::oid INTO schema, relname; END $$ LANGUAGE plpgsql; /* - * Validates relation name. It must be schema qualified + * Returns schema-qualified name for table */ -CREATE OR REPLACE FUNCTION @extschema@.validate_relname( - cls REGCLASS) +CREATE OR REPLACE FUNCTION @extschema@.get_schema_qualified_name( + cls REGCLASS, + delimiter TEXT DEFAULT '.', + suffix TEXT DEFAULT '') RETURNS TEXT AS $$ BEGIN - RETURN @extschema@.get_schema_qualified_name(cls, '.'); + RETURN (SELECT quote_ident(relnamespace::regnamespace::text) || + delimiter || + quote_ident(relname || suffix) + FROM pg_catalog.pg_class + WHERE oid = cls::oid); END $$ LANGUAGE plpgsql; /* - * Returns schema-qualified name for table + * Validates relation name. 
It must be schema qualified */ -CREATE OR REPLACE FUNCTION @extschema@.get_schema_qualified_name( - cls REGCLASS, - delimiter TEXT DEFAULT '_', - suffix TEXT DEFAULT '') +CREATE OR REPLACE FUNCTION @extschema@.validate_relname( + cls REGCLASS) RETURNS TEXT AS $$ +DECLARE + relname TEXT; + BEGIN - RETURN (SELECT quote_ident(relnamespace::regnamespace::text) || - delimiter || - quote_ident(relname || suffix) - FROM pg_class - WHERE oid = cls::oid); + relname = @extschema@.get_schema_qualified_name(cls); + + IF relname IS NULL THEN + RAISE EXCEPTION 'Relation %s does not exist', cls; + END IF; + + RETURN relname; END $$ LANGUAGE plpgsql; @@ -183,8 +190,10 @@ DECLARE BEGIN FOR rec IN ( WITH - a1 AS (select * from pg_attribute where attrelid = relation1 and attnum > 0), - a2 AS (select * from pg_attribute where attrelid = relation2 and attnum > 0) + a1 AS (select * from pg_catalog.pg_attribute + where attrelid = relation1 and attnum > 0), + a2 AS (select * from pg_catalog.pg_attribute + where attrelid = relation2 and attnum > 0) SELECT a1.attname name1, a2.attname name2, a1.atttypid type1, a2.atttypid type2 FROM a1 FULL JOIN a2 ON a1.attnum = a2.attnum @@ -211,7 +220,7 @@ DECLARE pg_class_oid oid; BEGIN - pg_class_oid = 'pg_class'::regclass; + pg_class_oid = 'pg_catalog.pg_class'::regclass; /* Handle 'DROP TABLE' events */ WITH to_be_deleted AS ( @@ -261,11 +270,10 @@ DECLARE v_rec RECORD; v_rows INTEGER; v_part_count INTEGER := 0; - v_relname TEXT; conf_num_del INTEGER; BEGIN - v_relname := @extschema@.validate_relname(parent_relid); + PERFORM @extschema@.validate_relname(parent_relid); /* Drop trigger first */ PERFORM @extschema@.drop_triggers(parent_relid); @@ -280,7 +288,8 @@ BEGIN END IF; FOR v_rec IN (SELECT inhrelid::regclass::text AS tbl - FROM pg_inherits WHERE inhparent::regclass = parent_relid) + FROM pg_catalog.pg_inherits + WHERE inhparent::regclass = parent_relid) LOOP IF NOT delete_data THEN EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) 
@@ -376,6 +385,6 @@ LANGUAGE C STRICT; /* * Get parent of pg_pathman's partition. */ -CREATE OR REPLACE FUNCTION @extschema@.parent_of_partition(REGCLASS) -RETURNS REGCLASS AS 'pg_pathman', 'parent_of_partition' +CREATE OR REPLACE FUNCTION @extschema@.get_parent_of_partition(REGCLASS) +RETURNS REGCLASS AS 'pg_pathman', 'get_parent_of_partition_pl' LANGUAGE C STRICT; diff --git a/range.sql b/range.sql index 203205231a..a09e898172 100644 --- a/range.sql +++ b/range.sql @@ -14,7 +14,9 @@ CREATE OR REPLACE FUNCTION @extschema@.get_sequence_name( RETURNS TEXT AS $$ BEGIN - RETURN format('%s.%s', plain_schema, quote_ident(format('%s_seq', plain_relname))); + RETURN format('%s.%s', + quote_ident(plain_schema), + quote_ident(format('%s_seq', plain_relname))); END $$ LANGUAGE plpgsql; @@ -416,15 +418,15 @@ BEGIN EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', v_child_relname, - @extschema@.get_schema_qualified_name(parent_relid, '.')); + @extschema@.get_schema_qualified_name(parent_relid)); EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', v_child_relname, @extschema@.build_check_constraint_name(v_child_relname::regclass, v_attname), - @extschema@.get_range_condition(v_attname, - p_start_value, - p_end_value)); + @extschema@.build_range_condition(v_attname, + p_start_value, + p_end_value)); RETURN v_child_relname; END @@ -451,7 +453,7 @@ DECLARE BEGIN v_part_relname := @extschema@.validate_relname(p_partition); - v_parent_relid = @extschema@.parent_of_partition(p_partition); + v_parent_relid = @extschema@.get_parent_of_partition(p_partition); SELECT attname, parttype FROM @extschema@.pathman_config @@ -483,12 +485,12 @@ BEGIN /* Create new partition */ v_new_partition := @extschema@.create_single_range_partition( - @extschema@.get_schema_qualified_name(v_parent_relid, '.'), + @extschema@.get_schema_qualified_name(v_parent_relid), p_value, p_range[2]); /* Copy data */ - v_cond := @extschema@.get_range_condition(v_attname, p_value, 
p_range[2]); + v_cond := @extschema@.build_range_condition(v_attname, p_value, p_range[2]); EXECUTE format('WITH part_data AS (DELETE FROM %s WHERE %s RETURNING *) INSERT INTO %s SELECT * FROM part_data', p_partition, @@ -496,7 +498,7 @@ BEGIN v_new_partition); /* Alter original partition */ - v_cond := @extschema@.get_range_condition(v_attname, p_range[1], p_value); + v_cond := @extschema@.build_range_condition(v_attname, p_range[1], p_value); v_check_name := @extschema@.build_check_constraint_name(p_partition, v_attname); EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s', @@ -535,8 +537,8 @@ BEGIN RAISE EXCEPTION 'Cannot merge partition with itself'; END IF; - v_parent_relid1 := @extschema@.parent_of_partition(partition1); - v_parent_relid2 := @extschema@.parent_of_partition(partition2); + v_parent_relid1 := @extschema@.get_parent_of_partition(partition1); + v_parent_relid2 := @extschema@.get_parent_of_partition(partition2); IF v_parent_relid1 != v_parent_relid2 THEN RAISE EXCEPTION 'Cannot merge partitions with different parents'; @@ -623,9 +625,9 @@ BEGIN EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', partition1::text, v_check_name, - @extschema@.get_range_condition(v_attname, - least(p_range[1], p_range[3]), - greatest(p_range[2], p_range[4]))); + @extschema@.build_range_condition(v_attname, + least(p_range[1], p_range[3]), + greatest(p_range[2], p_range[4]))); /* Copy data from second partition to the first one */ EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) @@ -833,7 +835,7 @@ DECLARE v_count INTEGER; BEGIN - v_part_relid = @extschema@.parent_of_partition(p_partition); + v_part_relid = @extschema@.get_parent_of_partition(p_partition); /* Drop table */ EXECUTE format('DROP TABLE %s', p_partition::TEXT); @@ -902,9 +904,9 @@ BEGIN EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', p_partition, @extschema@.build_check_constraint_name(p_partition, v_attname), - @extschema@.get_range_condition(v_attname, - 
p_start_value, - p_end_value)); + @extschema@.build_range_condition(v_attname, + p_start_value, + p_end_value)); /* Invalidate cache */ PERFORM @extschema@.on_update_partitions(parent_relid); @@ -930,7 +932,7 @@ DECLARE v_parent regclass; BEGIN - v_parent = @extschema@.parent_of_partition(p_partition); + v_parent = @extschema@.get_parent_of_partition(p_partition); v_attname := attname FROM @extschema@.pathman_config @@ -1045,7 +1047,7 @@ BEGIN FOR rec in (SELECT * FROM pg_inherits WHERE inhparent = parent_relid) LOOP EXECUTE format(trigger, - @extschema@.get_schema_qualified_name(parent_relid), + @extschema@.get_schema_qualified_name(parent_relid, '_'), rec.inhrelid::regclass, parent_relid); END LOOP; @@ -1058,11 +1060,11 @@ $$ LANGUAGE plpgsql; /* * Construct CHECK constraint condition for a range partition. */ -CREATE OR REPLACE FUNCTION @extschema@.get_range_condition( +CREATE OR REPLACE FUNCTION @extschema@.build_range_condition( p_attname TEXT, p_start_value ANYELEMENT, p_end_value ANYELEMENT) -RETURNS TEXT AS 'pg_pathman', 'get_range_condition' +RETURNS TEXT AS 'pg_pathman', 'build_range_condition' LANGUAGE C STRICT; /* diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 30d6187908..0dcfccce9a 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -205,7 +205,7 @@ begin 'wrong plan provider'); perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Relation Name')::text, - format('"runtime_test_1_%s"', pathman.get_hash(hashint4(1), 6)), + format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(1), 6)), 'wrong partition'); select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans') into num; @@ -239,7 +239,7 @@ begin for i in 0..3 loop perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->i->'Relation Name')::text, - format('"runtime_test_1_%s"', pathman.get_hash(hashint4(i + 1), 6)), + format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), 'wrong partition'); num = 
plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; @@ -307,7 +307,7 @@ begin for i in 0..3 loop perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Relation Name')::text, - format('"runtime_test_2_%s"', pathman.get_hash(hashint4(i + 1), 6)), + format('"runtime_test_2_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), 'wrong partition'); num = plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Actual Loops'; diff --git a/src/init.c b/src/init.c index 13f7300c8c..b2f76cbc71 100644 --- a/src/init.c +++ b/src/init.c @@ -718,7 +718,7 @@ read_opexpr_const(const OpExpr *opexpr, /* * Validate hash constraint. It MUST have this exact format: * - * get_hash(TYPE_HASH_PROC(VALUE), PARTITIONS_COUNT) = CUR_PARTITION_HASH + * get_hash_part_idx(TYPE_HASH_PROC(VALUE), PARTITIONS_COUNT) = CUR_PARTITION_HASH * * Writes 'part_hash' hash value for this partition on success. */ @@ -741,7 +741,7 @@ validate_hash_constraint(const Expr *expr, if (!IsA(linitial(eq_expr->args), FuncExpr)) return false; - get_hash_expr = (FuncExpr *) linitial(eq_expr->args); /* arg #1: get_hash(...) */ + get_hash_expr = (FuncExpr *) linitial(eq_expr->args); /* get_hash_part_idx(...) */ /* Is 'eqexpr' an equality operator? 
*/ tce = lookup_type_cache(get_hash_expr->funcresulttype, TYPECACHE_BTREE_OPFAMILY); diff --git a/src/pathman.h b/src/pathman.h index 4fa5c765c7..c471579c26 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -124,7 +124,7 @@ search_rangerel_result search_range_partition_eq(const Datum value, const PartRelationInfo *prel, RangeEntry *out_re); -uint32 make_hash(uint32 value, uint32 partitions); +uint32 hash_to_part_index(uint32 value, uint32 partitions); void handle_modification_query(Query *parse); void disable_inheritance(Query *parse); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index e730b486c6..e4e747da5f 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -1128,7 +1128,7 @@ select_range_partitions(const Datum value, } /* - * This function determines which partitions should appear in query plan + * This function determines which partitions should appear in query plan. */ static void handle_binary_opexpr(WalkerContext *context, WrapperNode *result, @@ -1157,10 +1157,10 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, if (strategy == BTEqualStrategyNumber) { Datum value = OidFunctionCall1(prel->hash_proc, c->constvalue); - uint32 hash = make_hash(DatumGetInt32(value), - PrelChildrenCount(prel)); + uint32 idx = hash_to_part_index(DatumGetInt32(value), + PrelChildrenCount(prel)); - result->rangeset = list_make1_irange(make_irange(hash, hash, true)); + result->rangeset = list_make1_irange(make_irange(idx, idx, true)); return; /* exit on equal */ } @@ -1224,10 +1224,10 @@ handle_binary_opexpr_param(const PartRelationInfo *prel, } /* - * Calculates hash value + * Convert hash value to the partition index. 
*/ uint32 -make_hash(uint32 value, uint32 partitions) +hash_to_part_index(uint32 value, uint32 partitions) { return value % partitions; } @@ -1317,9 +1317,9 @@ handle_const(const Const *c, WalkerContext *context) case PT_HASH: { Datum value = OidFunctionCall1(prel->hash_proc, c->constvalue); - uint32 hash = make_hash(DatumGetInt32(value), - PrelChildrenCount(prel)); - result->rangeset = list_make1_irange(make_irange(hash, hash, true)); + uint32 idx = hash_to_part_index(DatumGetInt32(value), + PrelChildrenCount(prel)); + result->rangeset = list_make1_irange(make_irange(idx, idx, true)); } break; @@ -1506,7 +1506,6 @@ handle_arrexpr(const ScalarArrayOpExpr *expr, WalkerContext *context) Node *varnode = (Node *) linitial(expr->args); Var *var; Node *arraynode = (Node *) lsecond(expr->args); - uint32 hash; const PartRelationInfo *prel = context->prel; result->orig = (const Node *)expr; @@ -1538,7 +1537,6 @@ handle_arrexpr(const ScalarArrayOpExpr *expr, WalkerContext *context) Datum *elem_values; bool *elem_nulls; int i; - Datum value; /* Extract values from array */ arrayval = DatumGetArrayTypeP(((Const *) arraynode)->constvalue); @@ -1554,11 +1552,16 @@ handle_arrexpr(const ScalarArrayOpExpr *expr, WalkerContext *context) /* Construct OIDs list */ for (i = 0; i < num_elems; i++) { + Datum value; + uint32 idx; + /* Invoke base hash function for value type */ value = OidFunctionCall1(prel->hash_proc, elem_values[i]); - hash = make_hash(DatumGetInt32(value), PrelChildrenCount(prel)); + idx = hash_to_part_index(DatumGetInt32(value), PrelChildrenCount(prel)); result->rangeset = irange_list_union(result->rangeset, - list_make1_irange(make_irange(hash, hash, true))); + list_make1_irange(make_irange(idx, + idx, + true))); } /* Free resources */ diff --git a/src/pl_funcs.c b/src/pl_funcs.c index acdbccc766..179df62792 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -27,21 +27,21 @@ PG_FUNCTION_INFO_V1( on_partitions_created ); PG_FUNCTION_INFO_V1( on_partitions_updated ); 
PG_FUNCTION_INFO_V1( on_partitions_removed ); +PG_FUNCTION_INFO_V1( get_parent_of_partition_pl ); +PG_FUNCTION_INFO_V1( get_attribute_type_name ); PG_FUNCTION_INFO_V1( find_or_create_range_partition); -PG_FUNCTION_INFO_V1( get_range_condition ); PG_FUNCTION_INFO_V1( get_range_by_idx ); PG_FUNCTION_INFO_V1( get_range_by_part_oid ); -PG_FUNCTION_INFO_V1( check_overlap ); PG_FUNCTION_INFO_V1( get_min_range_value ); PG_FUNCTION_INFO_V1( get_max_range_value ); PG_FUNCTION_INFO_V1( get_type_hash_func ); -PG_FUNCTION_INFO_V1( get_hash ); +PG_FUNCTION_INFO_V1( get_hash_part_idx ); +PG_FUNCTION_INFO_V1( check_overlap ); +PG_FUNCTION_INFO_V1( build_range_condition ); PG_FUNCTION_INFO_V1( build_check_constraint_name_attnum ); PG_FUNCTION_INFO_V1( build_check_constraint_name_attname ); PG_FUNCTION_INFO_V1( is_date_type ); -PG_FUNCTION_INFO_V1( get_attribute_type_name ); PG_FUNCTION_INFO_V1( is_attribute_nullable ); -PG_FUNCTION_INFO_V1( parent_of_partition ); PG_FUNCTION_INFO_V1( debug_capture ); @@ -51,7 +51,7 @@ static void on_partitions_removed_internal(Oid partitioned_table, bool add_callb /* - * Callbacks + * Callbacks. */ static void @@ -80,7 +80,7 @@ on_partitions_removed_internal(Oid partitioned_table, bool add_callbacks) } /* - * Thin layer between pure c and pl/PgSQL + * Thin layer between pure C and pl/PgSQL. */ Datum @@ -104,6 +104,64 @@ on_partitions_removed(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } + +/* + * Get parent of a specified partition. 
+ */ +Datum +get_parent_of_partition_pl(PG_FUNCTION_ARGS) +{ + Oid partition = PG_GETARG_OID(0); + PartParentSearch parent_search; + Oid parent; + + /* Fetch parent & write down search status */ + parent = get_parent_of_partition(partition, &parent_search); + + /* We MUST be sure :) */ + Assert(parent_search != PPS_NOT_SURE); + + /* It must be parent known by pg_pathman */ + if (parent_search == PPS_ENTRY_PART_PARENT) + PG_RETURN_OID(parent); + else + { + elog(ERROR, "\%s\" is not pg_pathman's partition", + get_rel_name_or_relid(partition)); + + PG_RETURN_NULL(); + } +} + +/* + * Get type (as text) of a given attribute. + */ +Datum +get_attribute_type_name(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + text *attname = PG_GETARG_TEXT_P(1); + char *result; + HeapTuple tp; + + /* NOTE: for now it's the most efficient way */ + tp = SearchSysCacheAttName(relid, text_to_cstring(attname)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + result = format_type_be(att_tup->atttypid); + ReleaseSysCache(tp); + + PG_RETURN_TEXT_P(cstring_to_text(result)); + } + else + elog(ERROR, "Cannot find type name for attribute \"%s\" " + "of relation \"%s\"", + text_to_cstring(attname), get_rel_name_or_relid(relid)); + + PG_RETURN_NULL(); /* keep compiler happy */ +} + /* * Returns partition oid for specified parent relid and value. * In case when partition doesn't exist try to create one. @@ -171,7 +229,10 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) } /* - * Returns range (min, max) as output parameters. + * Returns range entry (min, max) (in form of array). + * + * arg #1 is the parent's Oid. + * arg #2 is the partition's Oid. */ Datum get_range_by_part_oid(PG_FUNCTION_ARGS) @@ -220,52 +281,11 @@ get_range_by_part_oid(PG_FUNCTION_ARGS) } /* - * Formats range condition for a CHECK CONSTRAINT. 
- */ -Datum -get_range_condition(PG_FUNCTION_ARGS) -{ - text *attname = PG_GETARG_TEXT_P(0); - - Datum min_bound = PG_GETARG_DATUM(1), - max_bound = PG_GETARG_DATUM(2); - - Oid min_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 1), - max_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 2); - - char *subst_str; /* substitution string */ - char *result; - - /* This is not going to trigger (not now, at least), just for the safety */ - if (min_bound_type != max_bound_type) - elog(ERROR, "Cannot build range condition: " - "boundaries should be of the same type"); - - /* Check if we need single quotes */ - /* TODO: check for primitive types instead, that would be better */ - if (is_date_type_internal(min_bound_type) || - is_string_type_internal(min_bound_type)) - { - subst_str = "%1$s >= '%2$s' AND %1$s < '%3$s'"; - } - else - subst_str = "%1$s >= %2$s AND %1$s < %3$s"; - - /* Create range condition CSTRING */ - result = psprintf(subst_str, - text_to_cstring(attname), - datum_to_cstring(min_bound, min_bound_type), - datum_to_cstring(max_bound, max_bound_type)); - - PG_RETURN_TEXT_P(cstring_to_text(result)); -} - -/* - * Returns N-th range (in form of array) + * Returns N-th range entry (min, max) (in form of array). * - * First argument is the parent relid. - * Second argument is the index of the range (if it is - * negative then the last range will be returned). + * arg #1 is the parent's Oid. + * arg #2 is the index of the range + * (if it is negative then the last range will be returned). */ Datum get_range_by_idx(PG_FUNCTION_ARGS) @@ -306,7 +326,7 @@ get_range_by_idx(PG_FUNCTION_ARGS) } /* - * Returns min value of the first range for relation + * Returns min value of the first range for relation. */ Datum get_min_range_value(PG_FUNCTION_ARGS) @@ -326,7 +346,7 @@ get_min_range_value(PG_FUNCTION_ARGS) } /* - * Returns max value of the last range for relation + * Returns max value of the last range for relation. 
*/ Datum get_max_range_value(PG_FUNCTION_ARGS) @@ -390,9 +410,12 @@ check_overlap(PG_FUNCTION_ARGS) PG_RETURN_BOOL(false); } + /* - * Returns hash function OID for specified type + * HASH-related stuff. */ + +/* Returns hash function's OID for a specified type. */ Datum get_type_hash_func(PG_FUNCTION_ARGS) { @@ -404,39 +427,91 @@ get_type_hash_func(PG_FUNCTION_ARGS) PG_RETURN_OID(tce->hash_proc); } +/* Wrapper for hash_to_part_index() */ Datum -get_hash(PG_FUNCTION_ARGS) +get_hash_part_idx(PG_FUNCTION_ARGS) { uint32 value = PG_GETARG_UINT32(0), part_count = PG_GETARG_UINT32(1); - PG_RETURN_UINT32(make_hash(value, part_count)); + PG_RETURN_UINT32(hash_to_part_index(value, part_count)); } +/* + * Traits. + */ + Datum -get_attribute_type_name(PG_FUNCTION_ARGS) +is_date_type(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(is_date_type_internal(PG_GETARG_OID(0))); +} + +Datum +is_attribute_nullable(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); text *attname = PG_GETARG_TEXT_P(1); - char *result; + bool result = true; HeapTuple tp; - /* NOTE: for now it's the most efficient way */ tp = SearchSysCacheAttName(relid, text_to_cstring(attname)); if (HeapTupleIsValid(tp)) { Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); - result = format_type_be(att_tup->atttypid); + result = !att_tup->attnotnull; ReleaseSysCache(tp); - - PG_RETURN_TEXT_P(cstring_to_text(result)); } else elog(ERROR, "Cannot find type name for attribute \"%s\" " "of relation \"%s\"", text_to_cstring(attname), get_rel_name_or_relid(relid)); - PG_RETURN_NULL(); /* keep compiler happy */ + PG_RETURN_BOOL(result); /* keep compiler happy */ +} + + +/* + * Useful string builders. + */ + +/* Build range condition for a CHECK CONSTRAINT. 
*/ +Datum +build_range_condition(PG_FUNCTION_ARGS) +{ + text *attname = PG_GETARG_TEXT_P(0); + + Datum min_bound = PG_GETARG_DATUM(1), + max_bound = PG_GETARG_DATUM(2); + + Oid min_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 1), + max_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + + char *subst_str; /* substitution string */ + char *result; + + /* This is not going to trigger (not now, at least), just for the safety */ + if (min_bound_type != max_bound_type) + elog(ERROR, "Cannot build range condition: " + "boundaries should be of the same type"); + + /* Check if we need single quotes */ + /* TODO: check for primitive types instead, that would be better */ + if (is_date_type_internal(min_bound_type) || + is_string_type_internal(min_bound_type)) + { + subst_str = "%1$s >= '%2$s' AND %1$s < '%3$s'"; + } + else + subst_str = "%1$s >= %2$s AND %1$s < %3$s"; + + /* Create range condition CSTRING */ + result = psprintf(subst_str, + text_to_cstring(attname), + datum_to_cstring(min_bound, min_bound_type), + datum_to_cstring(max_bound, max_bound_type)); + + PG_RETURN_TEXT_P(cstring_to_text(result)); } Datum @@ -479,63 +554,6 @@ build_check_constraint_name_attname(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(cstring_to_text(result)); } -Datum -is_date_type(PG_FUNCTION_ARGS) -{ - PG_RETURN_BOOL(is_date_type_internal(PG_GETARG_OID(0))); -} - -Datum -is_attribute_nullable(PG_FUNCTION_ARGS) -{ - Oid relid = PG_GETARG_OID(0); - text *attname = PG_GETARG_TEXT_P(1); - bool result = true; - HeapTuple tp; - - tp = SearchSysCacheAttName(relid, text_to_cstring(attname)); - if (HeapTupleIsValid(tp)) - { - Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); - result = !att_tup->attnotnull; - ReleaseSysCache(tp); - } - else - elog(ERROR, "Cannot find type name for attribute \"%s\" " - "of relation \"%s\"", - text_to_cstring(attname), get_rel_name_or_relid(relid)); - - PG_RETURN_BOOL(result); /* keep compiler happy */ -} - -/* - * Get parent of a specified partition. 
- */ -Datum -parent_of_partition(PG_FUNCTION_ARGS) -{ - Oid partition = PG_GETARG_OID(0); - PartParentSearch parent_search; - Oid parent; - - /* Fetch parent & write down search status */ - parent = get_parent_of_partition(partition, &parent_search); - - /* We MUST be sure :) */ - Assert(parent_search != PPS_NOT_SURE); - - /* It must be parent known by pg_pathman */ - if (parent_search == PPS_ENTRY_PART_PARENT) - PG_RETURN_OID(parent); - else - { - elog(ERROR, "\%s\" is not pg_pathman's partition", - get_rel_name_or_relid(partition)); - - PG_RETURN_NULL(); - } -} - /* * NOTE: used for DEBUG, set breakpoint here. */ From 4e5e2fb2104bb97deb55e547a1cb820e0a670c78 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 17 Aug 2016 17:51:28 +0300 Subject: [PATCH 047/184] introduce functions build_update_trigger[_func]_name() --- expected/pg_pathman.out | 24 +++++++++--------- hash.sql | 48 +++++++++++++++++++---------------- init.sql | 22 ++++++++++------ range.sql | 56 ++++++++++++++++++++++------------------- src/pl_funcs.c | 37 +++++++++++++++++++++++++++ 5 files changed, 120 insertions(+), 67 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 37a10d0f53..d6fa0b215e 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -118,9 +118,9 @@ SET pg_pathman.enable_runtimemergeappend = OFF; VACUUM; /* update triggers test */ SELECT pathman.create_hash_update_trigger('test.hash_rel'); - create_hash_update_trigger ----------------------------- - + create_hash_update_trigger +----------------------------- + test.hash_rel_upd_trig_func (1 row) UPDATE test.hash_rel SET value = 7 WHERE value = 6; @@ -139,9 +139,9 @@ SELECT * FROM test.hash_rel WHERE value = 7; (1 row) SELECT pathman.create_range_update_trigger('test.num_range_rel'); - create_range_update_trigger ------------------------------------------- - test.num_range_rel_update_trigger_func() + create_range_update_trigger +---------------------------------- + 
test.num_range_rel_upd_trig_func (1 row) UPDATE test.num_range_rel SET id = 3001 WHERE id = 1; @@ -1068,7 +1068,7 @@ SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); (1 row) SELECT pathman.drop_partitions('test.hash_rel', TRUE); -NOTICE: function test.hash_rel_update_trigger_func() does not exist, skipping +NOTICE: function test.hash_rel_upd_trig_func() does not exist, skipping drop_partitions ----------------- 3 @@ -1221,7 +1221,7 @@ SELECT * FROM test."TeSt"; SELECT pathman.create_hash_update_trigger('test."TeSt"'); create_hash_update_trigger ---------------------------- - + test."TeSt_upd_trig_func" (1 row) UPDATE test."TeSt" SET a = 1; @@ -1305,7 +1305,7 @@ SELECT pathman.split_range_partition('test."RangeRel_1"', '2015-01-01'::DATE); (1 row) SELECT pathman.drop_partitions('test."RangeRel"'); -NOTICE: function test.RangeRel_update_trigger_func() does not exist, skipping +NOTICE: function test.RangeRel_upd_trig_func() does not exist, skipping NOTICE: 1 rows copied from test."RangeRel_6" NOTICE: 0 rows copied from test."RangeRel_4" NOTICE: 1 rows copied from test."RangeRel_3" @@ -1341,7 +1341,7 @@ SELECT pathman.create_range_partitions('test."RangeRel"', 'id', 1, 100, 3); (1 row) SELECT pathman.drop_partitions('test."RangeRel"'); -NOTICE: function test.RangeRel_update_trigger_func() does not exist, skipping +NOTICE: function test.RangeRel_upd_trig_func() does not exist, skipping NOTICE: 0 rows copied from test."RangeRel_3" NOTICE: 0 rows copied from test."RangeRel_2" NOTICE: 0 rows copied from test."RangeRel_1" @@ -1500,7 +1500,7 @@ EXPLAIN (COSTS OFF) DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02 DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02' AND r.id = t.id; /* Create range partitions from whole range */ SELECT drop_partitions('range_rel'); -NOTICE: function public.range_rel_update_trigger_func() does not exist, skipping +NOTICE: function public.range_rel_upd_trig_func() does not exist, skipping NOTICE: 0 rows 
copied from range_rel_15 NOTICE: 0 rows copied from range_rel_14 NOTICE: 14 rows copied from range_rel_13 @@ -1527,7 +1527,7 @@ SELECT create_partitions_from_range('range_rel', 'id', 1, 1000, 100); (1 row) SELECT drop_partitions('range_rel', TRUE); -NOTICE: function public.range_rel_update_trigger_func() does not exist, skipping +NOTICE: function public.range_rel_upd_trig_func() does not exist, skipping drop_partitions ----------------- 10 diff --git a/hash.sql b/hash.sql index 8d4228dad6..1292083e62 100644 --- a/hash.sql +++ b/hash.sql @@ -78,16 +78,15 @@ SET client_min_messages = WARNING; */ CREATE OR REPLACE FUNCTION @extschema@.create_hash_update_trigger( parent_relid REGCLASS) -RETURNS VOID AS +RETURNS TEXT AS $$ DECLARE - func TEXT := 'CREATE OR REPLACE FUNCTION %s() + func TEXT := 'CREATE OR REPLACE FUNCTION %1$s() RETURNS TRIGGER AS $body$ DECLARE old_idx INTEGER; /* partition indices */ new_idx INTEGER; - q TEXT; BEGIN old_idx := @extschema@.get_hash_part_idx(%9$s(OLD.%2$s), %3$s); @@ -97,11 +96,11 @@ DECLARE RETURN NEW; END IF; - q := format(''DELETE FROM %8$s WHERE %4$s'', old_idx); - EXECUTE q USING %5$s; + EXECUTE format(''DELETE FROM %8$s WHERE %4$s'', old_idx) + USING %5$s; - q := format(''INSERT INTO %8$s VALUES (%6$s)'', new_idx); - EXECUTE q USING %7$s; + EXECUTE format(''INSERT INTO %8$s VALUES (%6$s)'', new_idx) + USING %7$s; RETURN NULL; END $body$ @@ -119,16 +118,19 @@ DECLARE attr TEXT; plain_schema TEXT; plain_relname TEXT; + child_relname_format TEXT; funcname TEXT; triggername TEXT; - child_relname_format TEXT; atttype TEXT; hashfunc TEXT; partitions_count INTEGER; BEGIN - SELECT * INTO plain_schema, plain_relname - FROM @extschema@.get_plain_schema_and_relname(parent_relid); + attr := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; + + IF attr IS NULL THEN + RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); + END IF; SELECT string_agg(attname, ', '), string_agg('OLD.' 
|| attname, ', '), @@ -147,21 +149,21 @@ BEGIN att_val_fmt, att_fmt; - attr := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; - - IF attr IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); - END IF; - partitions_count := COUNT(*) FROM pg_catalog.pg_inherits WHERE inhparent = parent_relid::oid; - /* Function name, trigger name and child relname template */ - funcname := plain_schema || '.' || quote_ident(format('%s_update_trigger_func', plain_relname)); - child_relname_format := plain_schema || '.' || quote_ident(plain_relname || '_%s'); - triggername := quote_ident(format('%s_%s_update_trigger', plain_schema, plain_relname)); + /* Build trigger & trigger function's names */ + funcname := @extschema@.build_update_trigger_func_name(parent_relid); + triggername := @extschema@.build_update_trigger_name(parent_relid); - /* base hash function for type */ + /* Build partition name template */ + SELECT * INTO plain_schema, plain_relname + FROM @extschema@.get_plain_schema_and_relname(parent_relid); + + child_relname_format := quote_ident(plain_schema) || '.' 
|| + quote_ident(plain_relname || '_%s'); + + /* Fetch base hash function for atttype */ atttype := @extschema@.get_attribute_type_name(parent_relid, attr); hashfunc := @extschema@.get_type_hash_func(atttype::regtype)::regproc; @@ -170,7 +172,7 @@ BEGIN old_fields, att_fmt, new_fields, child_relname_format, hashfunc); EXECUTE func; - /* Create triggers on child relations */ + /* Create trigger on every partition */ FOR num IN 0..partitions_count-1 LOOP EXECUTE format(trigger, @@ -178,6 +180,8 @@ BEGIN format(child_relname_format, num), funcname); END LOOP; + + return funcname; END $$ LANGUAGE plpgsql; diff --git a/init.sql b/init.sql index bdf12e5610..5c83360830 100644 --- a/init.sql +++ b/init.sql @@ -240,19 +240,14 @@ LANGUAGE plpgsql; * Drop trigger */ CREATE OR REPLACE FUNCTION @extschema@.drop_triggers( - relation REGCLASS) + parent_relid REGCLASS) RETURNS VOID AS $$ DECLARE - relname TEXT; - schema TEXT; funcname TEXT; BEGIN - SELECT * INTO schema, relname - FROM @extschema@.get_plain_schema_and_relname(relation); - - funcname := schema || '.' || quote_ident(format('%s_update_trigger_func', relname)); + funcname := @extschema@.build_update_trigger_func_name(parent_relid); EXECUTE format('DROP FUNCTION IF EXISTS %s() CASCADE', funcname); END $$ LANGUAGE plpgsql; @@ -375,6 +370,19 @@ CREATE OR REPLACE FUNCTION @extschema@.build_check_constraint_name( RETURNS TEXT AS 'pg_pathman', 'build_check_constraint_name_attname' LANGUAGE C STRICT; +/* + * Build update trigger and its underlying function's names. + */ +CREATE OR REPLACE FUNCTION @extschema@.build_update_trigger_name( + REGCLASS) +RETURNS TEXT AS 'pg_pathman', 'build_update_trigger_name' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION @extschema@.build_update_trigger_func_name( + REGCLASS) +RETURNS TEXT AS 'pg_pathman', 'build_update_trigger_func_name' +LANGUAGE C STRICT; + /* * DEBUG: Place this inside some plpgsql fuction and set breakpoint. 
*/ diff --git a/range.sql b/range.sql index a09e898172..eb676840cd 100644 --- a/range.sql +++ b/range.sql @@ -972,39 +972,40 @@ CREATE OR REPLACE FUNCTION @extschema@.create_range_update_trigger( RETURNS TEXT AS $$ DECLARE - func TEXT := 'CREATE OR REPLACE FUNCTION %s_update_trigger_func() + func TEXT := 'CREATE OR REPLACE FUNCTION %1$s() RETURNS TRIGGER AS $body$ DECLARE - old_oid INTEGER; - new_oid INTEGER; - q TEXT; + old_oid Oid; + new_oid Oid; BEGIN old_oid := TG_RELID; new_oid := @extschema@.find_or_create_range_partition( - ''%1$s''::regclass, NEW.%2$s); + ''%2$s''::regclass, NEW.%3$s); IF old_oid = new_oid THEN RETURN NEW; END IF; - q := format(''DELETE FROM %%s WHERE %4$s'', - old_oid::regclass::text); - EXECUTE q USING %5$s; + EXECUTE format(''DELETE FROM %%s WHERE %5$s'', + old_oid::regclass::text) + USING %6$s; - q := format(''INSERT INTO %%s VALUES (%6$s)'', - new_oid::regclass::text); - EXECUTE q USING %7$s; + EXECUTE format(''INSERT INTO %%s VALUES (%7$s)'', + new_oid::regclass::text) + USING %8$s; RETURN NULL; END $body$ LANGUAGE plpgsql'; - trigger TEXT := 'CREATE TRIGGER %s_update_trigger ' || + trigger TEXT := 'CREATE TRIGGER %s ' || 'BEFORE UPDATE ON %s ' || - 'FOR EACH ROW EXECUTE PROCEDURE %s_update_trigger_func()'; + 'FOR EACH ROW EXECUTE PROCEDURE %s()'; + triggername TEXT; + funcname TEXT; att_names TEXT; old_fields TEXT; new_fields TEXT; @@ -1014,6 +1015,12 @@ DECLARE rec RECORD; BEGIN + attr := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; + + IF attr IS NULL THEN + RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); + END IF; + SELECT string_agg(attname, ', '), string_agg('OLD.' || attname, ', '), string_agg('NEW.' 
|| attname, ', '), @@ -1031,28 +1038,25 @@ BEGIN att_val_fmt, att_fmt; - attr := attname - FROM @extschema@.pathman_config - WHERE partrel = parent_relid; - - IF attr IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); - END IF; + /* Build trigger & trigger function's names */ + funcname := @extschema@.build_update_trigger_func_name(parent_relid); + triggername := @extschema@.build_update_trigger_name(parent_relid); /* Create function for trigger */ - EXECUTE format(func, parent_relid, attr, 0, att_val_fmt, + EXECUTE format(func, funcname, parent_relid, attr, 0, att_val_fmt, old_fields, att_fmt, new_fields); /* Create trigger on every partition */ - FOR rec in (SELECT * FROM pg_inherits WHERE inhparent = parent_relid) + FOR rec in (SELECT * FROM pg_catalog.pg_inherits + WHERE inhparent = parent_relid) LOOP EXECUTE format(trigger, - @extschema@.get_schema_qualified_name(parent_relid, '_'), - rec.inhrelid::regclass, - parent_relid); + triggername, + @extschema@.get_schema_qualified_name(rec.inhrelid), + funcname); END LOOP; - RETURN format('%s_update_trigger_func()', parent_relid); + return funcname; END $$ LANGUAGE plpgsql; diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 179df62792..73681d4ae4 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -40,6 +40,8 @@ PG_FUNCTION_INFO_V1( check_overlap ); PG_FUNCTION_INFO_V1( build_range_condition ); PG_FUNCTION_INFO_V1( build_check_constraint_name_attnum ); PG_FUNCTION_INFO_V1( build_check_constraint_name_attname ); +PG_FUNCTION_INFO_V1( build_update_trigger_func_name ); +PG_FUNCTION_INFO_V1( build_update_trigger_name ); PG_FUNCTION_INFO_V1( is_date_type ); PG_FUNCTION_INFO_V1( is_attribute_nullable ); PG_FUNCTION_INFO_V1( debug_capture ); @@ -554,6 +556,41 @@ build_check_constraint_name_attname(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(cstring_to_text(result)); } +Datum +build_update_trigger_func_name(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0), + nspid; + const char *result; + 
+ /* Check that relation exists */ + if (get_rel_type_id(relid) == InvalidOid) + elog(ERROR, "Invalid relation %u", relid); + + nspid = get_rel_namespace(relid); + result = psprintf("%s.%s", + quote_identifier(get_namespace_name(nspid)), + quote_identifier(psprintf("%s_upd_trig_func", + get_rel_name(relid)))); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + +Datum +build_update_trigger_name(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + const char *result; /* trigger's name can't be qualified */ + + /* Check that relation exists */ + if (get_rel_type_id(relid) == InvalidOid) + elog(ERROR, "Invalid relation %u", relid); + + result = quote_identifier(psprintf("%s_upd_trig", get_rel_name(relid))); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + /* * NOTE: used for DEBUG, set breakpoint here. */ From 18167f8c8cff85032965f146fd5b5c455bac7a08 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 17 Aug 2016 18:16:45 +0300 Subject: [PATCH 048/184] Check for PostgreSQL 9.5.4 ver, more comments --- src/pathman.h | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/pathman.h b/src/pathman.h index c471579c26..3daa06f4e1 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -27,16 +27,12 @@ #include "parser/parsetree.h" -/* Check PostgreSQL version */ -/* - * TODO: a fix for WaitForBackgroundWorkerShutdown() - * has been accepted, so we have to update this number. 
- */ -#if PG_VERSION_NUM < 90503 - #error "Cannot build pg_pathman with PostgreSQL version lower than 9.5.3" +/* Check PostgreSQL version (9.5.4 contains an important fix for BGW) */ +#if PG_VERSION_NUM < 90504 + #error "Cannot build pg_pathman with PostgreSQL version lower than 9.5.4" #endif -/* Print Datum as CString to server log */ +/* Get CString representation of Datum (simple wrapper) */ #ifdef USE_ASSERT_CHECKING #include "utils.h" #define DebugPrintDatum(datum, typid) ( datum_to_cstring((datum), (typid)) ) @@ -59,8 +55,14 @@ /* type modifier (typmod) for 'range_interval' */ #define PATHMAN_CONFIG_interval_typmod -1 +/* + * Cache current PATHMAN_CONFIG relid (set during load_config()). + */ extern Oid pathman_config_relid; +/* + * Just to clarify our intentions (returns pathman_config_relid). + */ Oid get_pathman_config_relid(void); @@ -72,10 +74,12 @@ typedef struct PathmanState LWLock *dsm_init_lock, *load_config_lock, *edit_partitions_lock; - DsmArray databases; } PathmanState; +/* + * Result of search_range_partition_eq(). + */ typedef enum { SEARCH_RANGEREL_OUT_OF_RANGE = 0, @@ -115,7 +119,6 @@ extern PathmanState *pmstate; } while (0) -/* utility functions */ int append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte, int index, Oid childOID, List *wrappers); @@ -140,25 +143,22 @@ void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, typedef struct { - const Node *orig; - List *args; - List *rangeset; - bool found_gap; - double paramsel; + const Node *orig; /* examined expression */ + List *args; /* extracted from 'orig' */ + List *rangeset; /* IndexRanges representing selected parts */ + bool found_gap; /* were there any gaps? 
*/ + double paramsel; /* estimated selectivity */ } WrapperNode; typedef struct { - /* Main partitioning structure */ - const PartRelationInfo *prel; - + const PartRelationInfo *prel; /* main partitioning structure */ ExprContext *econtext; /* for ExecEvalExpr() */ - bool for_insert; /* are we in PartitionFilter now? */ } WalkerContext; /* - * Usual initialization procedure for WalkerContext + * Usual initialization procedure for WalkerContext. */ #define InitWalkerContext(context, prel_info, ecxt, for_ins) \ do { \ @@ -184,6 +184,7 @@ void select_range_partitions(const Datum value, const int strategy, WrapperNode *result); +/* Examine expression in order to select partitions. */ WrapperNode *walk_expr_tree(Expr *expr, WalkerContext *context); #endif /* PATHMAN_H */ From e373da027cb8a433c10e9fc3995c6651c25b2aa5 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 18 Aug 2016 13:24:05 +0300 Subject: [PATCH 049/184] fix 'spawned' in spawn_partitions(), small fixes in pl_funcs.c, get_pathman_relation_info() returns pointer to const PartRelationInfo --- src/hooks.c | 28 ++++---- src/nodes_common.c | 29 ++++---- src/partition_filter.c | 24 +++---- src/pg_pathman.c | 29 ++++---- src/pl_funcs.c | 156 +++++++++++++++++++++-------------------- src/relation_info.c | 10 +-- src/relation_info.h | 8 +-- 7 files changed, 146 insertions(+), 138 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 8c52e1bd23..f692086516 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -36,16 +36,16 @@ pathman_join_pathlist_hook(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { - JoinCostWorkspace workspace; - RangeTblEntry *inner_rte = root->simple_rte_array[innerrel->relid]; - PartRelationInfo *inner_prel; - List *pathkeys = NIL, - *joinclauses, - *otherclauses; - ListCell *lc; - WalkerContext context; - double paramsel; - bool innerrel_rinfo_contains_part_attr; + JoinCostWorkspace workspace; + RangeTblEntry *inner_rte = root->simple_rte_array[innerrel->relid]; + const 
PartRelationInfo *inner_prel; + List *pathkeys = NIL, + *joinclauses, + *otherclauses; + ListCell *lc; + WalkerContext context; + double paramsel; + bool innerrel_rinfo_contains_part_attr; /* Call hooks set by other extensions */ if (set_join_pathlist_next) @@ -159,10 +159,10 @@ pathman_join_pathlist_hook(PlannerInfo *root, void pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { - PartRelationInfo *prel = NULL; - RangeTblEntry **new_rte_array; - RelOptInfo **new_rel_array; - int len; + const PartRelationInfo *prel; + RangeTblEntry **new_rte_array; + RelOptInfo **new_rel_array; + int len; /* Invoke original hook if needed */ if (set_rel_pathlist_hook_next != NULL) diff --git a/src/nodes_common.c b/src/nodes_common.c index 06593fe7ae..6e783cafa8 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -139,7 +139,7 @@ replace_tlist_varnos(List *child_tlist, RelOptInfo *parent) /* Append partition attribute in case it's not present in target list */ static List * -append_part_attr_to_tlist(List *tlist, Index relno, PartRelationInfo *prel) +append_part_attr_to_tlist(List *tlist, Index relno, const PartRelationInfo *prel) { ListCell *lc; bool part_attr_found = false; @@ -346,9 +346,12 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, List *clauses, List *custom_plans, CustomScanMethods *scan_methods) { - RuntimeAppendPath *rpath = (RuntimeAppendPath *) best_path; - CustomScan *cscan; - PartRelationInfo *prel = get_pathman_relation_info(rpath->relid); + RuntimeAppendPath *rpath = (RuntimeAppendPath *) best_path; + const PartRelationInfo *prel; + CustomScan *cscan; + + prel = get_pathman_relation_info(rpath->relid); + Assert(prel); cscan = makeNode(CustomScan); cscan->custom_scan_tlist = NIL; /* initial value (empty list) */ @@ -487,27 +490,27 @@ end_append_common(CustomScanState *node) void rescan_append_common(CustomScanState *node) { - RuntimeAppendState *scan_state = (RuntimeAppendState *) node; - 
ExprContext *econtext = node->ss.ps.ps_ExprContext; - PartRelationInfo *prel; - List *ranges; - ListCell *lc; - Oid *parts; - int nparts; + RuntimeAppendState *scan_state = (RuntimeAppendState *) node; + ExprContext *econtext = node->ss.ps.ps_ExprContext; + const PartRelationInfo *prel; + List *ranges; + ListCell *lc; + Oid *parts; + int nparts; prel = get_pathman_relation_info(scan_state->relid); Assert(prel); + /* First we select all available partitions... */ ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); InitWalkerContext(&scan_state->wcxt, prel, econtext, false); - foreach (lc, scan_state->custom_exprs) { WrapperNode *wn; + /* ... then we cut off irrelevant ones using the provided clauses */ wn = walk_expr_tree((Expr *) lfirst(lc), &scan_state->wcxt); - ranges = irange_list_intersect(ranges, wn->rangeset); } diff --git a/src/partition_filter.c b/src/partition_filter.c index 5cfa608afa..db168f4cdf 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -153,18 +153,18 @@ partition_filter_exec(CustomScanState *node) if (!TupIsNull(slot)) { - PartRelationInfo *prel; + const PartRelationInfo *prel; - MemoryContext old_cxt; + MemoryContext old_cxt; - List *ranges; - int nparts; - Oid *parts; - Oid selected_partid; + List *ranges; + int nparts; + Oid *parts; + Oid selected_partid; - WalkerContext wcxt; - bool isnull; - Datum value; + WalkerContext wcxt; + bool isnull; + Datum value; /* Fetch PartRelationInfo for this partitioned relation */ prel = get_pathman_relation_info(state->partitioned_table); @@ -390,9 +390,9 @@ partition_filter_visitor(Plan *plan, void *context) forboth (lc1, modify_table->plans, lc2, modify_table->resultRelations) { - Index rindex = lfirst_int(lc2); - Oid relid = getrelid(rindex, rtable); - PartRelationInfo *prel = get_pathman_relation_info(relid); + Index rindex = lfirst_int(lc2); + Oid relid = getrelid(rindex, rtable); + const PartRelationInfo *prel = get_pathman_relation_info(relid); /* Check 
that table is partitioned */ if (prel) diff --git a/src/pg_pathman.c b/src/pg_pathman.c index e4e747da5f..e48ed4862d 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -175,10 +175,10 @@ _PG_init(void) void disable_inheritance(Query *parse) { - ListCell *lc; - RangeTblEntry *rte; - PartRelationInfo *prel; - MemoryContext oldcontext; + const PartRelationInfo *prel; + RangeTblEntry *rte; + MemoryContext oldcontext; + ListCell *lc; /* If query contains CTE (WITH statement) then handle subqueries too */ disable_inheritance_cte(parse); @@ -301,12 +301,12 @@ disable_inheritance_subselect_walker(Node *node, void *context) void handle_modification_query(Query *parse) { - PartRelationInfo *prel; - List *ranges; - RangeTblEntry *rte; - WrapperNode *wrap; - Expr *expr; - WalkerContext context; + const PartRelationInfo *prel; + List *ranges; + RangeTblEntry *rte; + WrapperNode *wrap; + Expr *expr; + WalkerContext context; Assert(parse->commandType == CMD_UPDATE || parse->commandType == CMD_DELETE); @@ -725,7 +725,7 @@ spawn_partitions(Oid partitioned_rel, /* parent's Oid */ FmgrInfo interval_move_bound; /* function to move upper\lower boundary */ bool interval_move_bound_cached = false; /* is it cached already? 
*/ - bool done = false; + bool spawned = false; Datum cur_part_leading = leading_bound; @@ -737,7 +737,7 @@ spawn_partitions(Oid partitioned_rel, /* parent's Oid */ get_namespace_name(get_pathman_schema())); /* Execute comparison function cmp(value, cur_part_leading) */ - while ((done = do_compare(cmp_proc, value, cur_part_leading, forward))) + while (do_compare(cmp_proc, value, cur_part_leading, forward)) { char *nulls = NULL; /* no params are NULL */ Oid types[3] = { REGCLASSOID, leading_bound_type, leading_bound_type }; @@ -786,11 +786,14 @@ spawn_partitions(Oid partitioned_rel, /* parent's Oid */ DebugPrintDatum(cur_part_leading, leading_bound_type), MyProcPid); #endif + + /* We have spawned at least 1 partition */ + spawned = true; } pfree(query); - return done; + return spawned; } /* diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 73681d4ae4..9ca1b5825b 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -171,13 +171,13 @@ get_attribute_type_name(PG_FUNCTION_ARGS) Datum find_or_create_range_partition(PG_FUNCTION_ARGS) { - Oid parent_oid = PG_GETARG_OID(0); - Datum value = PG_GETARG_DATUM(1); - Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); - PartRelationInfo *prel; - FmgrInfo cmp_func; - RangeEntry found_rentry; - search_rangerel_result search_state; + Oid parent_oid = PG_GETARG_OID(0); + Datum value = PG_GETARG_DATUM(1); + Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); + const PartRelationInfo *prel; + FmgrInfo cmp_func; + RangeEntry found_rentry; + search_rangerel_result search_state; prel = get_pathman_relation_info(parent_oid); @@ -239,47 +239,38 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) Datum get_range_by_part_oid(PG_FUNCTION_ARGS) { - Oid parent_oid = PG_GETARG_OID(0); - Oid child_oid = PG_GETARG_OID(1); - const int nelems = 2; - uint32 i; - bool found = false; - Datum *elems; - PartRelationInfo *prel; - RangeEntry *ranges; - Oid *parts; - TypeCacheEntry *tce; - ArrayType *arr; + Oid parent_oid = PG_GETARG_OID(0); + Oid 
child_oid = PG_GETARG_OID(1); + uint32 i; + RangeEntry *ranges; + const PartRelationInfo *prel; prel = get_pathman_relation_info(parent_oid); - if (!prel) - PG_RETURN_NULL(); + elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", + get_rel_name_or_relid(parent_oid)); ranges = PrelGetRangesArray(prel); - parts = PrelGetChildrenArray(prel); - tce = lookup_type_cache(prel->atttype, 0); - /* Looking for specified partition */ + /* Look for the specified partition */ for (i = 0; i < PrelChildrenCount(prel); i++) - if (parts[i] == child_oid) + if (ranges[i].child_oid == child_oid) { - found = true; - break; - } + ArrayType *arr; + Datum elems[2] = { ranges[i].min, ranges[i].max }; - if (found) - { - elems = palloc(nelems * sizeof(Datum)); - elems[0] = ranges[i].min; - elems[1] = ranges[i].max; + arr = construct_array(elems, 2, prel->atttype, + prel->attlen, prel->attbyval, + prel->attalign); - arr = construct_array(elems, nelems, prel->atttype, - tce->typlen, tce->typbyval, tce->typalign); - PG_RETURN_ARRAYTYPE_P(arr); - } + PG_RETURN_ARRAYTYPE_P(arr); + } - PG_RETURN_NULL(); + elog(ERROR, "Relation \"%s\" has no partition \"%s\"", + get_rel_name_or_relid(parent_oid), + get_rel_name_or_relid(child_oid)); + + PG_RETURN_NULL(); /* keep compiler happy */ } /* @@ -292,33 +283,30 @@ get_range_by_part_oid(PG_FUNCTION_ARGS) Datum get_range_by_idx(PG_FUNCTION_ARGS) { - Oid parent_oid = PG_GETARG_OID(0); - int idx = PG_GETARG_INT32(1); - PartRelationInfo *prel; - RangeEntry *ranges; - RangeEntry re; - Datum *elems; + Oid parent_oid = PG_GETARG_OID(0); + int idx = PG_GETARG_INT32(1); + Datum elems[2]; + RangeEntry *ranges; + const PartRelationInfo *prel; prel = get_pathman_relation_info(parent_oid); if (!prel) - elog(ERROR, "Cannot get partitioning cache entry for relation \"%s\"", + elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", get_rel_name_or_relid(parent_oid)); if (((uint32) abs(idx)) >= PrelChildrenCount(prel)) - elog(ERROR, "Partition #%d does not 
exist (max is #%u)", - idx, PrelChildrenCount(prel) - 1); + elog(ERROR, "Partition #%d does not exist (total amount is %u)", + idx, PrelChildrenCount(prel)); ranges = PrelGetRangesArray(prel); - if (idx >= 0) - re = ranges[idx]; - else if(idx == -1) - re = ranges[PrelChildrenCount(prel) - 1]; - else + + if (idx == -1) + idx = PrelChildrenCount(prel) - 1; + else if (idx < -1) elog(ERROR, "Negative indices other than -1 (last partition) are not allowed"); - elems = palloc(2 * sizeof(Datum)); - elems[0] = re.min; - elems[1] = re.max; + elems[0] = ranges[idx].min; + elems[1] = ranges[idx].max; PG_RETURN_ARRAYTYPE_P(construct_array(elems, 2, prel->atttype, @@ -333,14 +321,19 @@ get_range_by_idx(PG_FUNCTION_ARGS) Datum get_min_range_value(PG_FUNCTION_ARGS) { - Oid parent_oid = PG_GETARG_OID(0); - PartRelationInfo *prel; - RangeEntry *ranges; + Oid parent_oid = PG_GETARG_OID(0); + RangeEntry *ranges; + const PartRelationInfo *prel; prel = get_pathman_relation_info(parent_oid); + if (!prel) + elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", + get_rel_name_or_relid(parent_oid)); - if (!prel || prel->parttype != PT_RANGE || PrelChildrenCount(prel) == 0) - PG_RETURN_NULL(); + /* prel is non-NULL here; range bounds exist only for RANGE partitioning */ + if (prel->parttype != PT_RANGE) + elog(ERROR, "Relation \"%s\" is not partitioned by RANGE", + get_rel_name_or_relid(parent_oid)); ranges = PrelGetRangesArray(prel); @@ -353,15 +346,19 @@ get_min_range_value(PG_FUNCTION_ARGS) Datum get_max_range_value(PG_FUNCTION_ARGS) { - Oid parent_oid = PG_GETARG_OID(0); - PartRelationInfo *prel; - RangeEntry *ranges; + Oid parent_oid = PG_GETARG_OID(0); + RangeEntry *ranges; + const PartRelationInfo *prel; prel = get_pathman_relation_info(parent_oid); + if (!prel) + elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", + get_rel_name_or_relid(parent_oid)); - /* TODO: separate all these checks, they look ugly together */ - if (!prel || prel->parttype != PT_RANGE || PrelChildrenCount(prel) == 0) - PG_RETURN_NULL(); + /* prel is non-NULL here; range bounds exist only for RANGE partitioning */ + if 
(prel->parttype != PT_RANGE) + elog(ERROR, "Relation \"%s\" is not partitioned by RANGE", + get_rel_name_or_relid(parent_oid)); ranges = PrelGetRangesArray(prel); @@ -375,25 +372,30 @@ get_max_range_value(PG_FUNCTION_ARGS) Datum check_overlap(PG_FUNCTION_ARGS) { - Oid parent_oid = PG_GETARG_OID(0); + Oid parent_oid = PG_GETARG_OID(0); - Datum p1 = PG_GETARG_DATUM(1), - p2 = PG_GETARG_DATUM(2); + Datum p1 = PG_GETARG_DATUM(1), + p2 = PG_GETARG_DATUM(2); - Oid p1_type = get_fn_expr_argtype(fcinfo->flinfo, 1), - p2_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + Oid p1_type = get_fn_expr_argtype(fcinfo->flinfo, 1), + p2_type = get_fn_expr_argtype(fcinfo->flinfo, 2); - FmgrInfo cmp_func_1, - cmp_func_2; + FmgrInfo cmp_func_1, + cmp_func_2; - PartRelationInfo *prel; - RangeEntry *ranges; - uint32 i; + uint32 i; + RangeEntry *ranges; + const PartRelationInfo *prel; prel = get_pathman_relation_info(parent_oid); + if (!prel) + elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", + get_rel_name_or_relid(parent_oid)); - if (!prel || prel->parttype != PT_RANGE) - PG_RETURN_NULL(); + /* prel is non-NULL here; overlap is checked against RANGE bounds only */ + if (prel->parttype != PT_RANGE) + elog(ERROR, "Relation \"%s\" is not partitioned by RANGE", + get_rel_name_or_relid(parent_oid)); /* comparison functions */ fill_type_cmp_fmgr_info(&cmp_func_1, p1_type, prel->atttype); diff --git a/src/relation_info.c b/src/relation_info.c index 55ebe65fd9..81f7797117 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -61,7 +61,7 @@ static Oid get_parent_of_partition_internal(Oid partition, */ /* Create or update PartRelationInfo in local cache. Might emit ERROR. */ -PartRelationInfo * +const PartRelationInfo * refresh_pathman_relation_info(Oid relid, PartType partitioning_type, const char *part_column_name) @@ -200,12 +200,12 @@ invalidate_pathman_relation_info(Oid relid, bool *found) } /* Get PartRelationInfo from local cache. 
*/ -PartRelationInfo * +const PartRelationInfo * get_pathman_relation_info(Oid relid) { - PartRelationInfo *prel = hash_search(partitioned_rels, - (const void *) &relid, - HASH_FIND, NULL); + const PartRelationInfo *prel = hash_search(partitioned_rels, + (const void *) &relid, + HASH_FIND, NULL); /* Refresh PartRelationInfo if needed */ if (prel && !PrelIsValid(prel)) diff --git a/src/relation_info.h b/src/relation_info.h index 2437ba093f..e113d0d19e 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -115,12 +115,12 @@ typedef enum #define PrelIsValid(prel) ( (prel) && (prel)->valid ) -PartRelationInfo *refresh_pathman_relation_info(Oid relid, - PartType partitioning_type, - const char *part_column_name); +const PartRelationInfo *refresh_pathman_relation_info(Oid relid, + PartType partitioning_type, + const char *part_column_name); void invalidate_pathman_relation_info(Oid relid, bool *found); void remove_pathman_relation_info(Oid relid); -PartRelationInfo *get_pathman_relation_info(Oid relid); +const PartRelationInfo *get_pathman_relation_info(Oid relid); void delay_pathman_shutdown(void); void delay_invalidation_parent_rel(Oid parent); From 9af063089026152b398e307a616ed1425aafed39 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 19 Aug 2016 19:23:02 +0300 Subject: [PATCH 050/184] merge concurrent partitioning into picky nodes, first stage --- hash.sql | 11 +- init.sql | 176 +++++++++++++-- sql/pg_pathman.sql | 3 + src/hooks.c | 28 ++- src/init.c | 95 +++++++- src/init.h | 2 + src/pathman.h | 12 +- src/pg_pathman.c | 7 + src/pl_funcs.c | 76 ++++++- src/relation_info.c | 25 ++- src/relation_info.h | 2 + src/worker.c | 496 +++++++++++++++++++++++++++++++++++------ tests/PostgresNode.pyc | Bin 0 -> 8572 bytes 13 files changed, 839 insertions(+), 94 deletions(-) create mode 100644 tests/PostgresNode.pyc diff --git a/hash.sql b/hash.sql index 1292083e62..36c551ec9d 100644 --- a/hash.sql +++ b/hash.sql @@ -14,7 +14,8 @@ CREATE OR REPLACE FUNCTION 
@extschema@.create_hash_partitions( parent_relid REGCLASS, attribute TEXT, - partitions_count INTEGER + partitions_count INTEGER, + partition_data BOOLEAN DEFAULT true ) RETURNS INTEGER AS $$ DECLARE @@ -23,6 +24,7 @@ DECLARE v_plain_schema TEXT; v_plain_relname TEXT; v_hashfunc TEXT; + v_enable_parent BOOLEAN := NOT partition_data; BEGIN PERFORM @extschema@.validate_relname(parent_relid); @@ -66,7 +68,12 @@ BEGIN PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - PERFORM @extschema@.partition_data(parent_relid); + IF partition_data = true THEN + PERFORM @extschema@.disable_parent(parent_relid); + PERFORM @extschema@.partition_data(parent_relid); + ELSE + PERFORM @extschema@.enable_parent(parent_relid); + END IF; RETURN partitions_count; END diff --git a/init.sql b/init.sql index 5c83360830..e3034225f8 100644 --- a/init.sql +++ b/init.sql @@ -24,37 +24,179 @@ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config ( parttype INTEGER NOT NULL, range_interval TEXT, - CHECK (parttype >= 1 OR parttype <= 2) /* check for allowed part types */ + CHECK (parttype IN (1, 2)) /* check for allowed part types */ ); +CREATE TABLE IF NOT EXISTS @extschema@.pathman_config_params ( + partrel REGCLASS NOT NULL, + enable_parent BOOLEAN NOT NULL DEFAULT TRUE +); +CREATE UNIQUE INDEX i_pathman_config_params +ON @extschema@.pathman_config_params(partrel); SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config', ''); +SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config_params', ''); + + +CREATE OR REPLACE FUNCTION @extschema@.on_enable_parent(relid OID) +RETURNS OID AS 'pg_pathman' LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION @extschema@.on_disable_parent(relid OID) +RETURNS OID AS 'pg_pathman' LANGUAGE C STRICT; + +/* Include parent relation into query plan's for specified relation */ +CREATE OR REPLACE FUNCTION @extschema@.enable_parent(relation REGCLASS) +RETURNS VOID AS +$$ +BEGIN + INSERT INTO 
@extschema@.pathman_config_params values (relation, True) + ON CONFLICT (partrel) DO + UPDATE SET enable_parent = True; + + PERFORM @extschema@.on_enable_parent(relation::oid); +END +$$ +LANGUAGE plpgsql; + +/* Do not include parent relation into query plan's for specified relation */ +CREATE OR REPLACE FUNCTION @extschema@.disable_parent(relation REGCLASS) +RETURNS VOID AS +$$ +BEGIN + INSERT INTO @extschema@.pathman_config_params values (relation, False) + ON CONFLICT (partrel) DO + UPDATE SET enable_parent = False; + + PERFORM @extschema@.on_disable_parent(relation::oid); +END +$$ +LANGUAGE plpgsql; +/* + * Partitioning data tools + */ +CREATE OR REPLACE FUNCTION @extschema@.active_workers() +RETURNS TABLE ( + pid INT, + dbid INT, + relid INT, + processed INT, + status TEXT +) AS 'pg_pathman' LANGUAGE C STRICT; + +CREATE OR REPLACE VIEW @extschema@.pathman_active_workers +AS SELECT * FROM @extschema@.active_workers(); + +CREATE OR REPLACE FUNCTION @extschema@.partition_data_worker(relation regclass) +RETURNS VOID AS 'pg_pathman' LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION @extschema@.stop_worker(relation regclass) +RETURNS BOOL AS 'pg_pathman' LANGUAGE C STRICT; + +/* PathmanRange type */ +CREATE OR REPLACE FUNCTION @extschema@.pathman_range_in(cstring) + RETURNS PathmanRange + AS 'pg_pathman' + LANGUAGE C IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION @extschema@.pathman_range_out(PathmanRange) + RETURNS cstring + AS 'pg_pathman' + LANGUAGE C IMMUTABLE STRICT; + +/* +CREATE OR REPLACE FUNCTION @extschema@.get_whole_range(relid OID) + RETURNS PathmanRange + AS 'pg_pathman' + LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION @extschema@.range_value_cmp(range PathmanRange, value ANYELEMENT) + RETURNS INTEGER + AS 'pg_pathman' + LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION @extschema@.range_lower(range PathmanRange, dummy ANYELEMENT) + RETURNS ANYELEMENT + AS 'pg_pathman' + LANGUAGE C; + +CREATE OR REPLACE FUNCTION @extschema@.range_upper(range 
PathmanRange, dummy ANYELEMENT) + RETURNS ANYELEMENT + AS 'pg_pathman' + LANGUAGE C; + +CREATE OR REPLACE FUNCTION @extschema@.range_oid(range PathmanRange) + RETURNS OID + AS 'pg_pathman' + LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION @extschema@.range_partitions_list(parent_relid OID) + RETURNS SETOF PATHMANRANGE AS 'pg_pathman' + LANGUAGE C STRICT; +*/ +CREATE TYPE @extschema@.PathmanRange ( + internallength = 32, + input = pathman_range_in, + output = pathman_range_out +); /* * Copy rows to partitions */ CREATE OR REPLACE FUNCTION @extschema@.partition_data( - parent_relid REGCLASS, - OUT p_total BIGINT) + p_relation regclass + , p_min ANYELEMENT DEFAULT NULL::text + , p_max ANYELEMENT DEFAULT NULL::text + , p_limit INT DEFAULT NULL + , OUT p_total BIGINT) AS $$ DECLARE - relname TEXT; - rec RECORD; - cnt BIGINT := 0; - + v_attr TEXT; + v_limit_clause TEXT := ''; + v_where_clause TEXT := ''; BEGIN - p_total := 0; - - /* Create partitions and copy rest of the data */ - EXECUTE format('WITH part_data AS (DELETE FROM ONLY %1$s RETURNING *) - INSERT INTO %1$s SELECT * FROM part_data', - @extschema@.get_schema_qualified_name(parent_relid)); - - /* Get number of inserted rows */ - GET DIAGNOSTICS p_total = ROW_COUNT; - RETURN; + SELECT attname INTO v_attr + FROM @extschema@.pathman_config WHERE partrel = p_relation; + + PERFORM @extschema@.debug_capture(); + + p_total := 0; + + /* Format LIMIT clause if needed */ + IF NOT p_limit IS NULL THEN + v_limit_clause := format('LIMIT %s', p_limit); + END IF; + + /* Format WHERE clause if needed */ + IF NOT p_min IS NULL THEN + v_where_clause := format('%1$s >= $1', v_attr); + END IF; + + IF NOT p_max IS NULL THEN + IF NOT p_min IS NULL THEN + v_where_clause := v_where_clause || ' AND '; + END IF; + v_where_clause := v_where_clause || format('%1$s < $2', v_attr); + END IF; + + IF v_where_clause != '' THEN + v_where_clause := 'WHERE ' || v_where_clause; + END IF; + + /* Lock rows and copy data */ + RAISE NOTICE 'Copying 
data to partitions...'; + EXECUTE format(' + WITH data AS ( + DELETE FROM ONLY %1$s WHERE ctid IN ( + SELECT ctid FROM ONLY %1$s %2$s %3$s FOR UPDATE NOWAIT + ) RETURNING *) + INSERT INTO %1$s SELECT * FROM data' + , p_relation, v_where_clause, v_limit_clause) + USING p_min, p_max; + + GET DIAGNOSTICS p_total = ROW_COUNT; + RETURN; END $$ LANGUAGE plpgsql; diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 0dcfccce9a..f54f08640c 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -1,3 +1,6 @@ + +--:gdb + \set VERBOSITY terse CREATE SCHEMA pathman; diff --git a/src/hooks.c b/src/hooks.c index f692086516..d5d4be167e 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -246,11 +246,12 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb /* * Expand simple_rte_array and simple_rel_array */ + len = irange_list_length(ranges); + if (prel->enable_parent) + len++; - if (ranges) + if (len > 0) { - len = irange_list_length(ranges); - /* Expand simple_rel_array and simple_rte_array */ new_rel_array = (RelOptInfo **) palloc0((root->simple_rel_array_size + len) * sizeof(RelOptInfo *)); @@ -275,6 +276,10 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb root->simple_rte_array = new_rte_array; } + /* Add parent if needed */ + if (prel->enable_parent) + append_child_relation(root, rel, rti, rte, i, rte->relid, NULL); + /* * Iterate all indexes in rangeset and append corresponding child * relations. 
@@ -448,14 +453,31 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) void pathman_post_parse_analysis_hook(ParseState *pstate, Query *query) { + elog(DEBUG2, "Called parse hook [%u]", MyProcPid); + /* Invoke original hook if needed */ if (post_parse_analyze_hook_next) post_parse_analyze_hook_next(pstate, query); + /* Finish delayed invalidation jobs */ if (IsPathmanReady()) finish_delayed_invalidation(); + elog(DEBUG2, "post_parse: %d %d %u [%u]", + IsPathmanEnabled(), + initialization_needed, + get_pathman_schema(), + MyProcPid); + + /* DEBUG!!!! */ + // static int parse_sleep = 10; + // if (IsPathmanEnabled() && + // initialization_needed && + // get_pathman_schema() == InvalidOid) + // sleep(parse_sleep); + /* -------------------- */ + /* Load config if pg_pathman exists & it's still necessary */ if (IsPathmanEnabled() && initialization_needed && diff --git a/src/init.c b/src/init.c index b2f76cbc71..b6043a532d 100644 --- a/src/init.c +++ b/src/init.c @@ -53,6 +53,8 @@ bool initialization_needed = true; static bool relcache_callback_needed = true; +static bool init_pathman_relation_oids(void); +static void fini_pathman_relation_oids(void); static void init_local_cache(void); static void fini_local_cache(void); static void read_pathman_config(void); @@ -83,8 +85,18 @@ static int oid_cmp(const void *p1, const void *p2); void load_config(void) { - /* Cache PATHMAN_CONFIG relation's Oid */ - pathman_config_relid = get_relname_relid(PATHMAN_CONFIG, get_pathman_schema()); + elog(DEBUG2, "pg_pathman's config!!! [%u]", MyProcPid); + + /* + * Try to cache important relids. + * + * Once CREATE EXTENSION stmt is processed, get_pathman_schema() + * function starts to return perfectly valid schema Oid, which + * means we have to check that ALL pg_pathman's relations' Oids + * have been cached properly. 
+ */ + if (!init_pathman_relation_oids()) + return; /* remain 'uninitialized', exit */ init_local_cache(); /* create 'partitioned_rels' hash table */ read_pathman_config(); /* read PATHMAN_CONFIG table & fill cache */ @@ -108,8 +120,8 @@ load_config(void) void unload_config(void) { - /* Don't forget to reset cached PATHMAN_CONFIG relation's Oid */ - pathman_config_relid = InvalidOid; + /* Don't forget to reset pg_pathman's cached relids */ + fini_pathman_relation_oids(); fini_local_cache(); /* destroy 'partitioned_rels' hash table */ @@ -128,6 +140,39 @@ estimate_pathman_shmem_size(void) return estimate_dsm_config_size() + MAXALIGN(sizeof(PathmanState)); } +/* + * TODO: write some comment; + */ +static bool +init_pathman_relation_oids(void) +{ + Oid schema = get_pathman_schema(); + + /* Cache PATHMAN_CONFIG relation's Oid */ + pathman_config_params_relid = get_relname_relid(PATHMAN_CONFIG_PARAMS, + schema); + pathman_config_relid = get_relname_relid(PATHMAN_CONFIG, schema); + + /* Return false if any relation doesn't exist yet */ + if (pathman_config_params_relid == InvalidOid || + pathman_config_relid == InvalidOid) + { + return false; + } + + return true; +} + +/* + * TODO: write some comment; + */ +static void +fini_pathman_relation_oids(void) +{ + pathman_config_relid = InvalidOid; + pathman_config_params_relid = InvalidOid; +} + /* * Initialize per-process resources. 
*/ @@ -465,7 +510,7 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, snapshot = RegisterSnapshot(GetLatestSnapshot()); scan = heap_beginscan(rel, snapshot, 1, key); - while((htup = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((htup = heap_getnext(scan, ForwardScanDirection)) != NULL) { contains_rel = true; /* found partitioned table */ @@ -508,6 +553,46 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, return contains_rel; } +/* + * Return 'enable_parent' parameter of relation + */ +bool +read_enable_parent_parameter(Oid relid) +{ + Relation rel; + HeapScanDesc scan; + ScanKeyData key[1]; + Snapshot snapshot; + HeapTuple htup; + bool result = false; + + ScanKeyInit(&key[0], + Anum_pathman_config_params_partrel, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + rel = heap_open(get_pathman_config_params_relid(), AccessShareLock); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = heap_beginscan(rel, snapshot, 1, key); + + if ((htup = heap_getnext(scan, ForwardScanDirection)) != NULL) + { + Datum values[Natts_pathman_config_params]; + bool isnull[Natts_pathman_config_params]; + + /* Extract data if necessary */ + heap_deform_tuple(htup, RelationGetDescr(rel), values, isnull); + result = values[Anum_pathman_config_params_enable_parent - 1]; + } + + /* Clean resources */ + heap_endscan(scan); + UnregisterSnapshot(snapshot); + heap_close(rel, AccessShareLock); + + return result; +} + /* * Go through the PATHMAN_CONFIG table and create PartRelationInfo entries. 
*/ diff --git a/src/init.h b/src/init.h index 010b302a12..bfcb072665 100644 --- a/src/init.h +++ b/src/init.h @@ -45,4 +45,6 @@ bool pathman_config_contains_relation(Oid relid, bool *isnull, TransactionId *xmin); +bool read_enable_parent_parameter(Oid relid); + #endif diff --git a/src/pathman.h b/src/pathman.h index 3daa06f4e1..7d51f9261e 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -55,16 +55,26 @@ /* type modifier (typmod) for 'range_interval' */ #define PATHMAN_CONFIG_interval_typmod -1 +/* + * Definitions for the "pathman_config_params" table + */ +#define PATHMAN_CONFIG_PARAMS "pathman_config_params" +// #define PATHMAN_CONFIG_PARAMS_INDEX "i_pathman_config_params" +#define Natts_pathman_config_params 2 +#define Anum_pathman_config_params_partrel 1 /* primary key */ +#define Anum_pathman_config_params_enable_parent 2 /* include parent into plan */ + /* * Cache current PATHMAN_CONFIG relid (set during load_config()). */ extern Oid pathman_config_relid; +extern Oid pathman_config_params_relid; /* * Just to clarify our intentions (returns pathman_config_relid). */ Oid get_pathman_config_relid(void); - +Oid get_pathman_config_params_relid(void); /* * pg_pathman's global state. 
diff --git a/src/pg_pathman.c b/src/pg_pathman.c index e48ed4862d..eed678d41e 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -51,6 +51,7 @@ List *inheritance_enabled_relids = NIL; bool pg_pathman_enable = true; PathmanState *pmstate; Oid pathman_config_relid = InvalidOid; +Oid pathman_config_params_relid = InvalidOid; /* pg module functions */ @@ -2102,3 +2103,9 @@ get_pathman_config_relid(void) { return pathman_config_relid; } + +Oid +get_pathman_config_params_relid(void) +{ + return pathman_config_params_relid; +} \ No newline at end of file diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 9ca1b5825b..a3e13ccbe5 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -23,10 +23,14 @@ #include "utils/memutils.h" +#include "miscadmin.h" + /* declarations */ PG_FUNCTION_INFO_V1( on_partitions_created ); PG_FUNCTION_INFO_V1( on_partitions_updated ); PG_FUNCTION_INFO_V1( on_partitions_removed ); +PG_FUNCTION_INFO_V1( on_enable_parent ); +PG_FUNCTION_INFO_V1( on_disable_parent ); PG_FUNCTION_INFO_V1( get_parent_of_partition_pl ); PG_FUNCTION_INFO_V1( get_attribute_type_name ); PG_FUNCTION_INFO_V1( find_or_create_range_partition); @@ -46,6 +50,31 @@ PG_FUNCTION_INFO_V1( is_date_type ); PG_FUNCTION_INFO_V1( is_attribute_nullable ); PG_FUNCTION_INFO_V1( debug_capture ); +/* pathman_range type */ +typedef struct PathmanRange +{ + Oid type_oid; + bool by_val; + RangeEntry range; +} PathmanRange; + +typedef struct PathmanHash +{ + Oid child_oid; + uint32 hash; +} PathmanHash; + +typedef struct PathmanRangeListCtxt +{ + Oid type_oid; + bool by_val; + RangeEntry *ranges; + int nranges; + int pos; +} PathmanRangeListCtxt; + +PG_FUNCTION_INFO_V1( pathman_range_in ); +PG_FUNCTION_INFO_V1( pathman_range_out ); static void on_partitions_created_internal(Oid partitioned_table, bool add_callbacks); static void on_partitions_updated_internal(Oid partitioned_table, bool add_callbacks); @@ -164,6 +193,24 @@ get_attribute_type_name(PG_FUNCTION_ARGS) PG_RETURN_NULL(); /* keep 
compiler happy */ } +Datum +on_enable_parent(PG_FUNCTION_ARGS) +{ + Oid relid = DatumGetObjectId(PG_GETARG_DATUM(0)); + + set_enable_parent(relid, true); + PG_RETURN_NULL(); +} + +Datum +on_disable_parent(PG_FUNCTION_ARGS) +{ + Oid relid = DatumGetObjectId(PG_GETARG_DATUM(0)); + + set_enable_parent(relid, false); + PG_RETURN_NULL(); +} + /* * Returns partition oid for specified parent relid and value. * In case when partition doesn't exist try to create one. @@ -599,8 +646,35 @@ build_update_trigger_name(PG_FUNCTION_ARGS) Datum debug_capture(PG_FUNCTION_ARGS) { + static float8 sleep_time = 0; + DirectFunctionCall1(pg_sleep, Float8GetDatum(sleep_time)); + /* Write something (doesn't really matter) */ - elog(WARNING, "debug_capture"); + elog(WARNING, "debug_capture [%u]", MyProcPid); PG_RETURN_VOID(); } + +Datum +pathman_range_in(PG_FUNCTION_ARGS) +{ + elog(ERROR, "Not implemented"); +} + +Datum +pathman_range_out(PG_FUNCTION_ARGS) +{ + PathmanRange *rng = (PathmanRange *) PG_GETARG_POINTER(0); + char *result; + char *left, + *right; + Oid outputfunc; + bool typisvarlena; + + getTypeOutputInfo(rng->type_oid, &outputfunc, &typisvarlena); + left = OidOutputFunctionCall(outputfunc, PATHMAN_GET_DATUM(rng->range.min, rng->by_val)); + right = OidOutputFunctionCall(outputfunc, PATHMAN_GET_DATUM(rng->range.max, rng->by_val)); + + result = psprintf("[%s: %s)", left, right); + PG_RETURN_CSTRING(result); +} diff --git a/src/relation_info.c b/src/relation_info.c index 81f7797117..a86e9a5d21 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -156,6 +156,12 @@ refresh_pathman_relation_info(Oid relid, pfree(prel_children); + /* + * Read additional parameter ('enable_parent' is the only one at + * the moment) + */ + prel->enable_parent = read_enable_parent_parameter(relid); + /* We've successfully built a cache entry */ prel->valid = true; @@ -217,7 +223,7 @@ get_pathman_relation_info(Oid relid) if (pathman_config_contains_relation(relid, values, isnull, NULL)) { PartType 
part_type; - const char *attname; + const char *attname; /* We can't use 'part_type' & 'attname' from invalid prel */ part_type = DatumGetPartType(values[Anum_pathman_config_parttype - 1]); @@ -225,7 +231,9 @@ get_pathman_relation_info(Oid relid) /* Refresh partitioned table cache entry */ /* TODO: possible refactoring, pass found 'prel' instead of searching */ - prel = refresh_pathman_relation_info(relid, part_type, attname); + prel = refresh_pathman_relation_info(relid, + part_type, + attname); Assert(PrelIsValid(prel)); /* it MUST be valid if we got here */ } /* Else clear remaining cache entry */ @@ -263,6 +271,19 @@ remove_pathman_relation_info(Oid relid) relid, MyProcPid); } +void +set_enable_parent(Oid relid, bool flag) +{ + PartRelationInfo *prel; + + prel = hash_search(partitioned_rels, + (const void *) &relid, + HASH_FIND, NULL); + if (!prel) + elog(ERROR, "Relation %s isn't handled by pg_pathman", get_rel_name(relid)); + + prel->enable_parent = flag; +} /* * Functions for delayed invalidation. diff --git a/src/relation_info.h b/src/relation_info.h index e113d0d19e..b8591f46e4 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -46,6 +46,7 @@ typedef struct { Oid key; /* partitioned table's Oid */ bool valid; /* is this entry valid? 
*/ + bool enable_parent; /* include parent to the plan */ uint32 children_count; Oid *children; /* Oids of child partitions */ @@ -121,6 +122,7 @@ const PartRelationInfo *refresh_pathman_relation_info(Oid relid, void invalidate_pathman_relation_info(Oid relid, bool *found); void remove_pathman_relation_info(Oid relid); const PartRelationInfo *get_pathman_relation_info(Oid relid); +void set_enable_parent(Oid relid, bool flag); void delay_pathman_shutdown(void); void delay_invalidation_parent_rel(Oid parent); diff --git a/src/worker.c b/src/worker.c index 12351968d6..1a0bbe5fc3 100644 --- a/src/worker.c +++ b/src/worker.c @@ -20,15 +20,26 @@ #include "miscadmin.h" #include "postmaster/bgworker.h" #include "storage/dsm.h" +#include "storage/ipc.h" +#include "storage/latch.h" #include "utils/datum.h" #include "utils/typcache.h" #include "utils/lsyscache.h" #include "utils/resowner.h" +#include "utils/builtins.h" +#include "utils/snapmgr.h" +#include "funcapi.h" +#define WORKER_SLOTS 10 static void bg_worker_load_config(const char *bgw_name); -static void bg_worker_main(Datum main_arg); +static void create_partitions_bg_worker_main(Datum main_arg); +static void partition_data_bg_worker_main(Datum main_arg); +static void handle_sigterm(SIGNAL_ARGS); +PG_FUNCTION_INFO_V1( partition_data_worker ); +PG_FUNCTION_INFO_V1( active_workers ); +PG_FUNCTION_INFO_V1( stop_worker ); static const char *create_partitions_bgw = "CreatePartitionsWorker"; @@ -50,7 +61,25 @@ typedef struct uint8 value[FLEXIBLE_ARRAY_MEMBER]; } PartitionArgs; +typedef enum WorkerStatus +{ + WS_FREE = 0, + WS_WORKING, + WS_STOPPING +} WorkerStatus; + +typedef struct PartitionDataArgs +{ + WorkerStatus status; + Oid dbid; + Oid relid; + uint32 batch_size; + uint32 batch_count; + pid_t pid; + size_t total_rows; +} PartitionDataArgs; +PartitionDataArgs *slots; /* * Useful datum packing\unpacking functions for BGW. 
@@ -96,7 +125,79 @@ UnpackDatumFromByteArray(Datum *datum, Size datum_size, bool typbyval, memcpy(dst, byte_array, datum_size); } +/* + * Common function to start background worker + */ +static void +start_bg_worker(char name[BGW_MAXLEN], + bgworker_main_type main_func, + uint32 arg, + bool wait) +{ +#define HandleError(condition, new_state) \ + if (condition) { exec_state = (new_state); goto handle_exec_state; } + + /* Execution state to be checked */ + enum + { + BGW_OK = 0, /* everything is fine (default) */ + BGW_COULD_NOT_START, /* could not start worker */ + BGW_PM_DIED /* postmaster died */ + } exec_state = BGW_OK; + + BackgroundWorker worker; + BackgroundWorkerHandle *bgw_handle; + BgwHandleStatus bgw_status; + bool bgw_started; + pid_t pid; + + /* Initialize worker struct */ + memcpy(worker.bgw_name, name, BGW_MAXLEN); + worker.bgw_flags = BGWORKER_SHMEM_ACCESS | + BGWORKER_BACKEND_DATABASE_CONNECTION; + worker.bgw_start_time = BgWorkerStart_RecoveryFinished; + worker.bgw_restart_time = BGW_NEVER_RESTART; + worker.bgw_main = main_func; + worker.bgw_main_arg = arg; + worker.bgw_notify_pid = MyProcPid; + + /* Start dynamic worker */ + bgw_started = RegisterDynamicBackgroundWorker(&worker, &bgw_handle); + HandleError(bgw_started == false, BGW_COULD_NOT_START); + + /* Wait till the worker starts */ + bgw_status = WaitForBackgroundWorkerStartup(bgw_handle, &pid); + HandleError(bgw_status == BGWH_POSTMASTER_DIED, BGW_PM_DIED); + + // elog(NOTICE, "worker pid: %u", pid); + // sleep(30); + + if(wait) + { + /* Wait till the worker finishes job */ + bgw_status = WaitForBackgroundWorkerShutdown(bgw_handle); + HandleError(bgw_status == BGWH_POSTMASTER_DIED, BGW_PM_DIED); + } +/* end execution */ +handle_exec_state: + + switch (exec_state) + { + case BGW_COULD_NOT_START: + elog(ERROR, "Unable to create background worker for pg_pathman"); + break; + + case BGW_PM_DIED: + ereport(ERROR, + (errmsg("Postmaster died during the pg_pathman background worker process"), + 
errhint("More details may be available in the server log."))); + break; + + default: + break; + } +} /* * Initialize pg_pathman's local config in BGW process. @@ -155,84 +256,28 @@ create_partitions_bg_worker_segment(Oid relid, Datum value, Oid value_type) Oid create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) { -#define HandleError(condition, new_state) \ - if (condition) { exec_state = (new_state); goto handle_bg_exec_state; } - - /* Execution state to be checked */ - enum - { - BGW_OK = 0, /* everything is fine (default) */ - BGW_COULD_NOT_START, /* could not start worker */ - BGW_PM_DIED /* postmaster died */ - } exec_state = BGW_OK; - - BackgroundWorker worker; - BackgroundWorkerHandle *bgw_handle; - BgwHandleStatus bgw_status; - bool bgw_started; dsm_segment *segment; dsm_handle segment_handle; - pid_t pid; PartitionArgs *args; Oid child_oid = InvalidOid; - /* Create a dsm segment for the worker to pass arguments */ segment = create_partitions_bg_worker_segment(relid, value, value_type); segment_handle = dsm_segment_handle(segment); args = (PartitionArgs *) dsm_segment_address(segment); - /* Initialize worker struct */ - worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; - worker.bgw_start_time = BgWorkerStart_RecoveryFinished; - worker.bgw_restart_time = BGW_NEVER_RESTART; - worker.bgw_notify_pid = MyProcPid; - worker.bgw_main_arg = UInt32GetDatum(segment_handle); - worker.bgw_main = bg_worker_main; - - /* Set worker's name */ - memcpy((void *) &worker.bgw_name, - (const void *) create_partitions_bgw, - strlen(create_partitions_bgw)); - - /* Start dynamic worker */ - bgw_started = RegisterDynamicBackgroundWorker(&worker, &bgw_handle); - HandleError(bgw_started == false, BGW_COULD_NOT_START); - - /* Wait till the worker starts */ - bgw_status = WaitForBackgroundWorkerStartup(bgw_handle, &pid); - HandleError(bgw_status == BGWH_POSTMASTER_DIED, BGW_PM_DIED); - - /* Wait till the worker finishes job */ - bgw_status 
= WaitForBackgroundWorkerShutdown(bgw_handle); - HandleError(bgw_status == BGWH_POSTMASTER_DIED, BGW_PM_DIED); + /* Start worker and wait for it to finish */ + start_bg_worker("create partitions worker", + create_partitions_bg_worker_main, + UInt32GetDatum(segment_handle), + true); /* Save the result (partition Oid) */ child_oid = args->result; - -/* end execution */ -handle_bg_exec_state: - /* Free dsm segment */ dsm_detach(segment); - switch (exec_state) - { - case BGW_COULD_NOT_START: - elog(ERROR, "Unable to create background worker for pg_pathman"); - break; - - case BGW_PM_DIED: - ereport(ERROR, - (errmsg("Postmaster died during the pg_pathman's background worker process"), - errhint("More details may be available in the server log."))); - break; - - default: - break; - } - if (child_oid == InvalidOid) elog(ERROR, "Attempt to append new partitions to relation \"%s\" failed", @@ -245,7 +290,7 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) * Main worker routine. Accepts dsm_handle as an argument */ static void -bg_worker_main(Datum main_arg) +create_partitions_bg_worker_main(Datum main_arg) { dsm_handle handle = DatumGetUInt32(main_arg); dsm_segment *segment; @@ -297,3 +342,328 @@ bg_worker_main(Datum main_arg) dsm_detach(segment); } + +/* + * Starts background worker that redistributes data. 
Function returns + * immediately + */ +Datum +partition_data_worker( PG_FUNCTION_ARGS ) +{ + Oid relid = PG_GETARG_OID(0); + int empty_slot_idx = -1; + int i; + PartitionDataArgs *args = NULL; + + /* TODO: lock would be nice */ + + /* Check if relation is a partitioned table */ + if (get_pathman_relation_info(relid) == NULL) + elog(ERROR, + "Relation '%s' isn't partitioned by pg_pathman", + get_rel_name(relid)); + + /* + * Look for empty slot and also check that partitioning data for this table + * hasn't already starded + */ + for (i=0; istatus = WS_WORKING; + args->dbid = MyDatabaseId; + args->relid = relid; + args->total_rows = 0; + + /* start worker and wait for it to finish */ + start_bg_worker("partition data worker", + partition_data_bg_worker_main, + empty_slot_idx, + false); + elog(NOTICE, + "Worker started. You can stop it with the following command: " + "select stop_worker('%s');", + get_rel_name(relid)); + + PG_RETURN_VOID(); +} + +/* + * When we receive a SIGTERM, we set InterruptPending and ProcDiePending just + * like a normal backend. The next CHECK_FOR_INTERRUPTS() will do the right + * thing. + */ +static void +handle_sigterm(SIGNAL_ARGS) +{ + int save_errno = errno; + + SetLatch(MyLatch); + + if (!proc_exit_inprogress) + { + InterruptPending = true; + ProcDiePending = true; + } + + errno = save_errno; +} + +/* + * Main worker routine. 
Accepts dsm_handle as an argument + */ +static void +partition_data_bg_worker_main(Datum main_arg) +{ + PartitionDataArgs *args; + char *sql = NULL; + Oid types[2] = { OIDOID, INT4OID }; + Datum vals[2]; + bool nulls[2] = { false, false }; + int rows; + int slot_idx = DatumGetInt32(main_arg); + MemoryContext worker_context = CurrentMemoryContext; + int failures_count = 0; + bool failed; + + /* Create resource owner */ + CurrentResourceOwner = ResourceOwnerCreate(NULL, "PartitionDataWorker"); + + args = &slots[slot_idx]; + args->pid = MyProcPid; + vals[0] = args->relid; + vals[1] = 10000; + + /* Establish signal handlers before unblocking signals. */ + pqsignal(SIGTERM, handle_sigterm); + + /* We're now ready to receive signals */ + BackgroundWorkerUnblockSignals(); + + /* Establish connection and start transaction */ + BackgroundWorkerInitializeConnectionByOid(args->dbid, InvalidOid); + + bg_worker_load_config("PartitionDataWorker"); + + do + { + failed = false; + rows = 0; + StartTransactionCommand(); + SPI_connect(); + PushActiveSnapshot(GetTransactionSnapshot()); + + if (sql == NULL) + { + MemoryContext oldcontext; + + /* + * Allocate as SQL query in top memory context because current + * context will be destroyed after transaction finishes + */ + oldcontext = MemoryContextSwitchTo(worker_context); + sql = psprintf("SELECT %s.partition_data($1::oid, p_limit:=$2)", + get_namespace_name(get_pathman_schema())); + MemoryContextSwitchTo(oldcontext); + } + + PG_TRY(); + { + int ret; + bool isnull; + + ret = SPI_execute_with_args(sql, 2, types, vals, nulls, false, 0); + if (ret > 0) + { + TupleDesc tupdesc = SPI_tuptable->tupdesc; + HeapTuple tuple = SPI_tuptable->vals[0]; + + Assert(SPI_processed == 1); + + rows = DatumGetInt32(SPI_getbinval(tuple, tupdesc, 1, &isnull)); + } + } + PG_CATCH(); + { + EmitErrorReport(); + FlushErrorState(); + + elog(WARNING, "Error #%u", failures_count); + /* + * The most common exception we can catch here is a deadlock with + * 
concurrent user queries. Check that attempts count doesn't exceed + * some reasonable value + */ + if (100 <= failures_count++) + { + pfree(sql); + args->status = WS_FREE; + elog(ERROR, "Failures count exceeded 100. Finishing..."); + exit(1); + } + failed = true; + } + PG_END_TRY(); + + SPI_finish(); + PopActiveSnapshot(); + if (failed) + { + /* abort transaction and sleep for a second */ + AbortCurrentTransaction(); + DirectFunctionCall1(pg_sleep, Float8GetDatum(1)); + } + else + { + /* Reset failures counter and commit transaction */ + CommitTransactionCommand(); + failures_count = 0; + args->total_rows += rows; + } + + /* If other backend requested to stop worker then quit */ + if (args->status == WS_STOPPING) + break; + } + while(rows > 0 || failed); /* do while there is still rows to relocate */ + + pfree(sql); + args->status = WS_FREE; +} + +/* Function context for active_workers() SRF */ +typedef struct PartitionDataListCtxt +{ + int cur_idx; +} PartitionDataListCtxt; + +/* + * Returns list of active workers for partitioning data. 
Each record + * contains pid, relation name and number of processed rows + */ +Datum +active_workers(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + MemoryContext oldcontext; + TupleDesc tupdesc; + PartitionDataListCtxt *userctx; + int i; + Datum result; + + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + /* Switch context when allocating stuff to be used in later calls */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + userctx = (PartitionDataListCtxt *) palloc(sizeof(PartitionDataListCtxt)); + userctx->cur_idx = 0; + funcctx->user_fctx = (void *) userctx; + + /* Create tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(5, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "dbid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "processed", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "status", + TEXTOID, -1, 0); + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + userctx = (PartitionDataListCtxt *) funcctx->user_fctx; + + /* + * Iterate through worker slots + */ + for (i=userctx->cur_idx; iattinmeta, values); + + result = HeapTupleGetDatum(tuple); + userctx->cur_idx = i + 1; + SRF_RETURN_NEXT(funcctx, result); + } + } + + SRF_RETURN_DONE(funcctx); +} + +Datum +stop_worker(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + int i; + + for (i = 0; i < WORKER_SLOTS; i++) + if (slots[i].status != WS_FREE && + slots[i].relid == relid && + slots[i].dbid == MyDatabaseId) + { + slots[i].status = WS_STOPPING; + elog(NOTICE, + "Worker will stop after current batch finished"); + PG_RETURN_BOOL(true); + } + + elog(ERROR, + "Worker for relation '%s' not found", + get_rel_name(relid)); +} diff --git 
a/tests/PostgresNode.pyc b/tests/PostgresNode.pyc new file mode 100644 index 0000000000000000000000000000000000000000..379ba1682dcb7920314acbe17c3e43efd1074cbc GIT binary patch literal 8572 zcmcgx?Q+~!89v%wukDRp=c`FSLaUT!vMFgkN=r*gN)sn7X21<{nWS!ukfpP0*;;9> zbGEiS$q#6M!3<>v{_~$1E`b@i0j_`!yJz;|)%vJ?BU|de8g3AJ2Q# z{O4llAAkGP-B?wBGx&WQhxy1;D#3qGWlF8rZBMOx);$R=r~ks=s(A&?4~mgL7#ZttMxSVZX!he`*nh<@N`KgY z)otKpIUI(0G|*vaJJNCQ3PX1nhLp6-chI`LSq${$Ptz>X<>g^nZ0XpV%fo&+jO^wh z$}d}OFmR_ZwqI(qS|s|?a6jNAr*Rm*v+db=`~=#JGP|bjS7qJAWhX0)kY?ry7_hO1 zG+Kn7fXxtmHrj-r4Ztn%ug?*BjsR^zf4xKGIRaY|@YffpizAfl9h~l#(Qv3szX!E1 zk4l3dTlhLl2WcKz?I&rkr%Rp3z+pq%U9IymRDP5v6;v^oI6js+_tQAt_mo*sR?(o* zz7M5~C#`h}(5l1Y2{dmQqe~)qyDW>+wlNfoEbf*T`bKBHfUY_L=n&Xc7Lewt4Z|~> zp6^iJf)`V?@gXbW6^QPHHcz3qT;x{oSs&0xedrD*VT-uPoM?PF;1Z@IDXk_9HQrc; zp^uK3lWN42iwEB)f*wNyf^OlzL<$cUv?dz-uTnc4M7Nd-jglPDXxo-LzTZ$$C-be21Z_hPww1m;Dw$EzJfQm>W*PGI5MSD>QEGJHHvJe+{3 zz`-RS6p$WJ3;2Q)u!iG-IDvUi*#;c~K@mPtS=FP(Z(;_A3t!qp(AX?;Z5Pp5v&GP@ z7hWS6JFxf&LJ$QzbO5tdaSlVjh+he%szuNHmYFLIt2czLCm^XU^}4M1#%M;V{n|O6Fkvo z1o7Y$Tg*cfFPGLK@?k(tJcGk<@;05kRsEaAZ-qA~zkP14xAPtp>! 
zj&Ss~m7>6mHozo6e2tiXys6{u(7jloHp48nu4T(~82EU$OT{`ozloO2heyO7acU)BYHTp^2%+`NnbY6uh|YJ>}KhoUdNyJyk zhK-}thANzbnyN1~IV>G!US4AmsH=2vIOvCuH`OUvJy3<=CoP)!7q8LGol(>9J>^v$ z*!-jeyBF&}#j(36+J15mr?oEguptfb@8}Z6*??tmYLgA8U)MI@;4)klESJ+LOFxZR zN%_49icJg?2oM-nwdqcFiGfFaA(GJ0Gy8e$U-ye%uVa{GvZ9Z)h1zd`q5~Jee>#RC zmjMs`bEf0`CtwjD5H~UJ7&itKND5yWDu$SX>fS9um|bvV*Et9Yn)pKrhhtiIAyMjb zHzOpSj#)6&NwJ4tQHg3T!8O|6QghKej=;F+NkXA;v=%XqYiQJNQuzZxhG9rJB*-Vn zAd7i844cHTcAMW|mK!LsbHp!30C5=KE!C&-V{}{l=O04oJoiD)BZO^kL;~6QPRFnQ zZh%Ygy2(=T>caZb$QA?OP7L;aR`hwI6TZrBY9Ay7iKu>j;+4Ux5N_4wAVH419RZnX zcLLK@bj7FLG{JXy4=Lhwpds*1V2SMoy3S!|P5_yh;hKS~Mupb(pB%P|U_eK_urF>` zdG2lTTp4lI7o8QX0GlXvsv)=a*KAZ;h7@YrKk=MB8#7Zj6W@qjw1$~hdhvF@Pr zC1Vw41T$sYz!?bw(cc3<1*2*+mVlHXnIKajXRwV%yJ~#qH7eEwD^|jz@CXu!@TmK3 zb$?d%72lxS*q4AXG&|WOoQ!jN)4;i&T6K+(e%;ZEjvulQ*X`J+txoA$zi$K2Gi ze~2b#_oH$grp!gmHN+kUQU{_I#K1OVR3>A}hh9CCKe>uY5HKA(tFKEIp(qmaUcPt3 z?l@zM-M|pZtO{*h>$r)z4!G?ZXwzjm#>6|4(`ci2R_@%c($-+WdK4*Bc%xGVRU?c|o0vwXpp4)vWi)VfwBnrsnVO_XiiGyzBLQFeD;$QgPR%WO&o`yq z^A?(jwlrWGFMpSD7=|Ecz~n<`3SZ!^!V29+_hWYP* zvqP7%2nfx`jN(}9?H*AcJmV8_9?zSMj0#1W-f2n}VBG#`Ta@7?#&!ppGNG z*>RMVsuAvTfW)tG?1wy26M{!To;?*Xq)IVSPluG|517_4dZ_lZh|VV_XpjpE8htce zl}3F->nLe~$pIFMh(fZdTCN5|_`ubQha%qzm92OvFJK5&*DP=HKx)RV8GuJKFz0cq z!6Pm%jBw-MSvg!Eg#h8GOW}bOryex+08DHSA~U zhSXu;MLE8%)I6SZ&}n2Iw-?Ij=E@DX!Q{5VD6XTGHDuz9)r4Tg;z#(b5Rg?*1;5~@ znCXu=gaD|+SC)1K4J-|7Iqix*#5wpBA{-cFm4+*S@l=FsjVh$A${vDS)NxFbMaG7a zF=;>7*xEsr6j0OhiL%uFl;0?EAxA=R*~Zab$M))7A8``iGtt2zu&x~lm4dwauaL;m zl~(8266&3HBgPO}!ek4LRDbV9YUS3)XVe21`aM;;B2^-dw==8o|1LribdpTArK#88 zBc4cQ#gbjk|1)%_v!*%BL}nNw=KP&)4k)!3NR^PG%rV~lnPel`n-z-+XwyN+)6aP# zb^+BJNEnd_!jR#VkH!@Be$9O#e~U1iS8-av$5Q#%YI;XuPwspYY~q>5spg57PV8gj R_}Tec-h)8cKGyDd{|30k8GHZ$ literal 0 HcmV?d00001 From 17944ce3d8cdd732b7a7af4db2c2fb4ca38ee2d4 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 19 Aug 2016 19:24:31 +0300 Subject: [PATCH 051/184] cleanup --- tests/PostgresNode.pyc | Bin 8572 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) 
delete mode 100644 tests/PostgresNode.pyc diff --git a/tests/PostgresNode.pyc b/tests/PostgresNode.pyc deleted file mode 100644 index 379ba1682dcb7920314acbe17c3e43efd1074cbc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8572 zcmcgx?Q+~!89v%wukDRp=c`FSLaUT!vMFgkN=r*gN)sn7X21<{nWS!ukfpP0*;;9> zbGEiS$q#6M!3<>v{_~$1E`b@i0j_`!yJz;|)%vJ?BU|de8g3AJ2Q# z{O4llAAkGP-B?wBGx&WQhxy1;D#3qGWlF8rZBMOx);$R=r~ks=s(A&?4~mgL7#ZttMxSVZX!he`*nh<@N`KgY z)otKpIUI(0G|*vaJJNCQ3PX1nhLp6-chI`LSq${$Ptz>X<>g^nZ0XpV%fo&+jO^wh z$}d}OFmR_ZwqI(qS|s|?a6jNAr*Rm*v+db=`~=#JGP|bjS7qJAWhX0)kY?ry7_hO1 zG+Kn7fXxtmHrj-r4Ztn%ug?*BjsR^zf4xKGIRaY|@YffpizAfl9h~l#(Qv3szX!E1 zk4l3dTlhLl2WcKz?I&rkr%Rp3z+pq%U9IymRDP5v6;v^oI6js+_tQAt_mo*sR?(o* zz7M5~C#`h}(5l1Y2{dmQqe~)qyDW>+wlNfoEbf*T`bKBHfUY_L=n&Xc7Lewt4Z|~> zp6^iJf)`V?@gXbW6^QPHHcz3qT;x{oSs&0xedrD*VT-uPoM?PF;1Z@IDXk_9HQrc; zp^uK3lWN42iwEB)f*wNyf^OlzL<$cUv?dz-uTnc4M7Nd-jglPDXxo-LzTZ$$C-be21Z_hPww1m;Dw$EzJfQm>W*PGI5MSD>QEGJHHvJe+{3 zz`-RS6p$WJ3;2Q)u!iG-IDvUi*#;c~K@mPtS=FP(Z(;_A3t!qp(AX?;Z5Pp5v&GP@ z7hWS6JFxf&LJ$QzbO5tdaSlVjh+he%szuNHmYFLIt2czLCm^XU^}4M1#%M;V{n|O6Fkvo z1o7Y$Tg*cfFPGLK@?k(tJcGk<@;05kRsEaAZ-qA~zkP14xAPtp>! 
zj&Ss~m7>6mHozo6e2tiXys6{u(7jloHp48nu4T(~82EU$OT{`ozloO2heyO7acU)BYHTp^2%+`NnbY6uh|YJ>}KhoUdNyJyk zhK-}thANzbnyN1~IV>G!US4AmsH=2vIOvCuH`OUvJy3<=CoP)!7q8LGol(>9J>^v$ z*!-jeyBF&}#j(36+J15mr?oEguptfb@8}Z6*??tmYLgA8U)MI@;4)klESJ+LOFxZR zN%_49icJg?2oM-nwdqcFiGfFaA(GJ0Gy8e$U-ye%uVa{GvZ9Z)h1zd`q5~Jee>#RC zmjMs`bEf0`CtwjD5H~UJ7&itKND5yWDu$SX>fS9um|bvV*Et9Yn)pKrhhtiIAyMjb zHzOpSj#)6&NwJ4tQHg3T!8O|6QghKej=;F+NkXA;v=%XqYiQJNQuzZxhG9rJB*-Vn zAd7i844cHTcAMW|mK!LsbHp!30C5=KE!C&-V{}{l=O04oJoiD)BZO^kL;~6QPRFnQ zZh%Ygy2(=T>caZb$QA?OP7L;aR`hwI6TZrBY9Ay7iKu>j;+4Ux5N_4wAVH419RZnX zcLLK@bj7FLG{JXy4=Lhwpds*1V2SMoy3S!|P5_yh;hKS~Mupb(pB%P|U_eK_urF>` zdG2lTTp4lI7o8QX0GlXvsv)=a*KAZ;h7@YrKk=MB8#7Zj6W@qjw1$~hdhvF@Pr zC1Vw41T$sYz!?bw(cc3<1*2*+mVlHXnIKajXRwV%yJ~#qH7eEwD^|jz@CXu!@TmK3 zb$?d%72lxS*q4AXG&|WOoQ!jN)4;i&T6K+(e%;ZEjvulQ*X`J+txoA$zi$K2Gi ze~2b#_oH$grp!gmHN+kUQU{_I#K1OVR3>A}hh9CCKe>uY5HKA(tFKEIp(qmaUcPt3 z?l@zM-M|pZtO{*h>$r)z4!G?ZXwzjm#>6|4(`ci2R_@%c($-+WdK4*Bc%xGVRU?c|o0vwXpp4)vWi)VfwBnrsnVO_XiiGyzBLQFeD;$QgPR%WO&o`yq z^A?(jwlrWGFMpSD7=|Ecz~n<`3SZ!^!V29+_hWYP* zvqP7%2nfx`jN(}9?H*AcJmV8_9?zSMj0#1W-f2n}VBG#`Ta@7?#&!ppGNG z*>RMVsuAvTfW)tG?1wy26M{!To;?*Xq)IVSPluG|517_4dZ_lZh|VV_XpjpE8htce zl}3F->nLe~$pIFMh(fZdTCN5|_`ubQha%qzm92OvFJK5&*DP=HKx)RV8GuJKFz0cq z!6Pm%jBw-MSvg!Eg#h8GOW}bOryex+08DHSA~U zhSXu;MLE8%)I6SZ&}n2Iw-?Ij=E@DX!Q{5VD6XTGHDuz9)r4Tg;z#(b5Rg?*1;5~@ znCXu=gaD|+SC)1K4J-|7Iqix*#5wpBA{-cFm4+*S@l=FsjVh$A${vDS)NxFbMaG7a zF=;>7*xEsr6j0OhiL%uFl;0?EAxA=R*~Zab$M))7A8``iGtt2zu&x~lm4dwauaL;m zl~(8266&3HBgPO}!ek4LRDbV9YUS3)XVe21`aM;;B2^-dw==8o|1LribdpTArK#88 zBc4cQ#gbjk|1)%_v!*%BL}nNw=KP&)4k)!3NR^PG%rV~lnPel`n-z-+XwyN+)6aP# zb^+BJNEnd_!jR#VkH!@Be$9O#e~U1iS8-av$5Q#%YI;XuPwspYY~q>5spg57PV8gj R_}Tec-h)8cKGyDd{|30k8GHZ$ From a3e9ee5691d4fc0b121789774d539d64a5247086 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 19 Aug 2016 20:04:18 +0300 Subject: [PATCH 052/184] regression tests are ok --- hash.sql | 3 -- init.sql | 126 +++++++++++++++++++++++++++------------------ range.sql | 32 
+++++++++--- sql/pg_pathman.sql | 3 -- 4 files changed, 101 insertions(+), 63 deletions(-) diff --git a/hash.sql b/hash.sql index 36c551ec9d..8716a0790a 100644 --- a/hash.sql +++ b/hash.sql @@ -24,7 +24,6 @@ DECLARE v_plain_schema TEXT; v_plain_relname TEXT; v_hashfunc TEXT; - v_enable_parent BOOLEAN := NOT partition_data; BEGIN PERFORM @extschema@.validate_relname(parent_relid); @@ -71,8 +70,6 @@ BEGIN IF partition_data = true THEN PERFORM @extschema@.disable_parent(parent_relid); PERFORM @extschema@.partition_data(parent_relid); - ELSE - PERFORM @extschema@.enable_parent(parent_relid); END IF; RETURN partitions_count; diff --git a/init.sql b/init.sql index e3034225f8..c14b0986c9 100644 --- a/init.sql +++ b/init.sql @@ -140,63 +140,91 @@ CREATE TYPE @extschema@.PathmanRange ( output = pathman_range_out ); +/* + * Copy rows to partitions + */ +-- CREATE OR REPLACE FUNCTION @extschema@.partition_data( +-- p_relation regclass +-- , p_min ANYELEMENT DEFAULT NULL::text +-- , p_max ANYELEMENT DEFAULT NULL::text +-- , p_limit INT DEFAULT NULL +-- , OUT p_total BIGINT) +-- AS +-- $$ +-- DECLARE +-- v_attr TEXT; +-- v_limit_clause TEXT := ''; +-- v_where_clause TEXT := ''; +-- BEGIN +-- SELECT attname INTO v_attr +-- FROM @extschema@.pathman_config WHERE partrel = p_relation; + +-- PERFORM @extschema@.debug_capture(); + +-- p_total := 0; + +-- /* Format LIMIT clause if needed */ +-- IF NOT p_limit IS NULL THEN +-- v_limit_clause := format('LIMIT %s', p_limit); +-- END IF; + +-- /* Format WHERE clause if needed */ +-- IF NOT p_min IS NULL THEN +-- v_where_clause := format('%1$s >= $1', v_attr); +-- END IF; + +-- IF NOT p_max IS NULL THEN +-- IF NOT p_min IS NULL THEN +-- v_where_clause := v_where_clause || ' AND '; +-- END IF; +-- v_where_clause := v_where_clause || format('%1$s < $2', v_attr); +-- END IF; + +-- IF v_where_clause != '' THEN +-- v_where_clause := 'WHERE ' || v_where_clause; +-- END IF; + +-- /* Lock rows and copy data */ +-- RAISE NOTICE 'Copying data to 
partitions...'; +-- EXECUTE format(' +-- WITH data AS ( +-- DELETE FROM ONLY %1$s WHERE ctid IN ( +-- SELECT ctid FROM ONLY %1$s %2$s %3$s FOR UPDATE NOWAIT +-- ) RETURNING *) +-- INSERT INTO %1$s SELECT * FROM data' +-- , p_relation, v_where_clause, v_limit_clause) +-- USING p_min, p_max; + +-- GET DIAGNOSTICS p_total = ROW_COUNT; +-- RETURN; +-- END +-- $$ +-- LANGUAGE plpgsql; + /* * Copy rows to partitions */ CREATE OR REPLACE FUNCTION @extschema@.partition_data( - p_relation regclass - , p_min ANYELEMENT DEFAULT NULL::text - , p_max ANYELEMENT DEFAULT NULL::text - , p_limit INT DEFAULT NULL - , OUT p_total BIGINT) + parent_relid REGCLASS, + OUT p_total BIGINT) AS $$ DECLARE - v_attr TEXT; - v_limit_clause TEXT := ''; - v_where_clause TEXT := ''; + relname TEXT; + rec RECORD; + cnt BIGINT := 0; + BEGIN - SELECT attname INTO v_attr - FROM @extschema@.pathman_config WHERE partrel = p_relation; - - PERFORM @extschema@.debug_capture(); - - p_total := 0; - - /* Format LIMIT clause if needed */ - IF NOT p_limit IS NULL THEN - v_limit_clause := format('LIMIT %s', p_limit); - END IF; - - /* Format WHERE clause if needed */ - IF NOT p_min IS NULL THEN - v_where_clause := format('%1$s >= $1', v_attr); - END IF; - - IF NOT p_max IS NULL THEN - IF NOT p_min IS NULL THEN - v_where_clause := v_where_clause || ' AND '; - END IF; - v_where_clause := v_where_clause || format('%1$s < $2', v_attr); - END IF; - - IF v_where_clause != '' THEN - v_where_clause := 'WHERE ' || v_where_clause; - END IF; - - /* Lock rows and copy data */ - RAISE NOTICE 'Copying data to partitions...'; - EXECUTE format(' - WITH data AS ( - DELETE FROM ONLY %1$s WHERE ctid IN ( - SELECT ctid FROM ONLY %1$s %2$s %3$s FOR UPDATE NOWAIT - ) RETURNING *) - INSERT INTO %1$s SELECT * FROM data' - , p_relation, v_where_clause, v_limit_clause) - USING p_min, p_max; - - GET DIAGNOSTICS p_total = ROW_COUNT; - RETURN; + p_total := 0; + + /* Create partitions and copy rest of the data */ + EXECUTE format('WITH 
part_data AS (DELETE FROM ONLY %1$s RETURNING *) + INSERT INTO %1$s SELECT * FROM part_data', + @extschema@.get_schema_qualified_name(parent_relid)); + + /* Get number of inserted rows */ + GET DIAGNOSTICS p_total = ROW_COUNT; + RETURN; END $$ LANGUAGE plpgsql; diff --git a/range.sql b/range.sql index eb676840cd..d89fc38bef 100644 --- a/range.sql +++ b/range.sql @@ -42,7 +42,8 @@ CREATE OR REPLACE FUNCTION @extschema@.create_range_partitions( p_attribute TEXT, p_start_value ANYELEMENT, p_interval INTERVAL, - p_count INTEGER DEFAULT NULL) + p_count INTEGER DEFAULT NULL, + p_partition_data BOOLEAN DEFAULT true) RETURNS INTEGER AS $$ DECLARE @@ -107,7 +108,10 @@ BEGIN PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - PERFORM @extschema@.partition_data(parent_relid); + IF p_partition_data = true THEN + PERFORM @extschema@.disable_parent(parent_relid); + PERFORM @extschema@.partition_data(parent_relid); + END IF; RETURN p_count; @@ -124,7 +128,8 @@ CREATE OR REPLACE FUNCTION @extschema@.create_range_partitions( p_attribute TEXT, p_start_value ANYELEMENT, p_interval ANYELEMENT, - p_count INTEGER DEFAULT NULL) + p_count INTEGER DEFAULT NULL, + p_partition_data BOOLEAN DEFAULT true) RETURNS INTEGER AS $$ DECLARE @@ -194,7 +199,10 @@ BEGIN PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - PERFORM @extschema@.partition_data(parent_relid); + IF p_partition_data = true THEN + PERFORM @extschema@.disable_parent(parent_relid); + PERFORM @extschema@.partition_data(parent_relid); + END IF; RETURN p_count; @@ -211,7 +219,8 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( p_attribute TEXT, p_start_value ANYELEMENT, p_end_value ANYELEMENT, - p_interval ANYELEMENT) + p_interval ANYELEMENT, + p_partition_data BOOLEAN DEFAULT true) RETURNS INTEGER AS $$ DECLARE @@ -257,7 +266,10 @@ BEGIN PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - PERFORM @extschema@.partition_data(parent_relid); + IF 
p_partition_data = true THEN + PERFORM @extschema@.disable_parent(parent_relid); + PERFORM @extschema@.partition_data(parent_relid); + END IF; RETURN part_count; /* number of created partitions */ @@ -274,7 +286,8 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( p_attribute TEXT, p_start_value ANYELEMENT, p_end_value ANYELEMENT, - p_interval INTERVAL) + p_interval INTERVAL, + p_partition_data BOOLEAN DEFAULT true) RETURNS INTEGER AS $$ DECLARE @@ -317,7 +330,10 @@ BEGIN PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - PERFORM @extschema@.partition_data(parent_relid); + IF p_partition_data = true THEN + PERFORM @extschema@.disable_parent(parent_relid); + PERFORM @extschema@.partition_data(parent_relid); + END IF; RETURN part_count; /* number of created partitions */ diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index f54f08640c..0dcfccce9a 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -1,6 +1,3 @@ - ---:gdb - \set VERBOSITY terse CREATE SCHEMA pathman; From e5ad13457c647149ddb3373c73678b85cf722c47 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 19 Aug 2016 20:17:21 +0300 Subject: [PATCH 053/184] make load_config() return status, rework pg_pathman's utility relations (e.g. 
pathman_config) Oid cache --- src/init.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++------ src/init.h | 2 +- src/worker.c | 10 ++++++--- 3 files changed, 63 insertions(+), 10 deletions(-) diff --git a/src/init.c b/src/init.c index b2f76cbc71..845ce16376 100644 --- a/src/init.c +++ b/src/init.c @@ -53,6 +53,8 @@ bool initialization_needed = true; static bool relcache_callback_needed = true; +static bool init_pathman_relation_oids(void); +static void fini_pathman_relation_oids(void); static void init_local_cache(void); static void fini_local_cache(void); static void read_pathman_config(void); @@ -79,12 +81,22 @@ static int oid_cmp(const void *p1, const void *p2); /* * Create local PartRelationInfo cache & load pg_pathman's config. + * Return true on success. May occasionally emit ERROR. */ -void +bool load_config(void) { - /* Cache PATHMAN_CONFIG relation's Oid */ - pathman_config_relid = get_relname_relid(PATHMAN_CONFIG, get_pathman_schema()); + /* + * Try to cache important relids. + * + * Once CREATE EXTENSION stmt is processed, get_pathman_schema() + * function starts returning perfectly valid schema Oid, which + * means we have to check that *ALL* pg_pathman's relations' Oids + * have been cached properly. Only then can we assume that + * initialization is not needed anymore. 
+ */ + if (!init_pathman_relation_oids()) + return false; /* remain 'uninitialized', exit before creating main caches */ init_local_cache(); /* create 'partitioned_rels' hash table */ read_pathman_config(); /* read PATHMAN_CONFIG table & fill cache */ @@ -100,6 +112,8 @@ load_config(void) initialization_needed = false; elog(DEBUG2, "pg_pathman's config has been loaded successfully [%u]", MyProcPid); + + return true; } /* @@ -108,10 +122,11 @@ load_config(void) void unload_config(void) { - /* Don't forget to reset cached PATHMAN_CONFIG relation's Oid */ - pathman_config_relid = InvalidOid; + /* Don't forget to reset pg_pathman's cached relids */ + fini_pathman_relation_oids(); - fini_local_cache(); /* destroy 'partitioned_rels' hash table */ + /* Destroy 'partitioned_rels' & 'parent_cache' hash tables */ + fini_local_cache(); /* Mark pg_pathman as uninitialized */ initialization_needed = true; @@ -128,6 +143,40 @@ estimate_pathman_shmem_size(void) return estimate_dsm_config_size() + MAXALIGN(sizeof(PathmanState)); } +/* + * Cache *all* important pg_pathman's relids at once. + * We should NOT rely on any previously cached values. + */ +static bool +init_pathman_relation_oids(void) +{ + Oid schema = get_pathman_schema(); + Assert(schema != InvalidOid); + + /* Cache PATHMAN_CONFIG relation's Oid */ + pathman_config_relid = get_relname_relid(PATHMAN_CONFIG, schema); + /* NOTE: add more relations to be cached right here ^^^ */ + + /* Return false if *any* relation doesn't exist yet */ + if (pathman_config_relid == InvalidOid) + { + return false; + } + + /* Everything is fine, proceed */ + return true; +} + +/* + * Forget *all* pg_pathman's cached relids. + */ +static void +fini_pathman_relation_oids(void) +{ + pathman_config_relid = InvalidOid; + /* NOTE: add more relations to be forgotten right here ^^^ */ +} + /* * Initialize per-process resources. 
*/ diff --git a/src/init.h b/src/init.h index 010b302a12..ec8c537993 100644 --- a/src/init.h +++ b/src/init.h @@ -26,7 +26,7 @@ extern bool initialization_needed; Size estimate_pathman_shmem_size(void); void init_shmem_config(void); -void load_config(void); +bool load_config(void); void unload_config(void); void fill_prel_with_partitions(const Oid *partitions, diff --git a/src/worker.c b/src/worker.c index 12351968d6..1b14c3eb99 100644 --- a/src/worker.c +++ b/src/worker.c @@ -104,9 +104,13 @@ UnpackDatumFromByteArray(Datum *datum, Size datum_size, bool typbyval, static void bg_worker_load_config(const char *bgw_name) { - load_config(); - elog(LOG, "%s: loaded pg_pathman's config [%u]", - bgw_name, MyProcPid); + /* Try to load config */ + if (!load_config()) + elog(ERROR, "%s: could not load pg_pathman's config [%u]", + bgw_name, MyProcPid); + else + elog(LOG, "%s: loaded pg_pathman's config [%u]", + bgw_name, MyProcPid); } /* From f5ba66089f1cea9831972d6a631e19ec3a615940 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 19 Aug 2016 20:51:20 +0300 Subject: [PATCH 054/184] python scripts --- .gitignore | 1 + init.sql | 154 +++++++++++++------------- src/pathman.h | 6 +- tests/concurrent_partitioning_test.py | 64 +++++++++++ 4 files changed, 145 insertions(+), 80 deletions(-) create mode 100644 tests/concurrent_partitioning_test.py diff --git a/.gitignore b/.gitignore index b2853d5b29..7f9490f2be 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ regression.diffs regression.out *.o *.so +*.pyc pg_pathman--*.sql diff --git a/init.sql b/init.sql index c14b0986c9..cb752a4092 100644 --- a/init.sql +++ b/init.sql @@ -140,95 +140,95 @@ CREATE TYPE @extschema@.PathmanRange ( output = pathman_range_out ); -/* - * Copy rows to partitions - */ --- CREATE OR REPLACE FUNCTION @extschema@.partition_data( --- p_relation regclass --- , p_min ANYELEMENT DEFAULT NULL::text --- , p_max ANYELEMENT DEFAULT NULL::text --- , p_limit INT DEFAULT NULL --- , OUT p_total BIGINT) --- 
AS --- $$ --- DECLARE --- v_attr TEXT; --- v_limit_clause TEXT := ''; --- v_where_clause TEXT := ''; --- BEGIN --- SELECT attname INTO v_attr --- FROM @extschema@.pathman_config WHERE partrel = p_relation; - --- PERFORM @extschema@.debug_capture(); - --- p_total := 0; - --- /* Format LIMIT clause if needed */ --- IF NOT p_limit IS NULL THEN --- v_limit_clause := format('LIMIT %s', p_limit); --- END IF; - --- /* Format WHERE clause if needed */ --- IF NOT p_min IS NULL THEN --- v_where_clause := format('%1$s >= $1', v_attr); --- END IF; - --- IF NOT p_max IS NULL THEN --- IF NOT p_min IS NULL THEN --- v_where_clause := v_where_clause || ' AND '; --- END IF; --- v_where_clause := v_where_clause || format('%1$s < $2', v_attr); --- END IF; - --- IF v_where_clause != '' THEN --- v_where_clause := 'WHERE ' || v_where_clause; --- END IF; - --- /* Lock rows and copy data */ --- RAISE NOTICE 'Copying data to partitions...'; --- EXECUTE format(' --- WITH data AS ( --- DELETE FROM ONLY %1$s WHERE ctid IN ( --- SELECT ctid FROM ONLY %1$s %2$s %3$s FOR UPDATE NOWAIT --- ) RETURNING *) --- INSERT INTO %1$s SELECT * FROM data' --- , p_relation, v_where_clause, v_limit_clause) --- USING p_min, p_max; - --- GET DIAGNOSTICS p_total = ROW_COUNT; --- RETURN; --- END --- $$ --- LANGUAGE plpgsql; - /* * Copy rows to partitions */ CREATE OR REPLACE FUNCTION @extschema@.partition_data( - parent_relid REGCLASS, - OUT p_total BIGINT) + p_relation regclass + , p_min ANYELEMENT DEFAULT NULL::text + , p_max ANYELEMENT DEFAULT NULL::text + , p_limit INT DEFAULT NULL + , OUT p_total BIGINT) AS $$ DECLARE - relname TEXT; - rec RECORD; - cnt BIGINT := 0; - + v_attr TEXT; + v_limit_clause TEXT := ''; + v_where_clause TEXT := ''; BEGIN - p_total := 0; - - /* Create partitions and copy rest of the data */ - EXECUTE format('WITH part_data AS (DELETE FROM ONLY %1$s RETURNING *) - INSERT INTO %1$s SELECT * FROM part_data', - @extschema@.get_schema_qualified_name(parent_relid)); - - /* Get number of 
inserted rows */ - GET DIAGNOSTICS p_total = ROW_COUNT; - RETURN; + SELECT attname INTO v_attr + FROM @extschema@.pathman_config WHERE partrel = p_relation; + + PERFORM @extschema@.debug_capture(); + + p_total := 0; + + /* Format LIMIT clause if needed */ + IF NOT p_limit IS NULL THEN + v_limit_clause := format('LIMIT %s', p_limit); + END IF; + + /* Format WHERE clause if needed */ + IF NOT p_min IS NULL THEN + v_where_clause := format('%1$s >= $1', v_attr); + END IF; + + IF NOT p_max IS NULL THEN + IF NOT p_min IS NULL THEN + v_where_clause := v_where_clause || ' AND '; + END IF; + v_where_clause := v_where_clause || format('%1$s < $2', v_attr); + END IF; + + IF v_where_clause != '' THEN + v_where_clause := 'WHERE ' || v_where_clause; + END IF; + + /* Lock rows and copy data */ + RAISE NOTICE 'Copying data to partitions...'; + EXECUTE format(' + WITH data AS ( + DELETE FROM ONLY %1$s WHERE ctid IN ( + SELECT ctid FROM ONLY %1$s %2$s %3$s FOR UPDATE NOWAIT + ) RETURNING *) + INSERT INTO %1$s SELECT * FROM data' + , p_relation, v_where_clause, v_limit_clause) + USING p_min, p_max; + + GET DIAGNOSTICS p_total = ROW_COUNT; + RETURN; END $$ LANGUAGE plpgsql; +/* + * Copy rows to partitions + */ +-- CREATE OR REPLACE FUNCTION @extschema@.partition_data( +-- parent_relid REGCLASS, +-- OUT p_total BIGINT) +-- AS +-- $$ +-- DECLARE +-- relname TEXT; +-- rec RECORD; +-- cnt BIGINT := 0; + +-- BEGIN +-- p_total := 0; + +-- /* Create partitions and copy rest of the data */ +-- EXECUTE format('WITH part_data AS (DELETE FROM ONLY %1$s RETURNING *) +-- INSERT INTO %1$s SELECT * FROM part_data', +-- @extschema@.get_schema_qualified_name(parent_relid)); + +-- /* Get number of inserted rows */ +-- GET DIAGNOSTICS p_total = ROW_COUNT; +-- RETURN; +-- END +-- $$ +-- LANGUAGE plpgsql; + /* * Disable pathman partitioning for specified relation */ diff --git a/src/pathman.h b/src/pathman.h index 7d51f9261e..7cd3301172 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -28,9 +28,9 @@ /* 
Check PostgreSQL version (9.5.4 contains an important fix for BGW) */ -#if PG_VERSION_NUM < 90504 - #error "Cannot build pg_pathman with PostgreSQL version lower than 9.5.4" -#endif +// #if PG_VERSION_NUM < 90504 +// #error "Cannot build pg_pathman with PostgreSQL version lower than 9.5.4" +// #endif /* Get CString representation of Datum (simple wrapper) */ #ifdef USE_ASSERT_CHECKING diff --git a/tests/concurrent_partitioning_test.py b/tests/concurrent_partitioning_test.py new file mode 100644 index 0000000000..7aa7d1c40f --- /dev/null +++ b/tests/concurrent_partitioning_test.py @@ -0,0 +1,64 @@ +#coding: utf-8 +""" + concurrent_partitioning_test.py + Tests concurrent partitioning worker with simultaneous update queries + + Copyright (c) 2015-2016, Postgres Professional +""" + +import unittest +from testgres import get_new_node, clean_all, stop_all +from subprocess import Popen, PIPE +import subprocess +import time + + +class ConcurrentTest(unittest.TestCase): + + def setUp(self): + pass + + def tearDown(self): + stop_all() + # clean_all() + + def test_concurrent(self): + setup_cmd = [ + 'create extension pg_pathman', + 'create table abc(id serial, t text)', + 'insert into abc select generate_series(1, 300000)', + 'select create_hash_partitions(\'abc\', \'id\', 3, p_partition_data := false)', + ] + + node = get_new_node('test') + node.init() + node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + + for cmd in setup_cmd: + node.safe_psql('postgres', cmd) + + node.psql('postgres', 'select partition_data_worker(\'abc\')') + + while True: + # update some rows to check for deadlocks + node.safe_psql('postgres', + '''update abc set t = 'test' + where id in (select (random() * 300000)::int from generate_series(1, 3000))''') + + count = node.execute('postgres', 'select count(*) from pathman_active_workers') + + # if there is no active workers then it means work is done + if count[0][0] == 0: + break + time.sleep(1) + + data = 
node.execute('postgres', 'select count(*) from only abc') + self.assertEqual(data[0][0], 0) + data = node.execute('postgres', 'select count(*) from abc') + self.assertEqual(data[0][0], 300000) + + node.stop() + +if __name__ == "__main__": + unittest.main() From e23663c9cbea4ce8f989e677825e54620117d35e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 22 Aug 2016 09:24:13 +0300 Subject: [PATCH 055/184] introduce function shout_if_prel_is_invalid(), fixes --- src/init.c | 7 ++----- src/pg_pathman.c | 18 +++++++++-------- src/pl_funcs.c | 44 +++++++++--------------------------------- src/relation_info.h | 5 +++-- src/utils.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ src/utils.h | 3 +++ 6 files changed, 74 insertions(+), 50 deletions(-) diff --git a/src/init.c b/src/init.c index 845ce16376..6cac719d0e 100644 --- a/src/init.c +++ b/src/init.c @@ -155,13 +155,10 @@ init_pathman_relation_oids(void) /* Cache PATHMAN_CONFIG relation's Oid */ pathman_config_relid = get_relname_relid(PATHMAN_CONFIG, schema); - /* NOTE: add more relations to be cached right here ^^^ */ - - /* Return false if *any* relation doesn't exist yet */ if (pathman_config_relid == InvalidOid) - { return false; - } + + /* NOTE: add more relations to be cached right here ^^^ */ /* Everything is fine, proceed */ return true; diff --git a/src/pg_pathman.c b/src/pg_pathman.c index e48ed4862d..cc72277628 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -813,9 +813,11 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) Datum values[Natts_pathman_config]; bool isnull[Natts_pathman_config]; + prel = get_pathman_relation_info(relid); + shout_if_prel_is_invalid(relid, prel, PT_RANGE); + /* Get both PartRelationInfo & PATHMAN_CONFIG contents for this relation */ - if ((prel = get_pathman_relation_info(relid)) != NULL && - pathman_config_contains_relation(relid, values, isnull, NULL)) + if (pathman_config_contains_relation(relid, values, isnull, NULL)) { Datum min_rvalue, 
max_rvalue; @@ -827,10 +829,6 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) FmgrInfo interval_type_cmp; - if (prel->parttype != PT_RANGE) - elog(ERROR, "Relation \"%s\" is not partitioned by RANGE", - get_rel_name_or_relid(relid)); - /* Fill the FmgrInfo struct with a cmp(value, part_attribute) function */ fill_type_cmp_fmgr_info(&interval_type_cmp, value_type, prel->atttype); @@ -892,7 +890,7 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) SPI_finish(); /* close SPI connection */ } else - elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", + elog(ERROR, "pg_pathman's config does not contain relation \"%s\"", get_rel_name_or_relid(relid)); } PG_CATCH(); @@ -931,7 +929,8 @@ create_partitions(Oid relid, Datum value, Oid value_type) if (pathman_config_contains_relation(relid, NULL, NULL, &rel_xmin)) { /* If table was partitioned in some previous xact, run BGWorker */ - if (TransactionIdPrecedes(rel_xmin, GetCurrentTransactionId())) + if (TransactionIdPrecedes(rel_xmin, GetCurrentTransactionId()) || + TransactionIdEquals(rel_xmin, FrozenTransactionId)) { elog(DEBUG2, "create_partitions(): chose BGW [%u]", MyProcPid); last_partition = create_partitions_bg_worker(relid, value, value_type); @@ -1178,6 +1177,9 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, result); return; } + + default: + elog(ERROR, "Unknown partitioning type %u", prel->parttype); } result->rangeset = list_make1_irange(make_irange(0, diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 9ca1b5825b..21fdf491d7 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -180,14 +180,12 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) search_rangerel_result search_state; prel = get_pathman_relation_info(parent_oid); - - if (!prel) - PG_RETURN_NULL(); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); - /* FIXME: does this function even work? 
*/ - search_state = search_range_partition_eq(value, &cmp_func,prel, + /* Use available PartRelationInfo to find partition */ + search_state = search_range_partition_eq(value, &cmp_func, prel, &found_rentry); /* @@ -246,9 +244,7 @@ get_range_by_part_oid(PG_FUNCTION_ARGS) const PartRelationInfo *prel; prel = get_pathman_relation_info(parent_oid); - if (!prel) - elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", - get_rel_name_or_relid(parent_oid)); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); ranges = PrelGetRangesArray(prel); @@ -266,6 +262,7 @@ get_range_by_part_oid(PG_FUNCTION_ARGS) PG_RETURN_ARRAYTYPE_P(arr); } + /* No partition found, report error */ elog(ERROR, "Relation \"%s\" has no partition \"%s\"", get_rel_name_or_relid(parent_oid), get_rel_name_or_relid(child_oid)); @@ -290,9 +287,7 @@ get_range_by_idx(PG_FUNCTION_ARGS) const PartRelationInfo *prel; prel = get_pathman_relation_info(parent_oid); - if (!prel) - elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", - get_rel_name_or_relid(parent_oid)); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); if (((uint32) abs(idx)) >= PrelChildrenCount(prel)) elog(ERROR, "Partition #%d does not exist (total amount is %u)", @@ -326,14 +321,7 @@ get_min_range_value(PG_FUNCTION_ARGS) const PartRelationInfo *prel; prel = get_pathman_relation_info(parent_oid); - if (!prel) - elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", - get_rel_name_or_relid(parent_oid)); - - if (prel->parttype != PT_RANGE) - if (!prel) - elog(ERROR, "Relation \"%s\" is not partitioned by RANGE", - get_rel_name_or_relid(parent_oid)); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); ranges = PrelGetRangesArray(prel); @@ -351,14 +339,7 @@ get_max_range_value(PG_FUNCTION_ARGS) const PartRelationInfo *prel; prel = get_pathman_relation_info(parent_oid); - if (!prel) - elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", - get_rel_name_or_relid(parent_oid)); - - if (prel->parttype 
!= PT_RANGE) - if (!prel) - elog(ERROR, "Relation \"%s\" is not partitioned by RANGE", - get_rel_name_or_relid(parent_oid)); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); ranges = PrelGetRangesArray(prel); @@ -388,14 +369,7 @@ check_overlap(PG_FUNCTION_ARGS) const PartRelationInfo *prel; prel = get_pathman_relation_info(parent_oid); - if (!prel) - elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", - get_rel_name_or_relid(parent_oid)); - - if (prel->parttype != PT_RANGE) - if (!prel) - elog(ERROR, "Relation \"%s\" is not partitioned by RANGE", - get_rel_name_or_relid(parent_oid)); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); /* comparison functions */ fill_type_cmp_fmgr_info(&cmp_func_1, p1_type, prel->atttype); diff --git a/src/relation_info.h b/src/relation_info.h index e113d0d19e..bebb02ae29 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -19,11 +19,12 @@ /* - * Partitioning type + * Partitioning type. */ typedef enum { - PT_HASH = 1, + PT_INDIFFERENT = 0, /* for part type traits (virtual type) */ + PT_HASH, PT_RANGE } PartType; diff --git a/src/utils.c b/src/utils.c index 56cf1f067b..7410bc0522 100644 --- a/src/utils.c +++ b/src/utils.c @@ -711,6 +711,53 @@ is_string_type_internal(Oid typid) typid == CSTRINGOID; } +/* + * Common PartRelationInfo checks. Emit ERROR if anything is wrong. 
+ */ +void +shout_if_prel_is_invalid(Oid parent_oid, + const PartRelationInfo *prel, + PartType expected_part_type) +{ + if (!prel) + elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", + get_rel_name_or_relid(parent_oid)); + + if (!PrelIsValid(prel)) + elog(ERROR, "pg_pathman's cache contains invalid entry " + "for relation \"%s\" [%u]", + get_rel_name_or_relid(parent_oid), + MyProcPid); + + /* Check partitioning type unless it's "indifferent" */ + if (expected_part_type != PT_INDIFFERENT && + expected_part_type != prel->parttype) + { + char *expected_str; + + switch (expected_part_type) + { + case PT_HASH: + expected_str = "HASH"; + break; + + case PT_RANGE: + expected_str = "RANGE"; + break; + + default: + elog(ERROR, + "expected_str selection not implemented for type %d", + expected_part_type); + } + + elog(ERROR, "Relation \"%s\" is not partitioned by %s", + get_rel_name_or_relid(parent_oid), + expected_str); + } +} + + /* * Try to find binary operator. * diff --git a/src/utils.h b/src/utils.h index 179062c47a..8013f9d775 100644 --- a/src/utils.h +++ b/src/utils.h @@ -55,6 +55,9 @@ bool is_string_type_internal(Oid typid); bool check_rinfo_for_partitioned_attr(List *rinfo, Index varno, AttrNumber varattno); +void shout_if_prel_is_invalid(Oid parent_oid, + const PartRelationInfo *prel, + PartType expected_part_type); /* * Misc. 
From 38acbd0fd2e211a7741edbc99fc40e77efe27963 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Mon, 22 Aug 2016 18:35:04 +0300 Subject: [PATCH 056/184] concurrent partitioning integration --- expected/pg_pathman.out | 92 +++++++++++++++++-- hash.sql | 2 + init.sql | 125 ++++++++++++++++---------- range.sql | 26 ++++-- sql/pg_pathman.sql | 14 ++- src/init.c | 6 +- src/init.h | 2 + src/pl_funcs.c | 21 ++--- src/worker.c | 28 +++++- tests/__init__.py | 0 tests/concurrent_partitioning_test.py | 2 +- 11 files changed, 236 insertions(+), 82 deletions(-) create mode 100644 tests/__init__.py diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index d6fa0b215e..3a47c0c9d8 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -11,6 +11,84 @@ INSERT INTO test.hash_rel VALUES (3, 3); SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); ERROR: Partitioning key 'value' must be NOT NULL ALTER TABLE test.hash_rel ALTER COLUMN value SET NOT NULL; +SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3, partition_data:=false); + create_hash_partitions +------------------------ + 3 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel + -> Seq Scan on hash_rel_0 + -> Seq Scan on hash_rel_1 + -> Seq Scan on hash_rel_2 +(5 rows) + +SELECT * FROM test.hash_rel; + id | value +----+------- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +SELECT pathman.disable_parent('test.hash_rel'); + disable_parent +---------------- + +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel_0 + -> Seq Scan on hash_rel_1 + -> Seq Scan on hash_rel_2 +(4 rows) + +SELECT * FROM test.hash_rel; + id | value +----+------- +(0 rows) + +SELECT pathman.enable_parent('test.hash_rel'); + enable_parent +--------------- + +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; + QUERY PLAN 
+------------------------------ + Append + -> Seq Scan on hash_rel + -> Seq Scan on hash_rel_0 + -> Seq Scan on hash_rel_1 + -> Seq Scan on hash_rel_2 +(5 rows) + +SELECT * FROM test.hash_rel; + id | value +----+------- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +SELECT pathman.drop_partitions('test.hash_rel'); +NOTICE: function test.hash_rel_upd_trig_func() does not exist, skipping +NOTICE: 0 rows copied from test.hash_rel_0 +NOTICE: 0 rows copied from test.hash_rel_1 +NOTICE: 0 rows copied from test.hash_rel_2 + drop_partitions +----------------- + 3 +(1 row) + SELECT pathman.create_hash_partitions('test.hash_rel', 'Value', 3); create_hash_partitions ------------------------ @@ -1140,9 +1218,9 @@ SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; DROP TABLE test.range_rel CASCADE; NOTICE: drop cascades to 16 other objects -SELECT * FROM pathman.pathman_config; - id | partrel | attname | parttype | range_interval -----+---------+---------+----------+---------------- +SELECT partrel, attname, parttype, range_interval FROM pathman.pathman_config; + partrel | attname | parttype | range_interval +---------+---------+----------+---------------- (0 rows) /* Check overlaps */ @@ -1324,10 +1402,10 @@ SELECT pathman.create_partitions_from_range('test."RangeRel"', 'dt', '2015-01-01 DROP TABLE test."RangeRel" CASCADE; NOTICE: drop cascades to 5 other objects -SELECT * FROM pathman.pathman_config; - id | partrel | attname | parttype | range_interval -----+--------------------+---------+----------+---------------- - 9 | test.num_range_rel | id | 2 | 1000 +SELECT partrel, attname, parttype, range_interval FROM pathman.pathman_config; + partrel | attname | parttype | range_interval +--------------------+---------+----------+---------------- + test.num_range_rel | id | 2 | 1000 (1 row) CREATE TABLE test."RangeRel" ( diff --git a/hash.sql b/hash.sql index 8716a0790a..e134bb5578 100644 --- a/hash.sql +++ b/hash.sql @@ -70,6 +70,8 @@ BEGIN IF partition_data = true THEN PERFORM 
@extschema@.disable_parent(parent_relid); PERFORM @extschema@.partition_data(parent_relid); + ELSE + PERFORM @extschema@.enable_parent(parent_relid); END IF; RETURN partitions_count; diff --git a/init.sql b/init.sql index cb752a4092..6252c45cb2 100644 --- a/init.sql +++ b/init.sql @@ -34,15 +34,43 @@ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config_params ( CREATE UNIQUE INDEX i_pathman_config_params ON @extschema@.pathman_config_params(partrel); +/* + * Invalidate relcache every time someone changes parameters config + */ +CREATE OR REPLACE FUNCTION @extschema@.pathman_config_params_trigger_func() +RETURNS TRIGGER AS +$$ +BEGIN + IF TG_OP IN ('INSERT', 'UPDATE') THEN + PERFORM @extschema@.invalidate_relcache(NEW.partrel); + END IF; + + IF TG_OP IN ('UPDATE', 'DELETE') THEN + PERFORM @extschema@.invalidate_relcache(OLD.partrel); + END IF; + + IF TG_OP = 'DELETE' THEN + RETURN OLD; + ELSE + RETURN NEW; + END IF; +END +$$ +LANGUAGE plpgsql; + +CREATE TRIGGER pathman_config_params_trigger +BEFORE INSERT OR UPDATE OR DELETE ON @extschema@.pathman_config_params +FOR EACH ROW EXECUTE PROCEDURE @extschema@.pathman_config_params_trigger_func(); + +/* + * Enable dump of config tables with pg_dump + */ SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config', ''); SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config_params', ''); -CREATE OR REPLACE FUNCTION @extschema@.on_enable_parent(relid OID) -RETURNS OID AS 'pg_pathman' LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION @extschema@.on_disable_parent(relid OID) -RETURNS OID AS 'pg_pathman' LANGUAGE C STRICT; +CREATE OR REPLACE FUNCTION @extschema@.invalidate_relcache(relid OID) +RETURNS VOID AS 'pg_pathman' LANGUAGE C STRICT; /* Include parent relation into query plan's for specified relation */ CREATE OR REPLACE FUNCTION @extschema@.enable_parent(relation REGCLASS) @@ -53,7 +81,8 @@ BEGIN ON CONFLICT (partrel) DO UPDATE SET enable_parent = True; - PERFORM 
@extschema@.on_enable_parent(relation::oid); + -- PERFORM @extschema@.invalidate_relcache(relation::oid); + -- PERFORM @extschema@.on_enable_parent(relation::oid); END $$ LANGUAGE plpgsql; @@ -67,7 +96,8 @@ BEGIN ON CONFLICT (partrel) DO UPDATE SET enable_parent = False; - PERFORM @extschema@.on_disable_parent(relation::oid); + -- PERFORM @extschema@.invalidate_relcache(relation::oid); + -- PERFORM @extschema@.on_disable_parent(relation::oid); END $$ LANGUAGE plpgsql; @@ -143,12 +173,12 @@ CREATE TYPE @extschema@.PathmanRange ( /* * Copy rows to partitions */ -CREATE OR REPLACE FUNCTION @extschema@.partition_data( - p_relation regclass - , p_min ANYELEMENT DEFAULT NULL::text - , p_max ANYELEMENT DEFAULT NULL::text - , p_limit INT DEFAULT NULL - , OUT p_total BIGINT) +CREATE OR REPLACE FUNCTION @extschema@._partition_data_concurrent( + p_relation regclass, + p_min ANYELEMENT DEFAULT NULL::text, + p_max ANYELEMENT DEFAULT NULL::text, + p_limit INT DEFAULT NULL, + OUT p_total BIGINT) AS $$ DECLARE @@ -201,33 +231,30 @@ END $$ LANGUAGE plpgsql; -/* - * Copy rows to partitions - */ --- CREATE OR REPLACE FUNCTION @extschema@.partition_data( --- parent_relid REGCLASS, --- OUT p_total BIGINT) --- AS --- $$ --- DECLARE --- relname TEXT; --- rec RECORD; --- cnt BIGINT := 0; - --- BEGIN --- p_total := 0; - --- /* Create partitions and copy rest of the data */ --- EXECUTE format('WITH part_data AS (DELETE FROM ONLY %1$s RETURNING *) --- INSERT INTO %1$s SELECT * FROM part_data', --- @extschema@.get_schema_qualified_name(parent_relid)); - --- /* Get number of inserted rows */ --- GET DIAGNOSTICS p_total = ROW_COUNT; --- RETURN; --- END --- $$ --- LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION @extschema@.partition_data( + parent_relid REGCLASS, + OUT p_total BIGINT) +AS +$$ +DECLARE + relname TEXT; + rec RECORD; + cnt BIGINT := 0; + +BEGIN + p_total := 0; + + /* Create partitions and copy rest of the data */ + EXECUTE format('WITH part_data AS (DELETE FROM ONLY %1$s RETURNING *) 
+ INSERT INTO %1$s SELECT * FROM part_data', + @extschema@.get_schema_qualified_name(parent_relid)); + + /* Get number of inserted rows */ + GET DIAGNOSTICS p_total = ROW_COUNT; + RETURN; +END +$$ +LANGUAGE plpgsql; /* * Disable pathman partitioning for specified relation @@ -388,20 +415,26 @@ $$ DECLARE obj record; pg_class_oid oid; - BEGIN pg_class_oid = 'pg_catalog.pg_class'::regclass; /* Handle 'DROP TABLE' events */ WITH to_be_deleted AS ( - SELECT cfg.partrel AS rel - FROM pg_event_trigger_dropped_objects() AS events - JOIN @extschema@.pathman_config AS cfg - ON cfg.partrel::oid = events.objid + SELECT cfg.partrel AS rel FROM pg_event_trigger_dropped_objects() AS events + JOIN @extschema@.pathman_config AS cfg ON cfg.partrel::oid = events.objid WHERE events.classid = pg_class_oid ) DELETE FROM @extschema@.pathman_config WHERE partrel IN (SELECT rel FROM to_be_deleted); + + /* Cleanup params table too */ + WITH to_be_deleted AS ( + SELECT cfg.partrel AS rel FROM pg_event_trigger_dropped_objects() AS events + JOIN @extschema@.pathman_config_params AS cfg ON cfg.partrel::oid = events.objid + WHERE events.classid = pg_class_oid + ) + DELETE FROM @extschema@.pathman_config_params + WHERE partrel IN (SELECT rel FROM to_be_deleted); END $$ LANGUAGE plpgsql; @@ -448,6 +481,8 @@ BEGIN RETURNING *) SELECT count(*) from config_num_deleted INTO conf_num_del; + DELETE FROM @extschema@.pathman_config_params WHERE partrel = parent_relid; + IF conf_num_del = 0 THEN RAISE EXCEPTION 'table % has no partitions', parent_relid::text; END IF; diff --git a/range.sql b/range.sql index d89fc38bef..d004e79ec7 100644 --- a/range.sql +++ b/range.sql @@ -43,7 +43,7 @@ CREATE OR REPLACE FUNCTION @extschema@.create_range_partitions( p_start_value ANYELEMENT, p_interval INTERVAL, p_count INTEGER DEFAULT NULL, - p_partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT true) RETURNS INTEGER AS $$ DECLARE @@ -107,10 +107,12 @@ BEGIN /* Notify backend about changes */ PERFORM 
@extschema@.on_create_partitions(parent_relid); - /* Copy data */ - IF p_partition_data = true THEN + /* Copy data */ + IF partition_data = true THEN PERFORM @extschema@.disable_parent(parent_relid); PERFORM @extschema@.partition_data(parent_relid); + ELSE + PERFORM @extschema@.enable_parent(parent_relid); END IF; RETURN p_count; @@ -129,7 +131,7 @@ CREATE OR REPLACE FUNCTION @extschema@.create_range_partitions( p_start_value ANYELEMENT, p_interval ANYELEMENT, p_count INTEGER DEFAULT NULL, - p_partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT true) RETURNS INTEGER AS $$ DECLARE @@ -199,9 +201,11 @@ BEGIN PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - IF p_partition_data = true THEN + IF partition_data = true THEN PERFORM @extschema@.disable_parent(parent_relid); PERFORM @extschema@.partition_data(parent_relid); + ELSE + PERFORM @extschema@.enable_parent(parent_relid); END IF; RETURN p_count; @@ -220,7 +224,7 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( p_start_value ANYELEMENT, p_end_value ANYELEMENT, p_interval ANYELEMENT, - p_partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT true) RETURNS INTEGER AS $$ DECLARE @@ -266,9 +270,11 @@ BEGIN PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - IF p_partition_data = true THEN + IF partition_data = true THEN PERFORM @extschema@.disable_parent(parent_relid); PERFORM @extschema@.partition_data(parent_relid); + ELSE + PERFORM @extschema@.enable_parent(parent_relid); END IF; RETURN part_count; /* number of created partitions */ @@ -287,7 +293,7 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( p_start_value ANYELEMENT, p_end_value ANYELEMENT, p_interval INTERVAL, - p_partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT true) RETURNS INTEGER AS $$ DECLARE @@ -330,9 +336,11 @@ BEGIN PERFORM @extschema@.on_create_partitions(parent_relid); /* Copy data */ - IF p_partition_data 
= true THEN + IF partition_data = true THEN PERFORM @extschema@.disable_parent(parent_relid); PERFORM @extschema@.partition_data(parent_relid); + ELSE + PERFORM @extschema@.enable_parent(parent_relid); END IF; RETURN part_count; /* number of created partitions */ diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 0dcfccce9a..5b9adcf6b4 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -12,6 +12,16 @@ INSERT INTO test.hash_rel VALUES (2, 2); INSERT INTO test.hash_rel VALUES (3, 3); SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); ALTER TABLE test.hash_rel ALTER COLUMN value SET NOT NULL; +SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3, partition_data:=false); +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; +SELECT * FROM test.hash_rel; +SELECT pathman.disable_parent('test.hash_rel'); +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; +SELECT * FROM test.hash_rel; +SELECT pathman.enable_parent('test.hash_rel'); +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; +SELECT * FROM test.hash_rel; +SELECT pathman.drop_partitions('test.hash_rel'); SELECT pathman.create_hash_partitions('test.hash_rel', 'Value', 3); SELECT COUNT(*) FROM test.hash_rel; SELECT COUNT(*) FROM ONLY test.hash_rel; @@ -500,7 +510,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; DROP TABLE test.range_rel CASCADE; -SELECT * FROM pathman.pathman_config; +SELECT partrel, attname, parttype, range_interval FROM pathman.pathman_config; /* Check overlaps */ CREATE TABLE test.num_range_rel ( @@ -545,7 +555,7 @@ SELECT pathman.split_range_partition('test."RangeRel_1"', '2015-01-01'::DATE); SELECT pathman.drop_partitions('test."RangeRel"'); SELECT pathman.create_partitions_from_range('test."RangeRel"', 'dt', '2015-01-01'::DATE, '2015-01-05'::DATE, '1 day'::INTERVAL); DROP TABLE test."RangeRel" CASCADE; -SELECT * FROM pathman.pathman_config; +SELECT partrel, attname, 
parttype, range_interval FROM pathman.pathman_config; CREATE TABLE test."RangeRel" ( id SERIAL PRIMARY KEY, dt TIMESTAMP NOT NULL, diff --git a/src/init.c b/src/init.c index b6043a532d..5dbdccb3a2 100644 --- a/src/init.c +++ b/src/init.c @@ -137,7 +137,9 @@ unload_config(void) Size estimate_pathman_shmem_size(void) { - return estimate_dsm_config_size() + MAXALIGN(sizeof(PathmanState)); + return estimate_dsm_config_size() + + get_worker_slots_size() + + MAXALIGN(sizeof(PathmanState)); } /* @@ -249,6 +251,8 @@ init_shmem_config(void) pmstate->edit_partitions_lock = LWLockAssign(); } } + + create_worker_slots(); } /* diff --git a/src/init.h b/src/init.h index bfcb072665..86adfce062 100644 --- a/src/init.h +++ b/src/init.h @@ -28,6 +28,8 @@ Size estimate_pathman_shmem_size(void); void init_shmem_config(void); void load_config(void); void unload_config(void); +Size get_worker_slots_size(void); +void create_worker_slots(void); void fill_prel_with_partitions(const Oid *partitions, const uint32 parts_count, diff --git a/src/pl_funcs.c b/src/pl_funcs.c index a3e13ccbe5..b6bf9f89f6 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -21,6 +21,7 @@ #include "utils/typcache.h" #include "utils/array.h" #include "utils/memutils.h" +#include <utils/inval.h> #include "miscadmin.h" @@ -29,8 +30,7 @@ PG_FUNCTION_INFO_V1( on_partitions_created ); PG_FUNCTION_INFO_V1( on_partitions_updated ); PG_FUNCTION_INFO_V1( on_partitions_removed ); -PG_FUNCTION_INFO_V1( on_enable_parent ); -PG_FUNCTION_INFO_V1( on_disable_parent ); +PG_FUNCTION_INFO_V1( invalidate_relcache ); PG_FUNCTION_INFO_V1( get_parent_of_partition_pl ); PG_FUNCTION_INFO_V1( get_attribute_type_name ); PG_FUNCTION_INFO_V1( find_or_create_range_partition); @@ -194,21 +194,14 @@ get_attribute_type_name(PG_FUNCTION_ARGS) } Datum 
-on_disable_parent(PG_FUNCTION_ARGS) -{ - Oid relid = DatumGetObjectId(PG_GETARG_DATUM(0)); - - set_enable_parent(relid, false); - PG_RETURN_NULL(); + /* If type exists then invalidate cache */ + if (get_rel_type_id(relid) != InvalidOid) + CacheInvalidateRelcacheByRelid(relid); + PG_RETURN_VOID(); } /* diff --git a/src/worker.c b/src/worker.c index 1a0bbe5fc3..d83401b21a 100644 --- a/src/worker.c +++ b/src/worker.c @@ -81,6 +81,28 @@ typedef struct PartitionDataArgs PartitionDataArgs *slots; +/* + * Create (or attach to) the shared-memory array of worker slots, zeroing it on first creation + */ +void +create_worker_slots(void) +{ + bool found; + size_t size = get_worker_slots_size(); + + slots = (PartitionDataArgs *) + ShmemInitStruct("worker slots", size, &found); + + if (!found) + memset(slots, 0, size); +} + +Size +get_worker_slots_size(void) +{ + return sizeof(PartitionDataArgs) * WORKER_SLOTS; +} + /* * Useful datum packing\unpacking functions for BGW. */ @@ -462,13 +484,13 @@ partition_data_bg_worker_main(Datum main_arg) /* Establish connection and start transaction */ BackgroundWorkerInitializeConnectionByOid(args->dbid, InvalidOid); - bg_worker_load_config("PartitionDataWorker"); - do { failed = false; rows = 0; StartTransactionCommand(); + bg_worker_load_config("PartitionDataWorker"); + SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); @@ -481,7 +503,7 @@ partition_data_bg_worker_main(Datum main_arg) * context will be destroyed after transaction finishes */ oldcontext = MemoryContextSwitchTo(worker_context); - sql = psprintf("SELECT %s.partition_data($1::oid, p_limit:=$2)", + sql = psprintf("SELECT %s._partition_data_concurrent($1::oid, p_limit:=$2)", get_namespace_name(get_pathman_schema())); MemoryContextSwitchTo(oldcontext); } diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/concurrent_partitioning_test.py b/tests/concurrent_partitioning_test.py index 7aa7d1c40f..f05d4c8b23 100644 --- a/tests/concurrent_partitioning_test.py +++ 
b/tests/concurrent_partitioning_test.py @@ -27,7 +27,7 @@ def test_concurrent(self): 'create extension pg_pathman', 'create table abc(id serial, t text)', 'insert into abc select generate_series(1, 300000)', - 'select create_hash_partitions(\'abc\', \'id\', 3, p_partition_data := false)', + 'select create_hash_partitions(\'abc\', \'id\', 3, partition_data := false)', ] node = get_new_node('test') From 15df51779f028e05831d202db56e86392b5f65ab Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Mon, 22 Aug 2016 20:05:51 +0300 Subject: [PATCH 057/184] replication test --- tests/concurrent_partitioning_test.py | 68 ++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/tests/concurrent_partitioning_test.py b/tests/concurrent_partitioning_test.py index f05d4c8b23..9a8d014be1 100644 --- a/tests/concurrent_partitioning_test.py +++ b/tests/concurrent_partitioning_test.py @@ -16,27 +16,37 @@ class ConcurrentTest(unittest.TestCase): def setUp(self): - pass - - def tearDown(self): - stop_all() - # clean_all() - - def test_concurrent(self): - setup_cmd = [ + self.setup_cmd = [ 'create extension pg_pathman', 'create table abc(id serial, t text)', 'insert into abc select generate_series(1, 300000)', 'select create_hash_partitions(\'abc\', \'id\', 3, partition_data := false)', ] + def tearDown(self): + stop_all() + # clean_all() + + def init_test_data(self, node): + """Initialize pg_pathman extension and test data""" + for cmd in self.setup_cmd: + node.safe_psql('postgres', cmd) + + def catchup_replica(self, master, replica): + """Wait until replica synchronizes with master""" + master.poll_query_until( + 'postgres', + 'SELECT pg_current_xlog_location() <= replay_location ' + 'FROM pg_stat_replication WHERE application_name = \'%s\'' + % replica.name) + + def test_concurrent(self): + """Tests concurrent partitioning""" node = get_new_node('test') node.init() node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') 
node.start() - - for cmd in setup_cmd: - node.safe_psql('postgres', cmd) + self.init_test_data(node) node.psql('postgres', 'select partition_data_worker(\'abc\')') @@ -60,5 +70,41 @@ def test_concurrent(self): node.stop() + def test_replication(self): + """Tests how pg_pathman works with replication""" + node = get_new_node('master') + replica = get_new_node('repl') + + # initialize master server + node.init(allows_streaming=True) + node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + node.backup('my_backup') + + # initialize replica from backup + replica.init_from_backup(node, 'my_backup', has_streaming=True) + replica.start() + + # initialize pg_pathman extension and some test data + self.init_test_data(node) + + # wait until replica catches up + self.catchup_replica(node, replica) + + # check that results are equal + self.assertEqual( + node.psql('postgres', 'explain (costs off) select * from abc'), + replica.psql('postgres', 'explain (costs off) select * from abc') + ) + + # enable parent and see if it is enabled in replica + node.psql('postgres', 'select enable_parent(\'abc\')') + + self.catchup_replica(node, replica) + self.assertEqual( + node.psql('postgres', 'explain (costs off) select * from abc'), + replica.psql('postgres', 'explain (costs off) select * from abc') + ) + if __name__ == "__main__": unittest.main() From 00c8d9e59609178533781d901572d448c3dc3fc9 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 23 Aug 2016 10:43:18 +0300 Subject: [PATCH 058/184] test pathman_config_params' trigger --- tests/concurrent_partitioning_test.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/concurrent_partitioning_test.py b/tests/concurrent_partitioning_test.py index 9a8d014be1..5374d09c4b 100644 --- a/tests/concurrent_partitioning_test.py +++ b/tests/concurrent_partitioning_test.py @@ -105,6 +105,33 @@ def test_replication(self): node.psql('postgres', 'explain (costs off) select * 
from abc'), replica.psql('postgres', 'explain (costs off) select * from abc') ) + self.assertEqual( + node.psql('postgres', 'select * from abc'), + replica.psql('postgres', 'select * from abc') + ) + self.assertEqual( + node.execute('postgres', 'select count(*) from abc')[0][0], + 300000 + ) + + # check that direct UPDATE in pathman_config_params invalidates + # cache + node.psql( + 'postgres', + 'update pathman_config_params set enable_parent = false') + self.catchup_replica(node, replica) + self.assertEqual( + node.psql('postgres', 'explain (costs off) select * from abc'), + replica.psql('postgres', 'explain (costs off) select * from abc') + ) + self.assertEqual( + node.psql('postgres', 'select * from abc'), + replica.psql('postgres', 'select * from abc') + ) + self.assertEqual( + node.execute('postgres', 'select count(*) from abc')[0][0], + 0 + ) if __name__ == "__main__": unittest.main() From cf4c9d57894d2e1164eab2aaf13d81a7aa78048b Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 23 Aug 2016 12:45:56 +0300 Subject: [PATCH 059/184] concurrent partitioning PL function rewrited --- init.sql | 48 ++++++++++-------------------------------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/init.sql b/init.sql index 6252c45cb2..e82e38d9e8 100644 --- a/init.sql +++ b/init.sql @@ -134,36 +134,6 @@ CREATE OR REPLACE FUNCTION @extschema@.pathman_range_out(PathmanRange) AS 'pg_pathman' LANGUAGE C IMMUTABLE STRICT; -/* -CREATE OR REPLACE FUNCTION @extschema@.get_whole_range(relid OID) - RETURNS PathmanRange - AS 'pg_pathman' - LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION @extschema@.range_value_cmp(range PathmanRange, value ANYELEMENT) - RETURNS INTEGER - AS 'pg_pathman' - LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION @extschema@.range_lower(range PathmanRange, dummy ANYELEMENT) - RETURNS ANYELEMENT - AS 'pg_pathman' - LANGUAGE C; - -CREATE OR REPLACE FUNCTION @extschema@.range_upper(range PathmanRange, dummy ANYELEMENT) - RETURNS ANYELEMENT 
- AS 'pg_pathman' - LANGUAGE C; - -CREATE OR REPLACE FUNCTION @extschema@.range_oid(range PathmanRange) - RETURNS OID - AS 'pg_pathman' - LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION @extschema@.range_partitions_list(parent_relid OID) - RETURNS SETOF PATHMANRANGE AS 'pg_pathman' - LANGUAGE C STRICT; -*/ CREATE TYPE @extschema@.PathmanRange ( internallength = 32, input = pathman_range_in, @@ -185,12 +155,11 @@ DECLARE v_attr TEXT; v_limit_clause TEXT := ''; v_where_clause TEXT := ''; + ctids TID[]; BEGIN SELECT attname INTO v_attr FROM @extschema@.pathman_config WHERE partrel = p_relation; - PERFORM @extschema@.debug_capture(); - p_total := 0; /* Format LIMIT clause if needed */ @@ -216,14 +185,17 @@ BEGIN /* Lock rows and copy data */ RAISE NOTICE 'Copying data to partitions...'; + EXECUTE format('SELECT array(SELECT ctid FROM ONLY %1$s %2$s %3$s FOR UPDATE NOWAIT)', + p_relation, v_where_clause, v_limit_clause) + USING p_min, p_max + INTO ctids; + EXECUTE format(' WITH data AS ( - DELETE FROM ONLY %1$s WHERE ctid IN ( - SELECT ctid FROM ONLY %1$s %2$s %3$s FOR UPDATE NOWAIT - ) RETURNING *) - INSERT INTO %1$s SELECT * FROM data' - , p_relation, v_where_clause, v_limit_clause) - USING p_min, p_max; + DELETE FROM ONLY %1$s WHERE ctid = ANY($1) RETURNING *) + INSERT INTO %1$s SELECT * FROM data', + p_relation) + USING ctids; GET DIAGNOSTICS p_total = ROW_COUNT; RETURN; From 4efb205d18768a0c6cdbb8b2ec52e6ebc753d085 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 23 Aug 2016 14:24:36 +0300 Subject: [PATCH 060/184] remove useless includes, raise warning if PostgreSQL <= 9.5.4 --- src/hooks.c | 3 ++- src/init.c | 4 ++-- src/pathman.h | 16 +++++++--------- src/pg_pathman.c | 1 + src/relation_info.c | 1 + src/utils.c | 10 ++-------- 6 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index f692086516..2101875ec4 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -12,12 +12,13 @@ #include "init.h" #include "runtimeappend.h" 
#include "runtime_merge_append.h" +#include "partition_filter.h" #include "utils.h" #include "miscadmin.h" #include "optimizer/cost.h" #include "optimizer/restrictinfo.h" -#include "partition_filter.h" +#include "utils/typcache.h" set_join_pathlist_hook_type set_join_pathlist_next = NULL; diff --git a/src/init.c b/src/init.c index 6cac719d0e..51934beacf 100644 --- a/src/init.c +++ b/src/init.c @@ -20,10 +20,10 @@ #include "access/htup_details.h" #include "access/sysattr.h" #include "catalog/indexing.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_inherits.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_type.h" -#include "catalog/pg_inherits.h" -#include "catalog/pg_constraint.h" #include "executor/spi.h" #include "miscadmin.h" #include "optimizer/clauses.h" diff --git a/src/pathman.h b/src/pathman.h index 3daa06f4e1..e6a308809a 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -11,15 +11,11 @@ #ifndef PATHMAN_H #define PATHMAN_H -#include "dsm_array.h" #include "init.h" #include "relation_info.h" #include "rangeset.h" #include "postgres.h" -#include "utils/date.h" -#include "utils/snapshot.h" -#include "utils/typcache.h" #include "nodes/makefuncs.h" #include "nodes/primnodes.h" #include "nodes/execnodes.h" @@ -28,16 +24,18 @@ /* Check PostgreSQL version (9.5.4 contains an important fix for BGW) */ -#if PG_VERSION_NUM < 90504 - #error "Cannot build pg_pathman with PostgreSQL version lower than 9.5.4" +#if PG_VERSION_NUM < 90503 + #error "Cannot build pg_pathman with PostgreSQL version lower than 9.5.3" +#elif PG_VERSION_NUM < 90504 + #warning "It is STRONGLY recommended to use pg_pathman with PostgreSQL 9.5.4 since it contains important fixes" #endif /* Get CString representation of Datum (simple wrapper) */ #ifdef USE_ASSERT_CHECKING -#include "utils.h" -#define DebugPrintDatum(datum, typid) ( datum_to_cstring((datum), (typid)) ) + #include "utils.h" + #define DebugPrintDatum(datum, typid) ( datum_to_cstring((datum), (typid)) ) #else 
-#define DebugPrintDatum(datum, typid) ( "[use --enable-cassert]" ) + #define DebugPrintDatum(datum, typid) ( "[use --enable-cassert]" ) #endif diff --git a/src/pg_pathman.c b/src/pg_pathman.c index cc72277628..71227bf82d 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -41,6 +41,7 @@ #include "utils/syscache.h" #include "utils/selfuncs.h" #include "utils/snapmgr.h" +#include "utils/typcache.h" PG_MODULE_MAGIC; diff --git a/src/relation_info.c b/src/relation_info.c index 81f7797117..05fd61c07c 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -24,6 +24,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/snapmgr.h" +#include "utils/typcache.h" /* diff --git a/src/utils.c b/src/utils.c index 7410bc0522..17ce4fa701 100644 --- a/src/utils.c +++ b/src/utils.c @@ -15,24 +15,18 @@ #include "access/sysattr.h" #include "access/xact.h" #include "catalog/heap.h" -#include "catalog/namespace.h" #include "catalog/pg_type.h" #include "catalog/pg_extension.h" #include "commands/extension.h" -#include "executor/spi.h" -#include "fmgr.h" #include "miscadmin.h" -#include "nodes/nodeFuncs.h" -#include "nodes/makefuncs.h" #include "optimizer/var.h" #include "optimizer/restrictinfo.h" #include "parser/parse_oper.h" -#include "rewrite/rewriteManip.h" #include "utils/builtins.h" -#include "utils/memutils.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/syscache.h" -#include "utils/fmgroids.h" +#include "utils/typcache.h" #define TABLEOID_STR(subst) ( "pathman_tableoid" subst ) From 07315526073d2f2c2adfedf374b3cbf4eb494768 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 23 Aug 2016 18:33:59 +0300 Subject: [PATCH 061/184] add the partition name parameter to some PL functions --- range.sql | 167 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 107 insertions(+), 60 deletions(-) diff --git a/range.sql b/range.sql index d004e79ec7..221e6428b2 100644 --- a/range.sql +++ b/range.sql @@ -396,7 
+396,8 @@ $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION @extschema@.create_single_range_partition( parent_relid REGCLASS, p_start_value ANYELEMENT, - p_end_value ANYELEMENT) + p_end_value ANYELEMENT, + partition_name TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -423,34 +424,42 @@ BEGIN v_seq_name := @extschema@.get_sequence_name(v_plain_schema, v_plain_relname); - /* Get next value from sequence */ - LOOP - v_part_num := nextval(v_seq_name); - v_plain_child_relname := format('%s_%s', v_plain_relname, v_part_num); - v_child_relname := format('%s.%s', - quote_ident(v_plain_schema), - quote_ident(v_plain_child_relname)); - - v_child_relname_exists := count(*) > 0 - FROM pg_class - WHERE relname = v_plain_child_relname AND - relnamespace = v_plain_schema::regnamespace - LIMIT 1; - - EXIT WHEN v_child_relname_exists = false; - END LOOP; + IF partition_name IS NULL THEN + /* Get next value from sequence */ + LOOP + v_part_num := nextval(v_seq_name); + v_plain_child_relname := format('%s_%s', v_plain_relname, v_part_num); + v_child_relname := format('%s.%s', + quote_ident(v_plain_schema), + quote_ident(v_plain_child_relname)); + + v_child_relname_exists := count(*) > 0 + FROM pg_class + WHERE relname = v_plain_child_relname AND + relnamespace = v_plain_schema::regnamespace + LIMIT 1; + + EXIT WHEN v_child_relname_exists = false; + END LOOP; + ELSE + v_child_relname := partition_name; + END IF; - EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', - v_child_relname, - @extschema@.get_schema_qualified_name(parent_relid)); - - EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', - v_child_relname, - @extschema@.build_check_constraint_name(v_child_relname::regclass, - v_attname), - @extschema@.build_range_condition(v_attname, - p_start_value, - p_end_value)); + EXECUTE format( + 'CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', + v_child_relname, + @extschema@.get_schema_qualified_name(parent_relid)); + + EXECUTE format( + 
'ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', + v_child_relname, + @extschema@.build_check_constraint_name( + v_child_relname::regclass, + v_attname), + @extschema@.build_range_condition( + v_attname, + p_start_value, + p_end_value)); RETURN v_child_relname; END @@ -463,6 +472,7 @@ SET client_min_messages = WARNING; CREATE OR REPLACE FUNCTION @extschema@.split_range_partition( p_partition REGCLASS, p_value ANYELEMENT, + partition_name TEXT DEFAULT NULL, OUT p_range ANYARRAY) RETURNS ANYARRAY AS $$ @@ -669,7 +679,8 @@ $$ LANGUAGE plpgsql; * Append new partition */ CREATE OR REPLACE FUNCTION @extschema@.append_range_partition( - parent_relid REGCLASS) + parent_relid REGCLASS, + partition_name TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -690,14 +701,20 @@ BEGIN v_atttype := @extschema@.get_attribute_type_name(parent_relid, v_attname); - EXECUTE format('SELECT @extschema@.append_partition_internal($1, $2, $3, ARRAY[]::%s[])', - v_atttype) - INTO v_part_name - USING parent_relid, v_atttype, v_interval; + EXECUTE + format( + 'SELECT @extschema@.append_partition_internal($1, $2, $3, ARRAY[]::%s[], $4)', + v_atttype) + USING + parent_relid, + v_atttype, + v_interval, + partition_name + INTO + v_part_name; /* Invalidate cache */ PERFORM @extschema@.on_update_partitions(parent_relid); - RETURN v_part_name; EXCEPTION WHEN others THEN @@ -711,7 +728,8 @@ CREATE OR REPLACE FUNCTION @extschema@.append_partition_internal( parent_relid REGCLASS, p_atttype TEXT, p_interval TEXT, - p_range ANYARRAY DEFAULT NULL) + p_range ANYARRAY DEFAULT NULL, + partition_name TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -721,14 +739,23 @@ BEGIN p_range := @extschema@.get_range_by_idx(parent_relid, -1, 0); IF @extschema@.is_date_type(p_atttype::regtype) THEN - v_part_name := @extschema@.create_single_range_partition(parent_relid - , p_range[2] - , p_range[2] + p_interval::interval); + v_part_name := @extschema@.create_single_range_partition( + parent_relid, + p_range[2], + p_range[2] + 
p_interval::interval, + partition_name); ELSE - EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $2 + $3::%s)', - p_atttype) - USING parent_relid, p_range[2], p_interval - INTO v_part_name; + EXECUTE + format( + 'SELECT @extschema@.create_single_range_partition($1, $2, $2 + $3::%s, $4)', + p_atttype) + USING + parent_relid, + p_range[2], + p_interval, + partition_name + INTO + v_part_name; END IF; RETURN v_part_name; @@ -741,7 +768,8 @@ LANGUAGE plpgsql; * Prepend new partition */ CREATE OR REPLACE FUNCTION @extschema@.prepend_range_partition( - parent_relid REGCLASS) + parent_relid REGCLASS, + partition_name TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -762,14 +790,20 @@ BEGIN v_atttype := @extschema@.get_attribute_type_name(parent_relid, v_attname); - EXECUTE format('SELECT @extschema@.prepend_partition_internal($1, $2, $3, ARRAY[]::%s[])', - v_atttype) - INTO v_part_name - USING parent_relid, v_atttype, v_interval; + EXECUTE + format( + 'SELECT @extschema@.prepend_partition_internal($1, $2, $3, ARRAY[]::%s[], $4)', + v_atttype) + USING + parent_relid, + v_atttype, + v_interval, + partition_name + INTO + v_part_name; /* Invalidate cache */ PERFORM @extschema@.on_update_partitions(parent_relid); - RETURN v_part_name; EXCEPTION WHEN others THEN @@ -783,7 +817,8 @@ CREATE OR REPLACE FUNCTION @extschema@.prepend_partition_internal( parent_relid REGCLASS, p_atttype TEXT, p_interval TEXT, - p_range ANYARRAY DEFAULT NULL) + p_range ANYARRAY DEFAULT NULL, + partition_name TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -793,14 +828,23 @@ BEGIN p_range := @extschema@.get_range_by_idx(parent_relid, 0, 0); IF @extschema@.is_date_type(p_atttype::regtype) THEN - v_part_name := @extschema@.create_single_range_partition(parent_relid, - p_range[1] - p_interval::interval, - p_range[1]); + v_part_name := @extschema@.create_single_range_partition( + parent_relid, + p_range[1] - p_interval::interval, + p_range[1], + partition_name); ELSE - EXECUTE 
format('SELECT @extschema@.create_single_range_partition($1, $2 - $3::%s, $2)', - p_atttype) - USING parent_relid, p_range[1], p_interval - INTO v_part_name; + EXECUTE + format( + 'SELECT @extschema@.create_single_range_partition($1, $2 - $3::%s, $2, $4)', + p_atttype) + USING + parent_relid, + p_range[1], + p_interval, + partition_name + INTO + v_part_name; END IF; RETURN v_part_name; @@ -815,7 +859,8 @@ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION @extschema@.add_range_partition( parent_relid REGCLASS, p_start_value ANYELEMENT, - p_end_value ANYELEMENT) + p_end_value ANYELEMENT, + partition_name TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -832,9 +877,11 @@ BEGIN END IF; /* Create new partition */ - v_part_name := @extschema@.create_single_range_partition(parent_relid, - p_start_value, - p_end_value); + v_part_name :=@extschema@.create_single_range_partition( + parent_relid, + p_start_value, + p_end_value, + partition_name); PERFORM @extschema@.on_update_partitions(parent_relid); RETURN v_part_name; From a2c30a505572f4d56c3f38018ea3c9d3b7b92e42 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 25 Aug 2016 18:19:41 +0300 Subject: [PATCH 062/184] improve error handling during pg_pathman's initialization --- src/init.c | 66 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/src/init.c b/src/init.c index 51934beacf..025d2c0fc6 100644 --- a/src/init.c +++ b/src/init.c @@ -42,7 +42,6 @@ #define PART_RELS_SIZE 10 #define CHILD_FACTOR 500 - /* Storage for PartRelationInfos */ HTAB *partitioned_rels = NULL; @@ -52,6 +51,9 @@ HTAB *parent_cache = NULL; bool initialization_needed = true; static bool relcache_callback_needed = true; +/* Help user in case of emergency */ +#define INIT_ERROR_HINT "pg_pathman will be disabled to allow you fix this" + static bool init_pathman_relation_oids(void); static void fini_pathman_relation_oids(void); @@ -284,9 +286,13 @@ fill_prel_with_partitions(const Oid 
*partitions, if (validate_hash_constraint(con_expr, prel, &hash)) prel->children[hash] = partitions[i]; else - elog(ERROR, - "Wrong constraint format for HASH partition \"%s\"", - get_rel_name_or_relid(partitions[i])); + { + DisablePathman(); /* disable pg_pathman since config is broken */ + ereport(ERROR, + (errmsg("Wrong constraint format for HASH partition \"%s\"", + get_rel_name_or_relid(partitions[i])), + errhint(INIT_ERROR_HINT))); + } } break; @@ -302,15 +308,24 @@ fill_prel_with_partitions(const Oid *partitions, prel->ranges[i].max = range_max; } else - elog(ERROR, - "Wrong constraint format for RANGE partition \"%s\"", - get_rel_name_or_relid(partitions[i])); + { + DisablePathman(); /* disable pg_pathman since config is broken */ + ereport(ERROR, + (errmsg("Wrong constraint format for RANGE partition \"%s\"", + get_rel_name_or_relid(partitions[i])), + errhint(INIT_ERROR_HINT))); + } } break; default: - elog(ERROR, "Unknown partitioning type for relation \"%s\"", - get_rel_name_or_relid(prel->key)); + { + DisablePathman(); /* disable pg_pathman since config is broken */ + ereport(ERROR, + (errmsg("Unknown partitioning type for relation \"%s\"", + get_rel_name_or_relid(prel->key)), + errhint(INIT_ERROR_HINT))); + } } } @@ -350,9 +365,12 @@ fill_prel_with_partitions(const Oid *partitions, for (i = 0; i < PrelChildrenCount(prel); i++) { if (prel->children[i] == InvalidOid) + { + DisablePathman(); /* disable pg_pathman since config is broken */ elog(ERROR, "pg_pathman's cache for relation \"%s\" " "has not been properly initialized", get_rel_name_or_relid(prel->key)); + } } #endif } @@ -605,11 +623,10 @@ read_pathman_config(void) if (get_rel_type_id(relid) == InvalidOid) { DisablePathman(); /* disable pg_pathman since config is broken */ - ereport(ERROR, (errmsg("Table \"%s\" contains nonexistent relation %u", PATHMAN_CONFIG, relid), - errdetail("pg_pathman will be disabled"))); + errhint(INIT_ERROR_HINT))); } /* Create or update PartRelationInfo for this 
partitioned table */ @@ -638,7 +655,15 @@ get_partition_constraint_expr(Oid partition, AttrNumber part_attno) Expr *expr; /* expression tree for constraint */ conname = build_check_constraint_name_internal(partition, part_attno); - conid = get_relation_constraint_oid(partition, conname, false); + conid = get_relation_constraint_oid(partition, conname, true); + if (conid == InvalidOid) + { + DisablePathman(); /* disable pg_pathman since config is broken */ + ereport(ERROR, + (errmsg("constraint \"%s\" for partition \"%s\" does not exist", + conname, get_rel_name_or_relid(partition)), + errhint(INIT_ERROR_HINT))); + } con_tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(conid)); conbin_datum = SysCacheGetAttr(CONSTROID, con_tuple, @@ -646,9 +671,14 @@ get_partition_constraint_expr(Oid partition, AttrNumber part_attno) &conbin_isnull); if (conbin_isnull) { - elog(DEBUG2, "conbin is null for constraint %s", conname); + DisablePathman(); /* disable pg_pathman since config is broken */ + ereport(WARNING, + (errmsg("constraint \"%s\" for partition \"%s\" has NULL conbin", + conname, get_rel_name_or_relid(partition)), + errhint(INIT_ERROR_HINT))); pfree(conname); - return NULL; + + return NULL; /* could not parse */ } pfree(conname); @@ -690,6 +720,9 @@ validate_range_constraint(const Expr *expr, const BoolExpr *boolexpr = (const BoolExpr *) expr; const OpExpr *opexpr; + if (!expr) + return false; + /* it should be an AND operator on top */ if (!and_clause((Node *) expr)) return false; @@ -779,6 +812,9 @@ validate_hash_constraint(const Expr *expr, *type_hash_proc_expr; const Var *var; /* partitioned column */ + if (!expr) + return false; + if (!IsA(expr, OpExpr)) return false; eq_expr = (const OpExpr *) expr; @@ -825,7 +861,7 @@ validate_hash_constraint(const Expr *expr, return false; /* Check that PARTITIONS_COUNT is equal to total amount of partitions */ - if (DatumGetUInt32(((Const*) second)->constvalue) != PrelChildrenCount(prel)) + if (DatumGetUInt32(((Const *) 
second)->constvalue) != PrelChildrenCount(prel)) return false; /* Check that CUR_PARTITION_HASH is Const */ From bea88a96321b3413de04923f6f83a689a9a06ba6 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Thu, 25 Aug 2016 18:55:44 +0300 Subject: [PATCH 063/184] tests for zero partitions case --- init.sql | 9 +++++++ range.sql | 67 +++++++++++++++++++++++++++++++--------------- sql/pg_pathman.sql | 15 +++++++++++ src/pl_funcs.c | 4 ++- src/worker.c | 9 +++++-- 5 files changed, 80 insertions(+), 24 deletions(-) diff --git a/init.sql b/init.sql index e82e38d9e8..bf9037fc5e 100644 --- a/init.sql +++ b/init.sql @@ -72,6 +72,15 @@ SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config_params', CREATE OR REPLACE FUNCTION @extschema@.invalidate_relcache(relid OID) RETURNS VOID AS 'pg_pathman' LANGUAGE C STRICT; +CREATE OR REPLACE FUNCTION @extschema@.partitions_count(relation REGCLASS) +RETURNS INT AS +$$ +BEGIN + RETURN count(*) FROM pg_inherits WHERE inhparent = relation; +END +$$ +LANGUAGE plpgsql; + /* Include parent relation into query plan's for specified relation */ CREATE OR REPLACE FUNCTION @extschema@.enable_parent(relation REGCLASS) RETURNS VOID AS diff --git a/range.sql b/range.sql index 221e6428b2..9eda1b55f1 100644 --- a/range.sql +++ b/range.sql @@ -59,6 +59,10 @@ BEGIN p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); + IF p_count < 0 THEN + RAISE EXCEPTION 'Partitions count must not be less than zero'; + END IF; + /* Try to determine partitions count if not set */ IF p_count IS NULL THEN EXECUTE format('SELECT count(*), max(%s) FROM %s', p_attribute, parent_relid) @@ -76,13 +80,19 @@ BEGIN END LOOP; END IF; - /* Check boundaries */ - EXECUTE format('SELECT @extschema@.check_boundaries(''%s'', ''%s'', ''%s'', ''%s''::%s)', - parent_relid, - p_attribute, - p_start_value, - p_start_value + p_interval * p_count, - pg_typeof(p_start_value)); + /* + * In case when user doesn't want to 
automatically create partitions + * and specifies partition count as 0 then do not check boundaries + */ + IF p_count != 0 THEN + /* Check boundaries */ + EXECUTE format('SELECT @extschema@.check_boundaries(''%s'', ''%s'', ''%s'', ''%s''::%s)', + parent_relid, + p_attribute, + p_start_value, + p_start_value + p_interval * p_count, + pg_typeof(p_start_value)); + END IF; SELECT * INTO v_plain_schema, v_plain_relname FROM @extschema@.get_plain_schema_and_relname(parent_relid); @@ -147,8 +157,8 @@ BEGIN p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); - IF p_count <= 0 THEN - RAISE EXCEPTION 'Partitions count must be greater than zero'; + IF p_count < 0 THEN + RAISE EXCEPTION 'Partitions count must not be less than zero'; END IF; /* Try to determine partitions count if not set */ @@ -172,11 +182,17 @@ BEGIN END LOOP; END IF; - /* check boundaries */ - PERFORM @extschema@.check_boundaries(parent_relid, - p_attribute, - p_start_value, - p_start_value + p_interval * p_count); + /* + * In case when user doesn't want to automatically create partitions + * and specifies partition count as 0 then do not check boundaries + */ + IF p_count != 0 THEN + /* check boundaries */ + PERFORM @extschema@.check_boundaries(parent_relid, + p_attribute, + p_start_value, + p_start_value + p_interval * p_count); + END IF; SELECT * INTO v_plain_schema, v_plain_relname FROM @extschema@.get_plain_schema_and_relname(parent_relid); @@ -521,7 +537,8 @@ BEGIN v_new_partition := @extschema@.create_single_range_partition( @extschema@.get_schema_qualified_name(v_parent_relid), p_value, - p_range[2]); + p_range[2], + partition_name); /* Copy data */ v_cond := @extschema@.build_range_condition(v_attname, p_value, p_range[2]); @@ -736,6 +753,10 @@ DECLARE v_part_name TEXT; BEGIN + IF @extschema@.partitions_count(parent_relid) = 0 THEN + RAISE EXCEPTION 'Cannot append to empty partitions set'; + END IF; + p_range := 
@extschema@.get_range_by_idx(parent_relid, -1, 0); IF @extschema@.is_date_type(p_atttype::regtype) THEN @@ -825,6 +846,10 @@ DECLARE v_part_name TEXT; BEGIN + IF @extschema@.partitions_count(parent_relid) = 0 THEN + RAISE EXCEPTION 'Cannot prepend to empty partitions set'; + END IF; + p_range := @extschema@.get_range_by_idx(parent_relid, 0, 0); IF @extschema@.is_date_type(p_atttype::regtype) THEN @@ -865,17 +890,17 @@ RETURNS TEXT AS $$ DECLARE v_part_name TEXT; - BEGIN - /* check range overlap */ - IF @extschema@.check_overlap(parent_relid, p_start_value, p_end_value) THEN - RAISE EXCEPTION 'Specified range overlaps with existing partitions'; - END IF; - IF p_start_value >= p_end_value THEN RAISE EXCEPTION 'Failed to create partition: p_start_value is greater than p_end_value'; END IF; + /* check range overlap */ + IF @extschema@.partitions_count(parent_relid) > 0 + AND @extschema@.check_overlap(parent_relid, p_start_value, p_end_value) THEN + RAISE EXCEPTION 'Specified range overlaps with existing partitions'; + END IF; + /* Create new partition */ v_part_name :=@extschema@.create_single_range_partition( parent_relid, diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 5b9adcf6b4..d8dbefcd42 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -471,6 +471,21 @@ CREATE TABLE test.range_rel_test2 ( dt TIMESTAMP); SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test2', '2013-01-01'::DATE, '2014-01-01'::DATE); +/* + * Zero partitions count and adding partitions with specified name + */ +CREATE TABLE test.zero( + id SERIAL PRIMARY KEY, + value INT NOT NULL); +INSERT INTO test.zero SELECT g, g FROM generate_series(1, 100) as g; +SELECT pathman.create_range_partitions('test.zero', 'value', 50, 10, 0); +SELECT pathman.append_range_partition('test.zero', 'test.zero_0'); +SELECT pathman.prepend_range_partition('test.zero', 'test.zero_1'); +SELECT pathman.add_range_partition('test.zero', 50, 70, 'test.zero_50'); +SELECT 
pathman.append_range_partition('test.zero', 'test.zero_appended'); +SELECT pathman.prepend_range_partition('test.zero', 'test.zero_prepended'); +SELECT pathman.split_range_partition('test.zero_50', 60, 'test.zero_60'); + /* * Check that altering table columns doesn't break trigger */ diff --git a/src/pl_funcs.c b/src/pl_funcs.c index e461c05508..a8cb44d0b6 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -96,10 +96,12 @@ on_partitions_created_internal(Oid partitioned_table, bool add_callbacks) static void on_partitions_updated_internal(Oid partitioned_table, bool add_callbacks) { - /* TODO: shall we emit relcache invalidation event here? */ + bool found; + elog(DEBUG2, "on_partitions_updated() [add_callbacks = %s] " "triggered for relation %u", (add_callbacks ? "true" : "false"), partitioned_table); + invalidate_pathman_relation_info(partitioned_table, &found); } static void diff --git a/src/worker.c b/src/worker.c index e0cd88d77f..a2f6720869 100644 --- a/src/worker.c +++ b/src/worker.c @@ -498,6 +498,7 @@ partition_data_bg_worker_main(Datum main_arg) SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); + /* Do some preparation within the first iteration */ if (sql == NULL) { MemoryContext oldcontext; @@ -530,10 +531,12 @@ partition_data_bg_worker_main(Datum main_arg) } PG_CATCH(); { + ErrorData *error; EmitErrorReport(); + error = CopyErrorData(); + elog(LOG, "Worker error: %s", error->message); FlushErrorState(); - elog(WARNING, "Error #%u", failures_count); /* * The most common exception we can catch here is a deadlock with * concurrent user queries. Check that attempts count doesn't exceed @@ -543,7 +546,9 @@ partition_data_bg_worker_main(Datum main_arg) { pfree(sql); args->status = WS_FREE; - elog(ERROR, "Failures count exceeded 100. Finishing..."); + elog(LOG, + "The concurrent partitioning worker exiting because the " + "maximum attempts count exceeded. 
See the error message below"); exit(1); } failed = true; From 1f9cc96ff0d7c204ea421c500bd61c96c7065cf3 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 25 Aug 2016 19:29:43 +0300 Subject: [PATCH 064/184] introduce safe function PrelLastChild(), fixes for get_range_by_idx() --- src/hooks.c | 2 +- src/nodes_common.c | 2 +- src/pg_pathman.c | 28 ++++++++++------------------ src/pl_funcs.c | 21 ++++++++++++++------- src/relation_info.h | 20 ++++++++++++++++---- 5 files changed, 42 insertions(+), 31 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 2101875ec4..52495f8986 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -227,7 +227,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb rte->inh = true; /* we must restore 'inh' flag! */ children = PrelGetChildrenArray(prel); - ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); + ranges = list_make1_irange(make_irange(0, PrelLastChild(prel), false)); /* Make wrappers over restrictions and collect final rangeset */ InitWalkerContext(&context, prel, NULL, false); diff --git a/src/nodes_common.c b/src/nodes_common.c index 6e783cafa8..a733070f53 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -502,7 +502,7 @@ rescan_append_common(CustomScanState *node) Assert(prel); /* First we select all available partitions... 
*/ - ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); + ranges = list_make1_irange(make_irange(0, PrelLastChild(prel), false)); InitWalkerContext(&scan_state->wcxt, prel, econtext, false); foreach (lc, scan_state->custom_exprs) diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 71227bf82d..db593480cd 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -320,7 +320,7 @@ handle_modification_query(Query *parse) return; /* Parse syntax tree and extract partition ranges */ - ranges = list_make1_irange(make_irange(0, PrelChildrenCount(prel) - 1, false)); + ranges = list_make1_irange(make_irange(0, PrelLastChild(prel), false)); expr = (Expr *) eval_const_expressions(NULL, parse->jointree->quals); if (!expr) return; @@ -680,7 +680,7 @@ walk_expr_tree(Expr *expr, WalkerContext *context) result->orig = (const Node *) expr; result->args = NIL; result->rangeset = list_make1_irange( - make_irange(0, PrelChildrenCount(context->prel) - 1, true)); + make_irange(0, PrelLastChild(context->prel), true)); result->paramsel = 1.0; return result; } @@ -839,7 +839,7 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) /* Read max & min range values from PartRelationInfo */ min_rvalue = prel->ranges[0].min; - max_rvalue = prel->ranges[PrelChildrenCount(prel) - 1].max; + max_rvalue = prel->ranges[PrelLastChild(prel)].max; /* If this is a *date type*, cast 'range_interval' to INTERVAL */ if (is_date_type_internal(value_type)) @@ -1183,9 +1183,7 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, elog(ERROR, "Unknown partitioning type %u", prel->parttype); } - result->rangeset = list_make1_irange(make_irange(0, - PrelChildrenCount(prel) - 1, - true)); + result->rangeset = list_make1_irange(make_irange(0, PrelLastChild(prel), true)); result->paramsel = 1.0; } @@ -1211,9 +1209,7 @@ handle_binary_opexpr_param(const PartRelationInfo *prel, tce = lookup_type_cache(vartype, TYPECACHE_BTREE_OPFAMILY); strategy = 
get_op_opfamily_strategy(expr->opno, tce->btree_opf); - result->rangeset = list_make1_irange(make_irange(0, - PrelChildrenCount(prel) - 1, - true)); + result->rangeset = list_make1_irange(make_irange(0, PrelLastChild(prel), true)); if (strategy == BTEqualStrategyNumber) { @@ -1311,7 +1307,7 @@ handle_const(const Const *c, WalkerContext *context) if (!context->for_insert) { result->rangeset = list_make1_irange(make_irange(0, - PrelChildrenCount(prel) - 1, + PrelLastChild(prel), true)); result->paramsel = 1.0; @@ -1382,9 +1378,7 @@ handle_opexpr(const OpExpr *expr, WalkerContext *context) } } - result->rangeset = list_make1_irange(make_irange(0, - PrelChildrenCount(prel) - 1, - true)); + result->rangeset = list_make1_irange(make_irange(0, PrelLastChild(prel), true)); result->paramsel = 1.0; return result; } @@ -1456,7 +1450,7 @@ handle_boolexpr(const BoolExpr *expr, WalkerContext *context) if (expr->boolop == AND_EXPR) result->rangeset = list_make1_irange(make_irange(0, - PrelChildrenCount(prel) - 1, + PrelLastChild(prel), false)); else result->rangeset = NIL; @@ -1479,7 +1473,7 @@ handle_boolexpr(const BoolExpr *expr, WalkerContext *context) break; default: result->rangeset = list_make1_irange(make_irange(0, - PrelChildrenCount(prel) - 1, + PrelLastChild(prel), false)); break; } @@ -1581,9 +1575,7 @@ handle_arrexpr(const ScalarArrayOpExpr *expr, WalkerContext *context) result->paramsel = DEFAULT_INEQ_SEL; handle_arrexpr_return: - result->rangeset = list_make1_irange(make_irange(0, - PrelChildrenCount(prel) - 1, - true)); + result->rangeset = list_make1_irange(make_irange(0, PrelLastChild(prel), true)); return result; } diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 21fdf491d7..222beecd51 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -11,6 +11,7 @@ #include "pathman.h" #include "init.h" #include "utils.h" +#include "relation_info.h" #include "access/htup_details.h" #include "access/nbtree.h" @@ -289,17 +290,23 @@ get_range_by_idx(PG_FUNCTION_ARGS) prel = 
get_pathman_relation_info(parent_oid); shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); - if (((uint32) abs(idx)) >= PrelChildrenCount(prel)) + /* Now we have to deal with 'idx' */ + if (idx < -1) + { + elog(ERROR, "Negative indices other than -1 (last partition) are not allowed"); + } + else if (idx == -1 && PrelChildrenCount(prel) > 0) + { + idx = PrelLastChild(prel); + } + else if (((uint32) abs(idx)) >= PrelChildrenCount(prel)) + { elog(ERROR, "Partition #%d does not exist (total amount is %u)", idx, PrelChildrenCount(prel)); + } ranges = PrelGetRangesArray(prel); - if (idx == -1) - idx = PrelChildrenCount(prel) - 1; - else if (idx < -1) - elog(ERROR, "Negative indices other than -1 (last partition) are not allowed"); - elems[0] = ranges[idx].min; elems[1] = ranges[idx].max; @@ -343,7 +350,7 @@ get_max_range_value(PG_FUNCTION_ARGS) ranges = PrelGetRangesArray(prel); - PG_RETURN_DATUM(ranges[PrelChildrenCount(prel) - 1].max); + PG_RETURN_DATUM(ranges[PrelLastChild(prel)].max); } /* diff --git a/src/relation_info.h b/src/relation_info.h index bebb02ae29..e28005059c 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -107,13 +107,25 @@ typedef enum * PartRelationInfo field access macros. 
*/ -#define PrelGetChildrenArray(prel) ( (prel)->children ) +#define PrelGetChildrenArray(prel) ( (prel)->children ) -#define PrelGetRangesArray(prel) ( (prel)->ranges ) +#define PrelGetRangesArray(prel) ( (prel)->ranges ) -#define PrelChildrenCount(prel) ( (prel)->children_count ) +#define PrelChildrenCount(prel) ( (prel)->children_count ) -#define PrelIsValid(prel) ( (prel) && (prel)->valid ) +#define PrelIsValid(prel) ( (prel) && (prel)->valid ) + +inline static uint32 +PrelLastChild(const PartRelationInfo *prel) +{ + Assert(PrelIsValid(prel)); + + if (PrelChildrenCount(prel) == 0) + elog(ERROR, "pg_pathman's cache entry for relation %u has 0 children", + prel->key); + + return PrelChildrenCount(prel) - 1; /* last partition */ +} const PartRelationInfo *refresh_pathman_relation_info(Oid relid, From a6e426004ca174154c239fd8490cd89075db3be1 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 26 Aug 2016 02:02:46 +0300 Subject: [PATCH 065/184] introduce function add_to_pathman_config(), remove useless column 'id' from pathman_config, replace pg_pathman_enable & initialization_needed with struct PathmanInitState, small fixes & refactoring --- expected/pg_pathman.out | 10 ++-- init.sql | 46 +++++++++------- src/hooks.c | 11 +--- src/init.c | 58 ++++++++++++++++---- src/init.h | 57 ++++++++++++++++++-- src/pathman.h | 32 +++-------- src/pg_pathman.c | 28 ++++------ src/pl_funcs.c | 116 ++++++++++++++++++++++++++++++++++++++-- 8 files changed, 263 insertions(+), 95 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index d6fa0b215e..c60f272786 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1141,8 +1141,8 @@ SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; DROP TABLE test.range_rel CASCADE; NOTICE: drop cascades to 16 other objects SELECT * FROM pathman.pathman_config; - id | partrel | attname | parttype | range_interval -----+---------+---------+----------+---------------- + partrel | attname | parttype | 
range_interval +---------+---------+----------+---------------- (0 rows) /* Check overlaps */ @@ -1325,9 +1325,9 @@ SELECT pathman.create_partitions_from_range('test."RangeRel"', 'dt', '2015-01-01 DROP TABLE test."RangeRel" CASCADE; NOTICE: drop cascades to 5 other objects SELECT * FROM pathman.pathman_config; - id | partrel | attname | parttype | range_interval -----+--------------------+---------+----------+---------------- - 9 | test.num_range_rel | id | 2 | 1000 + partrel | attname | parttype | range_interval +--------------------+---------+----------+---------------- + test.num_range_rel | id | 2 | 1000 (1 row) CREATE TABLE test."RangeRel" ( diff --git a/init.sql b/init.sql index 5c83360830..60d5e645ad 100644 --- a/init.sql +++ b/init.sql @@ -18,8 +18,7 @@ * range_interval - base interval for RANGE partitioning as string */ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config ( - id SERIAL PRIMARY KEY, - partrel REGCLASS NOT NULL, + partrel REGCLASS NOT NULL PRIMARY KEY, attname TEXT NOT NULL, parttype INTEGER NOT NULL, range_interval TEXT, @@ -100,7 +99,7 @@ BEGIN RAISE EXCEPTION 'Partitioning key ''%'' must be NOT NULL', p_attribute; END IF; - /* Check if there are foreign keys reference to the relation */ + /* Check if there are foreign keys that reference the relation */ FOR v_rec IN (SELECT * FROM pg_constraint WHERE confrelid = p_relation::regclass::oid) LOOP @@ -243,12 +242,9 @@ CREATE OR REPLACE FUNCTION @extschema@.drop_triggers( parent_relid REGCLASS) RETURNS VOID AS $$ -DECLARE - funcname TEXT; - BEGIN - funcname := @extschema@.build_update_trigger_func_name(parent_relid); - EXECUTE format('DROP FUNCTION IF EXISTS %s() CASCADE', funcname); + EXECUTE format('DROP FUNCTION IF EXISTS %s() CASCADE', + @extschema@.build_update_trigger_func_name(parent_relid)); END $$ LANGUAGE plpgsql; @@ -317,12 +313,14 @@ EXECUTE PROCEDURE @extschema@.pathman_ddl_trigger_func(); /* - * Check if regclass is date or timestamp + * Attach partitioned table */ -CREATE OR 
REPLACE FUNCTION @extschema@.is_date_type( - typid REGTYPE) -RETURNS BOOLEAN AS 'pg_pathman', 'is_date_type' -LANGUAGE C STRICT; +CREATE OR REPLACE FUNCTION @extschema@.add_to_pathman_config( + parent_relid REGCLASS, + attname TEXT, + range_interval TEXT DEFAULT NULL) +RETURNS BOOLEAN AS 'pg_pathman', 'add_to_pathman_config' +LANGUAGE C; CREATE OR REPLACE FUNCTION @extschema@.on_create_partitions( @@ -341,6 +339,21 @@ RETURNS VOID AS 'pg_pathman', 'on_partitions_removed' LANGUAGE C STRICT; +/* + * Get parent of pg_pathman's partition. + */ +CREATE OR REPLACE FUNCTION @extschema@.get_parent_of_partition(REGCLASS) +RETURNS REGCLASS AS 'pg_pathman', 'get_parent_of_partition_pl' +LANGUAGE C STRICT; + +/* + * Check if regclass is date or timestamp + */ +CREATE OR REPLACE FUNCTION @extschema@.is_date_type( + typid REGTYPE) +RETURNS BOOLEAN AS 'pg_pathman', 'is_date_type' +LANGUAGE C STRICT; + /* * Checks if attribute is nullable */ @@ -389,10 +402,3 @@ LANGUAGE C STRICT; CREATE OR REPLACE FUNCTION @extschema@.debug_capture() RETURNS VOID AS 'pg_pathman', 'debug_capture' LANGUAGE C STRICT; - -/* - * Get parent of pg_pathman's partition. - */ -CREATE OR REPLACE FUNCTION @extschema@.get_parent_of_partition(REGCLASS) -RETURNS REGCLASS AS 'pg_pathman', 'get_parent_of_partition_pl' -LANGUAGE C STRICT; diff --git a/src/hooks.c b/src/hooks.c index 52495f8986..fd283ec936 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -356,15 +356,8 @@ pg_pathman_enable_assign_hook(bool newval, void *extra) elog(DEBUG2, "pg_pathman_enable_assign_hook() [newval = %s] triggered", newval ? 
"true" : "false"); - if (initialization_needed) - { - elog(DEBUG2, "pg_pathman is not yet initialized, " - "pg_pathman.enable is set to false"); - return; - } - /* Return quickly if nothing has changed */ - if (newval == (pg_pathman_enable && + if (newval == (pg_pathman_init_state.pg_pathman_enable && pg_pathman_enable_runtimeappend && pg_pathman_enable_runtime_merge_append && pg_pathman_enable_partition_filter)) @@ -459,7 +452,7 @@ pathman_post_parse_analysis_hook(ParseState *pstate, Query *query) /* Load config if pg_pathman exists & it's still necessary */ if (IsPathmanEnabled() && - initialization_needed && + !IsPathmanInitialized() && /* Now evaluate the most expensive clause */ get_pathman_schema() != InvalidOid) { diff --git a/src/init.c b/src/init.c index 025d2c0fc6..043307d860 100644 --- a/src/init.c +++ b/src/init.c @@ -38,23 +38,27 @@ #include "utils/snapmgr.h" +/* Help user in case of emergency */ +#define INIT_ERROR_HINT "pg_pathman will be disabled to allow you to resolve this issue" + /* Initial size of 'partitioned_rels' table */ #define PART_RELS_SIZE 10 #define CHILD_FACTOR 500 + /* Storage for PartRelationInfos */ -HTAB *partitioned_rels = NULL; +HTAB *partitioned_rels = NULL; /* Storage for PartParentInfos */ -HTAB *parent_cache = NULL; +HTAB *parent_cache = NULL; -bool initialization_needed = true; -static bool relcache_callback_needed = true; - -/* Help user in case of emergency */ -#define INIT_ERROR_HINT "pg_pathman will be disabled to allow you fix this" +/* pg_pathman's init status */ +PathmanInitState pg_pathman_init_state; +/* Shall we install new relcache callback? 
*/ +static bool relcache_callback_needed = true; +/* Functions for various local caches */ static bool init_pathman_relation_oids(void); static void fini_pathman_relation_oids(void); static void init_local_cache(void); @@ -81,6 +85,41 @@ static bool read_opexpr_const(const OpExpr *opexpr, static int oid_cmp(const void *p1, const void *p2); +/* + * Save and restore main init state. + */ + +void +save_pathman_init_state(PathmanInitState *temp_init_state) +{ + *temp_init_state = pg_pathman_init_state; +} + +void +restore_pathman_init_state(const PathmanInitState *temp_init_state) +{ + pg_pathman_init_state = *temp_init_state; +} + +/* + * Create main GUC. + */ +void +init_main_pathman_toggle(void) +{ + /* Main toggle, load_config() will enable it */ + DefineCustomBoolVariable("pg_pathman.enable", + "Enables pg_pathman's optimizations during the planner stage", + NULL, + &pg_pathman_init_state.pg_pathman_enable, + true, + PGC_USERSET, + 0, + NULL, + pg_pathman_enable_assign_hook, + NULL); +} + /* * Create local PartRelationInfo cache & load pg_pathman's config. * Return true on success. May occasionally emit ERROR. 
@@ -111,7 +150,7 @@ load_config(void) } /* Mark pg_pathman as initialized */ - initialization_needed = false; + pg_pathman_init_state.initialization_needed = false; elog(DEBUG2, "pg_pathman's config has been loaded successfully [%u]", MyProcPid); @@ -131,7 +170,7 @@ unload_config(void) fini_local_cache(); /* Mark pg_pathman as uninitialized */ - initialization_needed = true; + pg_pathman_init_state.initialization_needed = true; elog(DEBUG2, "pg_pathman's config has been unloaded successfully [%u]", MyProcPid); } @@ -539,7 +578,6 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, heap_deform_tuple(htup, RelationGetDescr(rel), values, isnull); /* Perform checks for non-NULL columns */ - Assert(!isnull[Anum_pathman_config_id - 1]); Assert(!isnull[Anum_pathman_config_partrel - 1]); Assert(!isnull[Anum_pathman_config_attname - 1]); Assert(!isnull[Anum_pathman_config_parttype - 1]); diff --git a/src/init.h b/src/init.h index ec8c537993..f06f2c70b5 100644 --- a/src/init.h +++ b/src/init.h @@ -15,14 +15,63 @@ #include "postgres.h" #include "storage/lmgr.h" -#include "utils/snapshot.h" +#include "utils/guc.h" #include "utils/hsearch.h" +#include "utils/snapshot.h" + + +/* + * pg_pathman's initialization state structure. + */ +typedef struct +{ + bool pg_pathman_enable; /* GUC variable implementation */ + bool initialization_needed; /* do we need to perform init? */ +} PathmanInitState; -extern HTAB *partitioned_rels; -extern HTAB *parent_cache; -extern bool initialization_needed; +extern HTAB *partitioned_rels; +extern HTAB *parent_cache; +/* pg_pathman's initialization state */ +extern PathmanInitState pg_pathman_init_state; + + +/* + * Check if pg_pathman is initialized. + */ +#define IsPathmanInitialized() ( !pg_pathman_init_state.initialization_needed ) + +/* + * Check if pg_pathman is enabled. + */ +#define IsPathmanEnabled() ( pg_pathman_init_state.pg_pathman_enable ) + +/* + * Check if pg_pathman is initialized & enabled. 
+ */ +#define IsPathmanReady() ( IsPathmanInitialized() && IsPathmanEnabled() ) + +/* + * Emergency disable mechanism. + */ +#define DisablePathman() \ + do { \ + pg_pathman_init_state.pg_pathman_enable = false; \ + pg_pathman_init_state.initialization_needed = true; \ + } while (0) + + +/* + * Save and restore PathmanInitState. + */ +void save_pathman_init_state(PathmanInitState *temp_init_state); +void restore_pathman_init_state(const PathmanInitState *temp_init_state); + +/* + * Create main GUC variable. + */ +void init_main_pathman_toggle(void); Size estimate_pathman_shmem_size(void); void init_shmem_config(void); diff --git a/src/pathman.h b/src/pathman.h index e6a308809a..9cd7789fc1 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -11,7 +11,6 @@ #ifndef PATHMAN_H #define PATHMAN_H -#include "init.h" #include "relation_info.h" #include "rangeset.h" @@ -43,16 +42,17 @@ * Definitions for the "pathman_config" table. */ #define PATHMAN_CONFIG "pathman_config" -#define Natts_pathman_config 5 -#define Anum_pathman_config_id 1 /* primary key */ -#define Anum_pathman_config_partrel 2 /* partitioned relation (regclass) */ -#define Anum_pathman_config_attname 3 /* partitioned column (text) */ -#define Anum_pathman_config_parttype 4 /* partitioning type (1|2) */ -#define Anum_pathman_config_range_interval 5 /* interval for RANGE pt. (text) */ +#define Natts_pathman_config 4 +#define Anum_pathman_config_partrel 1 /* partitioned relation (regclass) */ +#define Anum_pathman_config_attname 2 /* partitioned column (text) */ +#define Anum_pathman_config_parttype 3 /* partitioning type (1|2) */ +#define Anum_pathman_config_range_interval 4 /* interval for RANGE pt. (text) */ /* type modifier (typmod) for 'range_interval' */ #define PATHMAN_CONFIG_interval_typmod -1 +#define PATHMAN_CONFIG_ID_SEQ "pathman_config_id_seq" + /* * Cache current PATHMAN_CONFIG relid (set during load_config()). 
*/ @@ -96,27 +96,9 @@ extern List *inheritance_enabled_relids; */ extern List *inheritance_disabled_relids; -extern bool pg_pathman_enable; extern PathmanState *pmstate; -#define PATHMAN_GET_DATUM(value, by_val) \ - ( (by_val) ? (Datum) (value) : PointerGetDatum(&value) ) - -/* - * Check if pg_pathman is initialized & enabled. - */ -#define IsPathmanReady() ( !initialization_needed && pg_pathman_enable ) - -#define IsPathmanEnabled() ( pg_pathman_enable ) - -#define DisablePathman() \ - do { \ - pg_pathman_enable = false; \ - initialization_needed = true; \ - } while (0) - - int append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte, int index, Oid childOID, List *wrappers); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index db593480cd..dd4d82def4 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -34,7 +34,6 @@ #include "optimizer/cost.h" #include "utils/builtins.h" #include "utils/datum.h" -#include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -49,7 +48,6 @@ PG_MODULE_MAGIC; List *inheritance_disabled_relids = NIL; List *inheritance_enabled_relids = NIL; -bool pg_pathman_enable = true; PathmanState *pmstate; Oid pathman_config_relid = InvalidOid; @@ -126,19 +124,26 @@ static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo void _PG_init(void) { + PathmanInitState temp_init_state; + if (!process_shared_preload_libraries_in_progress) { elog(ERROR, "Pathman module must be initialized in postmaster. 
" "Put the following line to configuration file: " "shared_preload_libraries='pg_pathman'"); - - initialization_needed = false; } /* Request additional shared resources */ RequestAddinShmemSpace(estimate_pathman_shmem_size()); RequestAddinLWLocks(3); + /* Assign pg_pathman's initial state */ + temp_init_state.initialization_needed = true; + temp_init_state.pg_pathman_enable = true; + + /* Apply initial state */ + restore_pathman_init_state(&temp_init_state); + /* Initialize 'next' hook pointers */ set_rel_pathlist_hook_next = set_rel_pathlist_hook; set_rel_pathlist_hook = pathman_rel_pathlist_hook; @@ -151,22 +156,11 @@ _PG_init(void) planner_hook_next = planner_hook; planner_hook = pathman_planner_hook; - /* Initialize custom nodes */ + /* Initialize static data for all subsystems */ + init_main_pathman_toggle(); init_runtimeappend_static_data(); init_runtime_merge_append_static_data(); init_partition_filter_static_data(); - - /* Main toggle, load_config() will enable it */ - DefineCustomBoolVariable("pg_pathman.enable", - "Enables pg_pathman's optimizations during the planner stage", - NULL, - &pg_pathman_enable, - true, - PGC_USERSET, - 0, - NULL, - pg_pathman_enable_assign_hook, - NULL); } /* diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 222beecd51..151bd65009 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -13,6 +13,8 @@ #include "utils.h" #include "relation_info.h" +#include "catalog/indexing.h" +#include "commands/sequence.h" #include "access/htup_details.h" #include "access/nbtree.h" #include "access/xact.h" @@ -45,6 +47,7 @@ PG_FUNCTION_INFO_V1( build_update_trigger_func_name ); PG_FUNCTION_INFO_V1( build_update_trigger_name ); PG_FUNCTION_INFO_V1( is_date_type ); PG_FUNCTION_INFO_V1( is_attribute_nullable ); +PG_FUNCTION_INFO_V1( add_to_pathman_config ); PG_FUNCTION_INFO_V1( debug_capture ); @@ -53,6 +56,13 @@ static void on_partitions_updated_internal(Oid partitioned_table, bool add_callb static void on_partitions_removed_internal(Oid 
partitioned_table, bool add_callbacks); +static bool +check_relation_exists(Oid relid) +{ + return get_rel_type_id(relid) != InvalidOid; +} + + /* * Callbacks. */ @@ -295,7 +305,7 @@ get_range_by_idx(PG_FUNCTION_ARGS) { elog(ERROR, "Negative indices other than -1 (last partition) are not allowed"); } - else if (idx == -1 && PrelChildrenCount(prel) > 0) + else if (idx == -1) { idx = PrelLastChild(prel); } @@ -506,7 +516,7 @@ build_check_constraint_name_attnum(PG_FUNCTION_ARGS) AttrNumber attnum = PG_GETARG_INT16(1); const char *result; - if (get_rel_type_id(relid) == InvalidOid) + if (!check_relation_exists(relid)) elog(ERROR, "Invalid relation %u", relid); /* We explicitly do not support system attributes */ @@ -527,7 +537,7 @@ build_check_constraint_name_attname(PG_FUNCTION_ARGS) AttrNumber attnum = get_attnum(relid, text_to_cstring(attname)); const char *result; - if (get_rel_type_id(relid) == InvalidOid) + if (!check_relation_exists(relid)) elog(ERROR, "Invalid relation %u", relid); if (attnum == InvalidAttrNumber) @@ -547,7 +557,7 @@ build_update_trigger_func_name(PG_FUNCTION_ARGS) const char *result; /* Check that relation exists */ - if (get_rel_type_id(relid) == InvalidOid) + if (!check_relation_exists(relid)) elog(ERROR, "Invalid relation %u", relid); nspid = get_rel_namespace(relid); @@ -566,7 +576,7 @@ build_update_trigger_name(PG_FUNCTION_ARGS) const char *result; /* trigger's name can't be qualified */ /* Check that relation exists */ - if (get_rel_type_id(relid) == InvalidOid) + if (!check_relation_exists(relid)) elog(ERROR, "Invalid relation %u", relid); result = quote_identifier(psprintf("%s_upd_trig", get_rel_name(relid))); @@ -574,6 +584,102 @@ build_update_trigger_name(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(cstring_to_text(result)); } + +/* + * Try to add previously partitioned table to PATHMAN_CONFIG. 
+ */ +Datum +add_to_pathman_config(PG_FUNCTION_ARGS) +{ + Oid relid; + text *attname; + PartType parttype; + + Relation pathman_config; + Datum values[Natts_pathman_config]; + bool isnull[Natts_pathman_config]; + HeapTuple htup; + CatalogIndexState indstate; + + PathmanInitState init_state; + MemoryContext old_mcxt = CurrentMemoryContext; + + if (PG_ARGISNULL(0)) + elog(ERROR, "parent_relid should not be null"); + + if (PG_ARGISNULL(1)) + elog(ERROR, "attname should not be null"); + + /* Read parameters */ + relid = PG_GETARG_OID(0); + attname = PG_GETARG_TEXT_P(1); + + /* Check that relation exists */ + if (!check_relation_exists(relid)) + elog(ERROR, "Invalid relation %u", relid); + + if (get_attnum(relid, text_to_cstring(attname)) == InvalidAttrNumber) + elog(ERROR, "Relation \"%s\" has no column '%s'", + get_rel_name_or_relid(relid), text_to_cstring(attname)); + + /* Select partitioning type using 'range_interval' */ + parttype = PG_ARGISNULL(2) ? PT_HASH : PT_RANGE; + + /* + * Initialize columns (partrel, attname, parttype, range_interval). 
+ */ + values[Anum_pathman_config_partrel - 1] = ObjectIdGetDatum(relid); + isnull[Anum_pathman_config_partrel - 1] = false; + + values[Anum_pathman_config_attname - 1] = PointerGetDatum(attname); + isnull[Anum_pathman_config_attname - 1] = false; + + values[Anum_pathman_config_parttype - 1] = Int32GetDatum(parttype); + isnull[Anum_pathman_config_parttype - 1] = false; + + values[Anum_pathman_config_range_interval - 1] = PG_GETARG_DATUM(2); + isnull[Anum_pathman_config_range_interval - 1] = PG_ARGISNULL(2); + + /* Insert new row into PATHMAN_CONFIG */ + pathman_config = heap_open(get_pathman_config_relid(), RowExclusiveLock); + htup = heap_form_tuple(RelationGetDescr(pathman_config), values, isnull); + simple_heap_insert(pathman_config, htup); + indstate = CatalogOpenIndexes(pathman_config); + CatalogIndexInsert(indstate, htup); + CatalogCloseIndexes(indstate); + heap_close(pathman_config, RowExclusiveLock); + + /* Now try to create a PartRelationInfo */ + PG_TRY(); + { + /* Some flags might change during refresh attempt */ + save_pathman_init_state(&init_state); + + refresh_pathman_relation_info(relid, parttype, text_to_cstring(attname)); + } + PG_CATCH(); + { + ErrorData *edata; + + /* Switch to the original context & copy edata */ + MemoryContextSwitchTo(old_mcxt); + edata = CopyErrorData(); + FlushErrorState(); + + /* We have to restore all changed flags */ + restore_pathman_init_state(&init_state); + + /* Show error message */ + elog(ERROR, "%s", edata->message); + + FreeErrorData(edata); + } + PG_END_TRY(); + + PG_RETURN_BOOL(true); +} + + /* * NOTE: used for DEBUG, set breakpoint here. 
*/ From 46115f971fbad6c5ea7a588e04227e14dbb05a10 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 26 Aug 2016 02:32:13 +0300 Subject: [PATCH 066/184] remove struct ShmemRelationInfo --- src/relation_info.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/relation_info.h b/src/relation_info.h index e28005059c..8cc2de687d 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -65,19 +65,6 @@ typedef struct hash_proc; /* hash function for 'atttype' */ } PartRelationInfo; -/* - * ShmemRelationInfo - * Per-relation misc information stored in shmem - */ -typedef struct -{ - Oid key; /* partitioned table's Oid */ - - pg_atomic_flag dirty; /* is anyone performing any of the - partitioning-related operations - on this table at the moment? */ -} ShmemRelationInfo; - /* * RelParentInfo * Cached parent of the specified partition. From 951cde131a12be1f534fa897ce45b129a7035046 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 26 Aug 2016 02:47:40 +0300 Subject: [PATCH 067/184] restrict partitioning of temporary tables --- hash.sql | 3 +++ init.sql | 10 ++++++++++ range.sql | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/hash.sql b/hash.sql index 1292083e62..9b508a99b8 100644 --- a/hash.sql +++ b/hash.sql @@ -69,6 +69,9 @@ BEGIN PERFORM @extschema@.partition_data(parent_relid); RETURN partitions_count; + +EXCEPTION WHEN others THEN + RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql SET client_min_messages = WARNING; diff --git a/init.sql b/init.sql index 60d5e645ad..eefa56e15f 100644 --- a/init.sql +++ b/init.sql @@ -88,8 +88,18 @@ $$ DECLARE v_rec RECORD; is_referenced BOOLEAN; + rel_persistence CHAR; BEGIN + /* Ignore temporary tables */ + SELECT relpersistence FROM pg_catalog.pg_class + WHERE oid = p_relation INTO rel_persistence; + + IF rel_persistence = 't'::CHAR THEN + RAISE EXCEPTION 'Temporary table "%" cannot be partitioned', + quote_ident(p_relation::TEXT); + END IF; + IF EXISTS (SELECT * FROM 
@extschema@.pathman_config WHERE partrel = p_relation) THEN RAISE EXCEPTION 'Relation "%" has already been partitioned', p_relation; diff --git a/range.sql b/range.sql index eb676840cd..001e783d57 100644 --- a/range.sql +++ b/range.sql @@ -875,7 +875,7 @@ BEGIN WHERE oid = p_partition INTO rel_persistence; IF rel_persistence = 't'::CHAR THEN - RAISE EXCEPTION 'Temporary table \"%\" cannot be used as a partition', + RAISE EXCEPTION 'Temporary table "%" cannot be used as a partition', quote_ident(p_partition::TEXT); END IF; From 3007f79dcedbb6d2142078f947ffe5acc9a6f9df Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 26 Aug 2016 11:08:12 +0300 Subject: [PATCH 068/184] auto partitions creation parameter added --- expected/pg_pathman.out | 40 +++++++++++++++++++++++ init.sql | 70 ++++++++++++++++++++++++++++++++--------- sql/pg_pathman.sql | 1 + src/init.c | 7 ++--- src/init.h | 4 ++- src/pathman.h | 4 +-- src/relation_info.c | 11 +++++-- src/relation_info.h | 1 + 8 files changed, 113 insertions(+), 25 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 3a47c0c9d8..14d26dd80c 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1109,6 +1109,46 @@ CREATE TABLE test.range_rel_test2 ( dt TIMESTAMP); SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test2', '2013-01-01'::DATE, '2014-01-01'::DATE); ERROR: Partition must have the exact same structure as parent +/* + * Zero partitions count and adding partitions with specified name + */ +CREATE TABLE test.zero( + id SERIAL PRIMARY KEY, + value INT NOT NULL); +INSERT INTO test.zero SELECT g, g FROM generate_series(1, 100) as g; +SELECT pathman.create_range_partitions('test.zero', 'value', 50, 10, 0); +NOTICE: sequence "zero_seq" does not exist, skipping + create_range_partitions +------------------------- + 0 +(1 row) + +SELECT pathman.append_range_partition('test.zero', 'test.zero_0'); +ERROR: Cannot append to empty partitions set +SELECT 
pathman.prepend_range_partition('test.zero', 'test.zero_1'); +ERROR: Cannot prepend to empty partitions set +SELECT pathman.add_range_partition('test.zero', 50, 70, 'test.zero_50'); + add_range_partition +--------------------- + test.zero_50 +(1 row) + +SELECT pathman.append_range_partition('test.zero', 'test.zero_appended'); +ERROR: Partition #-1 does not exist (total amount is 1) +SELECT pathman.prepend_range_partition('test.zero', 'test.zero_prepended'); + prepend_range_partition +------------------------- + test.zero_prepended +(1 row) + +SELECT pathman.split_range_partition('test.zero_50', 60, 'test.zero_60'); + split_range_partition +----------------------- + {50,70} +(1 row) + +DROP TABLE test.zero CASCADE; +NOTICE: drop cascades to 3 other objects /* * Check that altering table columns doesn't break trigger */ diff --git a/init.sql b/init.sql index bf9037fc5e..bc5be72fb6 100644 --- a/init.sql +++ b/init.sql @@ -29,7 +29,8 @@ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config ( CREATE TABLE IF NOT EXISTS @extschema@.pathman_config_params ( partrel REGCLASS NOT NULL, - enable_parent BOOLEAN NOT NULL DEFAULT TRUE + enable_parent BOOLEAN NOT NULL DEFAULT TRUE, + auto BOOLEAN NOT NULL DEFAULT TRUE ); CREATE UNIQUE INDEX i_pathman_config_params ON @extschema@.pathman_config_params(partrel); @@ -81,32 +82,73 @@ END $$ LANGUAGE plpgsql; -/* Include parent relation into query plan's for specified relation */ +/* + * Set additional param + */ +CREATE OR REPLACE FUNCTION @extschema@.pathman_set_param( + relation REGCLASS, + param TEXT, + value BOOLEAN) +RETURNS VOID AS +$$ +BEGIN + EXECUTE format( + 'INSERT INTO @extschema@.pathman_config_params (partrel, %1$s) + VALUES ($1, $2) + ON CONFLICT (partrel) DO + UPDATE SET %1$s = $2', + param) + USING + relation, + value; +END +$$ +LANGUAGE plpgsql; + +/* + * Include parent relation into query plan's for specified relation + */ CREATE OR REPLACE FUNCTION @extschema@.enable_parent(relation REGCLASS) RETURNS VOID AS $$ 
BEGIN - INSERT INTO @extschema@.pathman_config_params values (relation, True) - ON CONFLICT (partrel) DO - UPDATE SET enable_parent = True; + PERFORM @extschema@.pathman_set_param(relation, 'enable_parent', True); +END +$$ +LANGUAGE plpgsql; - -- PERFORM @extschema@.invalidate_relcache(relation::oid); - -- PERFORM @extschema@.on_enable_parent(relation::oid); +/* + * Do not include parent relation into query plan's for specified relation + */ +CREATE OR REPLACE FUNCTION @extschema@.disable_parent(relation REGCLASS) +RETURNS VOID AS +$$ +BEGIN + PERFORM @extschema@.pathman_set_param(relation, 'enable_parent', False); END $$ LANGUAGE plpgsql; -/* Do not include parent relation into query plan's for specified relation */ -CREATE OR REPLACE FUNCTION @extschema@.disable_parent(relation REGCLASS) +/* + * Enable auto partition creation + */ +CREATE OR REPLACE FUNCTION @extschema@.enable_auto(relation REGCLASS) RETURNS VOID AS $$ BEGIN - INSERT INTO @extschema@.pathman_config_params values (relation, False) - ON CONFLICT (partrel) DO - UPDATE SET enable_parent = False; + PERFORM @extschema@.pathman_set_param(relation, 'auto', True); +END +$$ +LANGUAGE plpgsql; - -- PERFORM @extschema@.invalidate_relcache(relation::oid); - -- PERFORM @extschema@.on_disable_parent(relation::oid); +/* + * Disable auto partition creation + */ +CREATE OR REPLACE FUNCTION @extschema@.disable_auto(relation REGCLASS) +RETURNS VOID AS +$$ +BEGIN + PERFORM @extschema@.pathman_set_param(relation, 'auto', False); END $$ LANGUAGE plpgsql; diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index d8dbefcd42..1663f47b5e 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -485,6 +485,7 @@ SELECT pathman.add_range_partition('test.zero', 50, 70, 'test.zero_50'); SELECT pathman.append_range_partition('test.zero', 'test.zero_appended'); SELECT pathman.prepend_range_partition('test.zero', 'test.zero_prepended'); SELECT pathman.split_range_partition('test.zero_50', 60, 'test.zero_60'); +DROP TABLE 
test.zero CASCADE; /* * Check that altering table columns doesn't break trigger diff --git a/src/init.c b/src/init.c index 8818df453c..f74bab7428 100644 --- a/src/init.c +++ b/src/init.c @@ -570,7 +570,7 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, * Return 'enable_parent' parameter of relation */ bool -read_enable_parent_parameter(Oid relid) +read_pathman_params(Oid relid, Datum *values, bool *isnull) { Relation rel; HeapScanDesc scan; @@ -590,12 +590,9 @@ read_enable_parent_parameter(Oid relid) if ((htup = heap_getnext(scan, ForwardScanDirection)) != NULL) { - Datum values[Natts_pathman_config_params]; - bool isnull[Natts_pathman_config_params]; - /* Extract data if necessary */ heap_deform_tuple(htup, RelationGetDescr(rel), values, isnull); - result = values[Anum_pathman_config_params_enable_parent - 1]; + result = true; } /* Clean resources */ diff --git a/src/init.h b/src/init.h index fc21d16907..8444137555 100644 --- a/src/init.h +++ b/src/init.h @@ -47,6 +47,8 @@ bool pathman_config_contains_relation(Oid relid, bool *isnull, TransactionId *xmin); -bool read_enable_parent_parameter(Oid relid); +bool read_pathman_params(Oid relid, + Datum *values, + bool *isnull); #endif diff --git a/src/pathman.h b/src/pathman.h index 2414b39976..606f474bf5 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -57,10 +57,10 @@ * Definitions for the "pathman_config_params" table */ #define PATHMAN_CONFIG_PARAMS "pathman_config_params" -// #define PATHMAN_CONFIG_PARAMS_INDEX "i_pathman_config_params" -#define Natts_pathman_config_params 2 +#define Natts_pathman_config_params 3 #define Anum_pathman_config_params_partrel 1 /* primary key */ #define Anum_pathman_config_params_enable_parent 2 /* include parent into plan */ +#define Anum_pathman_config_params_auto 3 /* auto partitions creation */ /* * Cache current PATHMAN_CONFIG relid (set during load_config()). 
diff --git a/src/relation_info.c b/src/relation_info.c index e511d6d7c3..71e4bc61fc 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -74,6 +74,8 @@ refresh_pathman_relation_info(Oid relid, i; bool found; PartRelationInfo *prel; + Datum param_values[Natts_pathman_config_params]; + bool param_isnull[Natts_pathman_config_params]; prel = (PartRelationInfo *) hash_search(partitioned_rels, (const void *) &relid, @@ -158,10 +160,13 @@ refresh_pathman_relation_info(Oid relid, pfree(prel_children); /* - * Read additional parameter ('enable_parent' is the only one at - * the moment) + * Read additional parameters ('enable_parent' and 'auto' at the moment) */ - prel->enable_parent = read_enable_parent_parameter(relid); + if (read_pathman_params(relid, param_values, param_isnull)) + { + prel->enable_parent = param_values[Anum_pathman_config_params_enable_parent - 1]; + prel->auto_partition = param_values[Anum_pathman_config_params_auto - 1]; + } /* We've successfully built a cache entry */ prel->valid = true; diff --git a/src/relation_info.h b/src/relation_info.h index 90f12fa573..b18f32a7c4 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -48,6 +48,7 @@ typedef struct Oid key; /* partitioned table's Oid */ bool valid; /* is this entry valid? 
*/ bool enable_parent; /* include parent to the plan */ + bool auto_partition; /* auto partition creation */ uint32 children_count; Oid *children; /* Oids of child partitions */ From d7665c3800b01d60eb2c231922b9c6b9af63befe Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 26 Aug 2016 11:42:15 +0300 Subject: [PATCH 069/184] enable/disable auto partition creation --- expected/pg_pathman.out | 17 ++++++++++++++++- sql/pg_pathman.sql | 5 +++++ src/partition_filter.c | 18 +++++++++++++----- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 14d26dd80c..c9c2702120 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1256,8 +1256,23 @@ SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; 74 | Sun Mar 15 00:00:00 2015 (1 row) +SELECT pathman.disable_auto('test.range_rel'); + disable_auto +-------------- + +(1 row) + +INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); +ERROR: There is no suitable partition for key 'Mon Jun 01 00:00:00 2015' +SELECT pathman.enable_auto('test.range_rel'); + enable_auto +------------- + +(1 row) + +INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); DROP TABLE test.range_rel CASCADE; -NOTICE: drop cascades to 16 other objects +NOTICE: drop cascades to 20 other objects SELECT partrel, attname, parttype, range_interval FROM pathman.pathman_config; partrel | attname | parttype | range_interval ---------+---------+----------+---------------- diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 1663f47b5e..2589e516c2 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -525,6 +525,11 @@ SELECT * FROM test.range_rel WHERE dt = '2014-12-15'; EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; +SELECT pathman.disable_auto('test.range_rel'); +INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); +SELECT pathman.enable_auto('test.range_rel'); +INSERT INTO 
test.range_rel (dt) VALUES ('2015-06-01'); + DROP TABLE test.range_rel CASCADE; SELECT partrel, attname, parttype, range_interval FROM pathman.pathman_config; diff --git a/src/partition_filter.c b/src/partition_filter.c index db168f4cdf..9e48807fda 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -204,12 +204,20 @@ partition_filter_exec(CustomScanState *node) elog(ERROR, "PartitionFilter selected more than one partition"); else if (nparts == 0) { - selected_partid = create_partitions(state->partitioned_table, - state->temp_const.constvalue, - state->temp_const.consttype); + if (prel->auto_partition) + { + selected_partid = create_partitions(state->partitioned_table, + state->temp_const.constvalue, + state->temp_const.consttype); - /* get_pathman_relation_info() will refresh this entry */ - invalidate_pathman_relation_info(state->partitioned_table, NULL); + /* get_pathman_relation_info() will refresh this entry */ + invalidate_pathman_relation_info(state->partitioned_table, NULL); + } + else + elog(ERROR, + "There is no suitable partition for key '%s'", + datum_to_cstring(state->temp_const.constvalue, + state->temp_const.consttype)); } else selected_partid = parts[0]; From 5992123b3b1190209b799e62e887aa04526b7d10 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 26 Aug 2016 12:23:21 +0300 Subject: [PATCH 070/184] tests fixed --- expected/pg_pathman.out | 8 ++++++-- sql/pg_pathman.sql | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 09cc5519c6..bbf2844572 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1134,7 +1134,11 @@ SELECT pathman.add_range_partition('test.zero', 50, 70, 'test.zero_50'); (1 row) SELECT pathman.append_range_partition('test.zero', 'test.zero_appended'); -ERROR: Partition #-1 does not exist (total amount is 1) + append_range_partition +------------------------ + test.zero_appended +(1 row) + SELECT 
pathman.prepend_range_partition('test.zero', 'test.zero_prepended'); prepend_range_partition ------------------------- @@ -1148,7 +1152,7 @@ SELECT pathman.split_range_partition('test.zero_50', 60, 'test.zero_60'); (1 row) DROP TABLE test.zero CASCADE; -NOTICE: drop cascades to 3 other objects +NOTICE: drop cascades to 4 other objects /* * Check that altering table columns doesn't break trigger */ diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 2589e516c2..1cf5118d4e 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -531,7 +531,7 @@ SELECT pathman.enable_auto('test.range_rel'); INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); DROP TABLE test.range_rel CASCADE; -SELECT partrel, attname, parttype, range_interval FROM pathman.pathman_config; +SELECT * FROM pathman.pathman_config; /* Check overlaps */ CREATE TABLE test.num_range_rel ( @@ -576,7 +576,7 @@ SELECT pathman.split_range_partition('test."RangeRel_1"', '2015-01-01'::DATE); SELECT pathman.drop_partitions('test."RangeRel"'); SELECT pathman.create_partitions_from_range('test."RangeRel"', 'dt', '2015-01-01'::DATE, '2015-01-05'::DATE, '1 day'::INTERVAL); DROP TABLE test."RangeRel" CASCADE; -SELECT partrel, attname, parttype, range_interval FROM pathman.pathman_config; +SELECT * FROM pathman.pathman_config; CREATE TABLE test."RangeRel" ( id SERIAL PRIMARY KEY, dt TIMESTAMP NOT NULL, From 0084ae69dc7bde8260bed6a326f7cba277badaae Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 26 Aug 2016 14:28:50 +0300 Subject: [PATCH 071/184] concurrent partitioning function renamed --- init.sql | 2 +- src/worker.c | 4 ++-- .../{concurrent_partitioning_test.py => partitioning_test.py} | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) rename tests/{concurrent_partitioning_test.py => partitioning_test.py} (97%) diff --git a/init.sql b/init.sql index 50dc6d04e0..b978f4cf0c 100644 --- a/init.sql +++ b/init.sql @@ -167,7 +167,7 @@ RETURNS TABLE ( CREATE OR REPLACE VIEW 
@extschema@.pathman_active_workers AS SELECT * FROM @extschema@.active_workers(); -CREATE OR REPLACE FUNCTION @extschema@.partition_data_worker(relation regclass) +CREATE OR REPLACE FUNCTION @extschema@.partition_data_concurrent(relation regclass) RETURNS VOID AS 'pg_pathman' LANGUAGE C STRICT; CREATE OR REPLACE FUNCTION @extschema@.stop_worker(relation regclass) diff --git a/src/worker.c b/src/worker.c index a2f6720869..b2a2f92230 100644 --- a/src/worker.c +++ b/src/worker.c @@ -37,7 +37,7 @@ static void create_partitions_bg_worker_main(Datum main_arg); static void partition_data_bg_worker_main(Datum main_arg); static void handle_sigterm(SIGNAL_ARGS); -PG_FUNCTION_INFO_V1( partition_data_worker ); +PG_FUNCTION_INFO_V1( partition_data_concurrent ); PG_FUNCTION_INFO_V1( active_workers ); PG_FUNCTION_INFO_V1( stop_worker ); @@ -374,7 +374,7 @@ create_partitions_bg_worker_main(Datum main_arg) * immediately */ Datum -partition_data_worker( PG_FUNCTION_ARGS ) +partition_data_concurrent( PG_FUNCTION_ARGS ) { Oid relid = PG_GETARG_OID(0); int empty_slot_idx = -1; diff --git a/tests/concurrent_partitioning_test.py b/tests/partitioning_test.py similarity index 97% rename from tests/concurrent_partitioning_test.py rename to tests/partitioning_test.py index 5374d09c4b..fbf0e62bf6 100644 --- a/tests/concurrent_partitioning_test.py +++ b/tests/partitioning_test.py @@ -13,7 +13,7 @@ import time -class ConcurrentTest(unittest.TestCase): +class PartitioningTests(unittest.TestCase): def setUp(self): self.setup_cmd = [ @@ -48,7 +48,7 @@ def test_concurrent(self): node.start() self.init_test_data(node) - node.psql('postgres', 'select partition_data_worker(\'abc\')') + node.psql('postgres', 'select partition_data_concurrent(\'abc\')') while True: # update some rows to check for deadlocks From 94228873ea5564aaeae3aa6b35268422eb2d571e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 26 Aug 2016 14:54:39 +0300 Subject: [PATCH 072/184] add new TODO regarding the spotted 
trigger-related bug --- src/partition_filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/partition_filter.c b/src/partition_filter.c index db168f4cdf..3a48d65291 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -238,6 +238,7 @@ partition_filter_end(CustomScanState *node) hash_seq_init(&stat, state->result_rels_table); while ((rri_handle = (ResultRelInfoHolder *) hash_seq_search(&stat)) != NULL) { + /* FIXME: add ResultRelInfos to estate->es_result_relations to fix triggers */ ExecCloseIndices(rri_handle->resultRelInfo); heap_close(rri_handle->resultRelInfo->ri_RelationDesc, RowExclusiveLock); From 67d8a5d55ebb44fc1955999e0ac638d30916dc85 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 26 Aug 2016 15:39:00 +0300 Subject: [PATCH 073/184] introduce new subsystem called 'xact_handling' & function get_pathman_relation_info_after_lock(), extract extract_binary_interval_from_text() from create_partitions_internal(), improve BGW startup --- Makefile | 2 +- init.sql | 36 ++++++++++---- src/init.c | 6 +-- src/pathman.h | 4 +- src/pg_pathman.c | 114 +++++++++++++++++++++++++++----------------- src/pl_funcs.c | 42 ++++++++-------- src/relation_info.c | 17 +++++++ src/relation_info.h | 2 + src/worker.c | 15 +++++- src/xact_handling.c | 34 +++++++++++++ src/xact_handling.h | 28 +++++++++++ 11 files changed, 216 insertions(+), 84 deletions(-) create mode 100644 src/xact_handling.c create mode 100644 src/xact_handling.h diff --git a/Makefile b/Makefile index b9d0226275..bfc1bc8886 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ MODULE_big = pg_pathman OBJS = src/init.o src/relation_info.o src/utils.o src/partition_filter.o src/runtimeappend.o \ src/runtime_merge_append.o src/pg_pathman.o src/dsm_array.o src/rangeset.o src/pl_funcs.o \ - src/worker.o src/hooks.o src/nodes_common.o $(WIN32RES) + src/worker.o src/hooks.o src/nodes_common.o src/xact_handling.o $(WIN32RES) EXTENSION = pg_pathman EXTVERSION = 0.1 diff --git a/init.sql 
b/init.sql index eefa56e15f..510ba5697b 100644 --- a/init.sql +++ b/init.sql @@ -317,13 +317,16 @@ SET pg_pathman.enable_partitionfilter = off; +/* + * Create DDL trigger to call pathman_ddl_trigger_func(). + */ CREATE EVENT TRIGGER pathman_ddl_trigger ON sql_drop EXECUTE PROCEDURE @extschema@.pathman_ddl_trigger_func(); /* - * Attach partitioned table + * Attach a previously partitioned table */ CREATE OR REPLACE FUNCTION @extschema@.add_to_pathman_config( parent_relid REGCLASS, @@ -356,6 +359,14 @@ CREATE OR REPLACE FUNCTION @extschema@.get_parent_of_partition(REGCLASS) RETURNS REGCLASS AS 'pg_pathman', 'get_parent_of_partition_pl' LANGUAGE C STRICT; +/* + * Checks if attribute is nullable + */ +CREATE OR REPLACE FUNCTION @extschema@.is_attribute_nullable( + REGCLASS, TEXT) +RETURNS BOOLEAN AS 'pg_pathman', 'is_attribute_nullable' +LANGUAGE C STRICT; + /* * Check if regclass is date or timestamp */ @@ -365,19 +376,18 @@ RETURNS BOOLEAN AS 'pg_pathman', 'is_date_type' LANGUAGE C STRICT; /* - * Checks if attribute is nullable + * Returns attribute type name for relation */ -CREATE OR REPLACE FUNCTION @extschema@.is_attribute_nullable( +CREATE OR REPLACE FUNCTION @extschema@.get_attribute_type_name( REGCLASS, TEXT) -RETURNS BOOLEAN AS 'pg_pathman', 'is_attribute_nullable' +RETURNS TEXT AS 'pg_pathman', 'get_attribute_type_name' LANGUAGE C STRICT; /* - * Returns attribute type name for relation + * Get parent of pg_pathman's partition. 
*/ -CREATE OR REPLACE FUNCTION @extschema@.get_attribute_type_name( - REGCLASS, TEXT) -RETURNS TEXT AS 'pg_pathman', 'get_attribute_type_name' +CREATE OR REPLACE FUNCTION @extschema@.get_parent_of_partition(REGCLASS) +RETURNS REGCLASS AS 'pg_pathman', 'get_parent_of_partition_pl' LANGUAGE C STRICT; /* @@ -406,6 +416,16 @@ CREATE OR REPLACE FUNCTION @extschema@.build_update_trigger_func_name( RETURNS TEXT AS 'pg_pathman', 'build_update_trigger_func_name' LANGUAGE C STRICT; + +/* + * Lock partitioned relation to restrict concurrent modification of partitioning scheme. + */ + CREATE OR REPLACE FUNCTION @extschema@.lock_partitioned_relation( + REGCLASS) + RETURNS VOID AS 'pg_pathman', 'lock_partitioned_relation' + LANGUAGE C STRICT; + + /* * DEBUG: Place this inside some plpgsql fuction and set breakpoint. */ diff --git a/src/init.c b/src/init.c index 043307d860..d569774e6f 100644 --- a/src/init.c +++ b/src/init.c @@ -285,10 +285,8 @@ init_shmem_config(void) */ if (!IsUnderPostmaster) { - /* Initialize locks */ - pmstate->load_config_lock = LWLockAssign(); - pmstate->dsm_init_lock = LWLockAssign(); - pmstate->edit_partitions_lock = LWLockAssign(); + /* NOTE: dsm_array is redundant, hence the commented code */ + /* pmstate->dsm_init_lock = LWLockAssign(); */ } } } diff --git a/src/pathman.h b/src/pathman.h index 9cd7789fc1..5f3ac9e88c 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -69,9 +69,7 @@ Oid get_pathman_config_relid(void); */ typedef struct PathmanState { - LWLock *dsm_init_lock, - *load_config_lock, - *edit_partitions_lock; + LWLock *dsm_init_lock; /* unused */ } PathmanState; diff --git a/src/pg_pathman.c b/src/pg_pathman.c index dd4d82def4..115df108f2 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -16,6 +16,7 @@ #include "partition_filter.h" #include "runtimeappend.h" #include "runtime_merge_append.h" +#include "xact_handling.h" #include "postgres.h" #include "access/heapam.h" @@ -60,6 +61,9 @@ static Node *wrapper_make_expression(WrapperNode 
*wrap, int index, bool *alwaysT static bool disable_inheritance_subselect_walker(Node *node, void *context); /* "Partition creation"-related functions */ +static Datum extract_binary_interval_from_text(Datum interval_text, + Oid part_atttype, + Oid *interval_type); static bool spawn_partitions(Oid partitioned_rel, Datum value, Datum leading_bound, @@ -128,14 +132,15 @@ _PG_init(void) if (!process_shared_preload_libraries_in_progress) { - elog(ERROR, "Pathman module must be initialized in postmaster. " + elog(ERROR, "pg_pathman module must be initialized by Postmaster. " "Put the following line to configuration file: " "shared_preload_libraries='pg_pathman'"); } /* Request additional shared resources */ RequestAddinShmemSpace(estimate_pathman_shmem_size()); - RequestAddinLWLocks(3); + + /* NOTE: we don't need LWLocks now. RequestAddinLWLocks(1); */ /* Assign pg_pathman's initial state */ temp_init_state.initialization_needed = true; @@ -791,6 +796,57 @@ spawn_partitions(Oid partitioned_rel, /* parent's Oid */ return spawned; } +/* + * Convert interval from TEXT to binary form using partitioned column's type. 
+ */ +static Datum +extract_binary_interval_from_text(Datum interval_text, /* interval as TEXT */ + Oid part_atttype, /* partitioned column's type */ + Oid *interval_type) /* returned value */ +{ + Datum interval_binary; + const char *interval_cstring; + + interval_cstring = TextDatumGetCString(interval_text); + + /* If 'part_atttype' is a *date type*, cast 'range_interval' to INTERVAL */ + if (is_date_type_internal(part_atttype)) + { + int32 interval_typmod = PATHMAN_CONFIG_interval_typmod; + + /* Convert interval from CSTRING to internal form */ + interval_binary = DirectFunctionCall3(interval_in, + CStringGetDatum(interval_cstring), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(interval_typmod)); + if (interval_type) + *interval_type = INTERVALOID; + } + /* Otherwise cast it to the partitioned column's type */ + else + { + HeapTuple htup; + Oid typein_proc = InvalidOid; + + htup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(part_atttype)); + if (HeapTupleIsValid(htup)) + { + typein_proc = ((Form_pg_type) GETSTRUCT(htup))->typinput; + ReleaseSysCache(htup); + } + else + elog(ERROR, "Cannot find input function for type %u", part_atttype); + + /* Convert interval from CSTRING to 'prel->atttype' */ + interval_binary = OidFunctionCall1(typein_proc, + CStringGetDatum(interval_cstring)); + if (interval_type) + *interval_type = part_atttype; + } + + return interval_binary; +} + /* * Append partitions (if needed) and return Oid of the partition to contain value. 
* @@ -808,9 +864,6 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) Datum values[Natts_pathman_config]; bool isnull[Natts_pathman_config]; - prel = get_pathman_relation_info(relid); - shout_if_prel_is_invalid(relid, prel, PT_RANGE); - /* Get both PartRelationInfo & PATHMAN_CONFIG contents for this relation */ if (pathman_config_contains_relation(relid, values, isnull, NULL)) { @@ -820,54 +873,27 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) Oid interval_type = InvalidOid; Datum interval_binary, /* assigned 'width' of a single partition */ interval_text; - const char *interval_cstring; FmgrInfo interval_type_cmp; - /* Fill the FmgrInfo struct with a cmp(value, part_attribute) function */ - fill_type_cmp_fmgr_info(&interval_type_cmp, value_type, prel->atttype); - - /* Convert interval from TEXT to CSTRING */ - interval_text = values[Anum_pathman_config_range_interval - 1]; - interval_cstring = TextDatumGetCString(interval_text); + /* Fetch PartRelationInfo by 'relid' */ + prel = get_pathman_relation_info(relid); + shout_if_prel_is_invalid(relid, prel, PT_RANGE); /* Read max & min range values from PartRelationInfo */ min_rvalue = prel->ranges[0].min; max_rvalue = prel->ranges[PrelLastChild(prel)].max; - /* If this is a *date type*, cast 'range_interval' to INTERVAL */ - if (is_date_type_internal(value_type)) - { - int32 interval_typmod = PATHMAN_CONFIG_interval_typmod; - - /* Convert interval from CSTRING to internal form */ - interval_binary = DirectFunctionCall3(interval_in, - CStringGetDatum(interval_cstring), - ObjectIdGetDatum(InvalidOid), - Int32GetDatum(interval_typmod)); - interval_type = INTERVALOID; - } - /* Otherwise cast it to the partitioned column's type */ - else - { - HeapTuple htup; - Oid typein_proc = InvalidOid; + /* Retrieve interval as TEXT from tuple */ + interval_text = values[Anum_pathman_config_range_interval - 1]; - htup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(prel->atttype)); - if 
(HeapTupleIsValid(htup)) - { - typein_proc = ((Form_pg_type) GETSTRUCT(htup))->typinput; - ReleaseSysCache(htup); - } - else - elog(ERROR, "Cannot find input function for type %u", - prel->atttype); - - /* Convert interval from CSTRING to 'prel->atttype' */ - interval_binary = OidFunctionCall1(typein_proc, - CStringGetDatum(interval_cstring)); - interval_type = prel->atttype; - } + /* Convert interval to binary representation */ + interval_binary = extract_binary_interval_from_text(interval_text, + prel->atttype, + &interval_type); + + /* Fill the FmgrInfo struct with a cmp(value, part_attribute) function */ + fill_type_cmp_fmgr_info(&interval_type_cmp, value_type, prel->atttype); if (SPI_connect() != SPI_OK_CONNECT) elog(ERROR, "Could not connect using SPI"); diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 151bd65009..d8059f887e 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -12,6 +12,7 @@ #include "init.h" #include "utils.h" #include "relation_info.h" +#include "xact_handling.h" #include "catalog/indexing.h" #include "commands/sequence.h" @@ -48,6 +49,7 @@ PG_FUNCTION_INFO_V1( build_update_trigger_name ); PG_FUNCTION_INFO_V1( is_date_type ); PG_FUNCTION_INFO_V1( is_attribute_nullable ); PG_FUNCTION_INFO_V1( add_to_pathman_config ); +PG_FUNCTION_INFO_V1( lock_partitioned_relation ); PG_FUNCTION_INFO_V1( debug_capture ); @@ -211,29 +213,10 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) PG_RETURN_NULL(); else { - Oid child_oid = InvalidOid; - - /* FIXME: useless double-checked lock (no new data) */ - LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); - LWLockAcquire(pmstate->edit_partitions_lock, LW_EXCLUSIVE); - - /* - * Check if someone else has already created partition. 
- */ - search_state = search_range_partition_eq(value, &cmp_func, prel, - &found_rentry); - if (search_state == SEARCH_RANGEREL_FOUND) - { - LWLockRelease(pmstate->load_config_lock); - LWLockRelease(pmstate->edit_partitions_lock); - - PG_RETURN_OID(found_rentry.child_oid); - } - - child_oid = create_partitions(parent_oid, value, value_type); + Oid child_oid = create_partitions(parent_oid, value, value_type); - LWLockRelease(pmstate->load_config_lock); - LWLockRelease(pmstate->edit_partitions_lock); + /* get_pathman_relation_info() will refresh this entry */ + invalidate_pathman_relation_info(parent_oid, NULL); PG_RETURN_OID(child_oid); } @@ -680,6 +663,21 @@ add_to_pathman_config(PG_FUNCTION_ARGS) } +/* + * Acquire appropriate lock on a partitioned relation. + */ +Datum +lock_partitioned_relation(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + + /* Lock partitioned relation till transaction's end */ + xact_lock_partitioned_rel(relid); + + PG_RETURN_VOID(); +} + + /* * NOTE: used for DEBUG, set breakpoint here. */ diff --git a/src/relation_info.c b/src/relation_info.c index 05fd61c07c..0c33f980dc 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -11,6 +11,7 @@ #include "relation_info.h" #include "init.h" #include "utils.h" +#include "xact_handling.h" #include "access/htup_details.h" #include "access/xact.h" @@ -240,6 +241,22 @@ get_pathman_relation_info(Oid relid) return prel; } +/* Acquire lock on a table and try to get PartRelationInfo */ +const PartRelationInfo * +get_pathman_relation_info_after_lock(Oid relid, bool unlock_if_not_found) +{ + const PartRelationInfo *prel; + + /* Restrict concurrent partition creation (it's dangerous) */ + xact_lock_partitioned_rel(relid); + + prel = get_pathman_relation_info(relid); + if (!prel && unlock_if_not_found) + xact_unlock_partitioned_rel(relid); + + return prel; +} + /* Remove PartRelationInfo from local cache. 
*/ void remove_pathman_relation_info(Oid relid) diff --git a/src/relation_info.h b/src/relation_info.h index 8cc2de687d..8c91d592df 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -121,6 +121,8 @@ const PartRelationInfo *refresh_pathman_relation_info(Oid relid, void invalidate_pathman_relation_info(Oid relid, bool *found); void remove_pathman_relation_info(Oid relid); const PartRelationInfo *get_pathman_relation_info(Oid relid); +const PartRelationInfo *get_pathman_relation_info_after_lock(Oid relid, + bool unlock_if_not_found); void delay_pathman_shutdown(void); void delay_invalidation_parent_rel(Oid parent); diff --git a/src/worker.c b/src/worker.c index 1b14c3eb99..7e4cfff7c9 100644 --- a/src/worker.c +++ b/src/worker.c @@ -37,6 +37,8 @@ static const char *create_partitions_bgw = "CreatePartitionsWorker"; */ typedef struct { + Oid userid; /* connect as a specified user */ + Oid result; /* target partition */ Oid dbid; Oid partitioned_table; @@ -135,6 +137,9 @@ create_partitions_bg_worker_segment(Oid relid, Datum value, Oid value_type) /* Initialize BGW args */ args = (PartitionArgs *) dsm_segment_address(segment); + + args->userid = GetAuthenticatedUserId(); + args->result = InvalidOid; args->dbid = MyDatabaseId; args->partitioned_table = relid; @@ -256,6 +261,9 @@ bg_worker_main(Datum main_arg) PartitionArgs *args; Datum value; + /* FIXME: add signal handler */ + BackgroundWorkerUnblockSignals(); + /* Create resource owner */ CurrentResourceOwner = ResourceOwnerCreate(NULL, create_partitions_bgw); @@ -270,8 +278,9 @@ bg_worker_main(Datum main_arg) args = dsm_segment_address(segment); /* Establish connection and start transaction */ - BackgroundWorkerInitializeConnectionByOid(args->dbid, InvalidOid); + BackgroundWorkerInitializeConnectionByOid(args->dbid, args->userid); + /* Start new transaction (syscache access etc.) 
*/ StartTransactionCommand(); /* Initialize pg_pathman's local config */ @@ -283,17 +292,19 @@ bg_worker_main(Datum main_arg) args->value_byval, (const void *) args->value); +/* Print 'arg->value' for debug purposes */ #ifdef USE_ASSERT_CHECKING elog(LOG, "%s: arg->value is '%s' [%u]", create_partitions_bgw, DebugPrintDatum(value, args->value_type), MyProcPid); #endif - /* Create partitions */ + /* Create partitions and save the Oid of the last one */ args->result = create_partitions_internal(args->partitioned_table, value, /* unpacked Datum */ args->value_type); + /* Finish transaction in an appropriate way */ if (args->result == InvalidOid) AbortCurrentTransaction(); else diff --git a/src/xact_handling.c b/src/xact_handling.c new file mode 100644 index 0000000000..4d3ac8a877 --- /dev/null +++ b/src/xact_handling.c @@ -0,0 +1,34 @@ +/* ------------------------------------------------------------------------ + * + * xact_handling.c + * Transaction-specific locks and other function + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + +#include "xact_handling.h" + +#include "postgres.h" +#include "storage/lmgr.h" + + +/* + * Lock certain partitioned relation to disable concurrent access. + */ +void +xact_lock_partitioned_rel(Oid relid) +{ + /* Share exclusive lock conflicts with itself */ + LockRelationOid(relid, ShareUpdateExclusiveLock); +} + +/* + * Unlock partitioned relation. 
+ */ +void +xact_unlock_partitioned_rel(Oid relid) +{ + UnlockRelationOid(relid, ShareUpdateExclusiveLock); +} diff --git a/src/xact_handling.h b/src/xact_handling.h new file mode 100644 index 0000000000..edff5c2191 --- /dev/null +++ b/src/xact_handling.h @@ -0,0 +1,28 @@ +/* ------------------------------------------------------------------------ + * + * xact_handling.h + * Transaction-specific locks and other function + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + +#ifndef XACT_HANDLING +#define XACT_HANDLING + +#include "pathman.h" + + +/* + * List of partitioned relations locked by this backend (plain Oids). + */ +extern List *locked_by_me; + +/* + * Transaction locks. + */ +void xact_lock_partitioned_rel(Oid relid); +void xact_unlock_partitioned_rel(Oid relid); + +#endif From cceaae185460214e676ee6eb658515d0419e4d6a Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 26 Aug 2016 18:13:25 +0300 Subject: [PATCH 074/184] documentation update --- README.md | 105 ++++++++++++++++++++++++++++++++++++------------------ init.sql | 2 +- 2 files changed, 72 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 708e51498b..fc41d3e8bb 100644 --- a/README.md +++ b/README.md @@ -62,77 +62,93 @@ Done! Now it's time to setup your partitioning schemes. ### Partition creation ```plpgsql -create_hash_partitions(relation TEXT, +create_hash_partitions(relation REGCLASS, attribute TEXT, partitions_count INTEGER) ``` Performs HASH partitioning for `relation` by integer key `attribute`. Creates `partitions_count` partitions and trigger on INSERT. All the data will be automatically copied from the parent to partitions. 
```plpgsql -create_range_partitions(relation TEXT, - attribute TEXT, - start_value ANYELEMENT, - interval ANYELEMENT, - premake INTEGER DEFAULT NULL) +create_range_partitions(relation REGCLASS, + attribute TEXT, + start_value ANYELEMENT, + interval ANYELEMENT, + count INTEGER DEFAULT NULL + partition_data BOOLEAN DEFAULT true) + +create_range_partitions(relation TEXT, + attribute TEXT, + start_value ANYELEMENT, + interval INTERVAL, + count INTEGER DEFAULT NULL, + partition_data BOOLEAN DEFAULT true) +``` +Performs RANGE partitioning for `relation` by partitioning key `attribute`. `start_value` argument specifies initial value, `interval` sets the range of values in a single partition, `count` is the number of premade partitions (if not set then pathman tries to determine it based on attribute values). If `partition_data` is `true` then all the data will be automatically copied from the parent table to partitions. Note that data migration may took a while to finish and the table will be locked until transaction commits. See `partition_data_concurrent()` for a lock-free way to migrate data. -create_range_partitions(relation TEXT, - attribute TEXT, - start_value ANYELEMENT, - interval INTERVAL, - premake INTEGER DEFAULT NULL) +```plpgsql +create_partitions_from_range(relation REGCLASS, + attribute TEXT, + start_value ANYELEMENT, + end_value ANYELEMENT, + interval ANYELEMENT, + partition_data BOOLEAN DEFAULT true) + +create_partitions_from_range(relation REGCLASS, + attribute TEXT, + start_value ANYELEMENT, + end_value ANYELEMENT, + interval INTERVAL, + partition_data BOOLEAN DEFAULT true) ``` -Performs RANGE partitioning for `relation` by partitioning key `attribute`. `start_value` argument specifies initial value, `interval` sets the range of values in a single partition, `premake` is the number of premade partitions (if not set then pathman tries to determine it based on attribute values). All the data will be automatically copied from the parent to partitions. 
+Performs RANGE-partitioning from specified range for `relation` by partitioning key `attribute`. -```plpgsql -create_partitions_from_range(relation TEXT, - attribute TEXT, - start_value ANYELEMENT, - end_value ANYELEMENT, - interval ANYELEMENT) +### Data migration -create_partitions_from_range(relation TEXT, - attribute TEXT, - start_value ANYELEMENT, - end_value ANYELEMENT, - interval INTERVAL) +```plpgsql +partition_data_concurrent(relation REGCLASS) ``` -Performs RANGE-partitioning from specified range for `relation` by partitioning key `attribute`. Data will be copied to partitions as well. +Starts a background worker to copy data from parent table to partitions. The worker utilize short transactions to copy small bunches of data (up to 10K rows per transaction) and thus doesn't significantly interfere with users activity. ### Triggers ```plpgsql -create_hash_update_trigger(parent TEXT) +create_hash_update_trigger(parent REGCLASS) ``` Creates the trigger on UPDATE for HASH partitions. The UPDATE trigger isn't created by default because of the overhead. It's useful in cases when the key attribute might change. ```plpgsql -create_range_update_trigger(parent TEXT) +create_range_update_trigger(parent REGCLASS) ``` Same as above, but for a RANGE-partitioned table. ### Post-creation partition management ```plpgsql -split_range_partition(partition TEXT, value ANYELEMENT) +split_range_partition(partition REGCLASS, + value ANYELEMENT, + partition_name TEXT DEFAULT NULL) ``` Split RANGE `partition` in two by `value`. ```plpgsql -merge_range_partitions(partition1 TEXT, partition2 TEXT) +merge_range_partitions(partition1 REGCLASS, partition2 REGCLASS) ``` Merge two adjacent RANGE partitions. First, data from `partition2` is copied to `partition1`, then `partition2` is removed. ```plpgsql -append_range_partition(p_relation TEXT) +append_range_partition(p_relation REGCLASS, + partition_name TEXT DEFAULT NULL) ``` Append new RANGE partition. 
```plpgsql -prepend_range_partition(p_relation TEXT) +prepend_range_partition(p_relation REGCLASS, + partition_name TEXT DEFAULT NULL) ``` Prepend new RANGE partition. ```plpgsql -add_range_partition(relation TEXT, - start_value ANYELEMENT, - end_value ANYELEMENT) +add_range_partition(relation REGCLASS, + start_value ANYELEMENT, + end_value ANYELEMENT, + partition_name TEXT DEFAULT NULL) ``` Create new RANGE partition for `relation` with specified range bounds. @@ -155,10 +171,31 @@ detach_range_partition(partition TEXT) Detach partition from the existing RANGE-partitioned relation. ```plpgsql -disable_partitioning(relation TEXT) +disable_pathman_for(relation TEXT) ``` Permanently disable `pg_pathman` partitioning mechanism for the specified parent table and remove the insert trigger if it exists. All partitions and data remain unchanged. +```plpgsql +drop_partitions(parent REGCLASS, + delete_data BOOLEAN DEFAULT FALSE) +``` +Drop partitions of the `parent` table. If `delete_data` is `false` then the data is copied to the parent table first. Default is `false`. + + +### Additional parameters + +```plpgsql +enable_parent(relation REGCLASS) +disable_parent(relation REGCLASS) +``` +Include/exclude parent table into/from query plan. In original PostgreSQL planner parent table is always included into query plan even if it's empty which can lead to additional overhead. You can use `disable_parent()` if you are never going to use parent table as a storage. Default value depends on the `partition_data` parameter that was specified during initial partitioning in `create_range_partitions()` or `create_partitions_from_range()` functions. If the `partition_data` parameter was `true` then all data have already been migrated to partitions and parent table disabled. Otherwise it is enabled. + +```plpgsql +enable_auto(relation REGCLASS) +disable_auto(relation REGCLASS) +``` +Enable/disable auto partition propagation (only for RANGE partitioning). It is enabled by default. 
+ ## Custom plan nodes `pg_pathman` provides a couple of [custom plan nodes](https://wiki.postgresql.org/wiki/CustomScanAPI) which aim to reduce execution time, namely: diff --git a/init.sql b/init.sql index b978f4cf0c..9c4f9473bc 100644 --- a/init.sql +++ b/init.sql @@ -281,7 +281,7 @@ LANGUAGE plpgsql; /* * Disable pathman partitioning for specified relation */ -CREATE OR REPLACE FUNCTION @extschema@.disable_partitioning( +CREATE OR REPLACE FUNCTION @extschema@.disable_pathman_for( parent_relid REGCLASS) RETURNS VOID AS $$ From 9ed7d5dae50b167be4d24d639ee06e98db5427fd Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 26 Aug 2016 18:19:13 +0300 Subject: [PATCH 075/184] documentation update for hash partitioning --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fc41d3e8bb..ba20f797b6 100644 --- a/README.md +++ b/README.md @@ -64,9 +64,10 @@ Done! Now it's time to setup your partitioning schemes. ```plpgsql create_hash_partitions(relation REGCLASS, attribute TEXT, - partitions_count INTEGER) + partitions_count INTEGER, + partition_name TEXT DEFAULT NULL) ``` -Performs HASH partitioning for `relation` by integer key `attribute`. Creates `partitions_count` partitions and trigger on INSERT. All the data will be automatically copied from the parent to partitions. +Performs HASH partitioning for `relation` by integer key `attribute`. The `partitions_count` parameter specifies the number of partitions to create; it cannot be changed afterwards. If `partition_data` is `true` then all the data will be automatically copied from the parent table to partitions. Note that data migration may took a while to finish and the table will be locked until transaction commits. See `partition_data_concurrent()` for a lock-free way to migrate data. 
```plpgsql create_range_partitions(relation REGCLASS, @@ -83,7 +84,7 @@ create_range_partitions(relation TEXT, count INTEGER DEFAULT NULL, partition_data BOOLEAN DEFAULT true) ``` -Performs RANGE partitioning for `relation` by partitioning key `attribute`. `start_value` argument specifies initial value, `interval` sets the range of values in a single partition, `count` is the number of premade partitions (if not set then pathman tries to determine it based on attribute values). If `partition_data` is `true` then all the data will be automatically copied from the parent table to partitions. Note that data migration may took a while to finish and the table will be locked until transaction commits. See `partition_data_concurrent()` for a lock-free way to migrate data. +Performs RANGE partitioning for `relation` by partitioning key `attribute`. `start_value` argument specifies initial value, `interval` sets the range of values in a single partition, `count` is the number of premade partitions (if not set then pathman tries to determine it based on attribute values). ```plpgsql create_partitions_from_range(relation REGCLASS, @@ -123,7 +124,7 @@ Same as above, but for a RANGE-partitioned table. ```plpgsql split_range_partition(partition REGCLASS, value ANYELEMENT, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL,) ``` Split RANGE `partition` in two by `value`. 
From 11be0feded8af020ca93e842e9589152abce55d3 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sun, 28 Aug 2016 04:04:36 +0300 Subject: [PATCH 076/184] ConcurrentPartWorker connects to DB using a given role, new macros for pathman_concurrent_part_tasks, fixes --- init.sql | 3 +- src/pathman_workers.c | 184 ++++++++++++++++++++++++++++-------------- src/pathman_workers.h | 37 +++++++-- src/utils.h | 4 +- 4 files changed, 158 insertions(+), 70 deletions(-) diff --git a/init.sql b/init.sql index 34eef6948c..100ca3bd0b 100644 --- a/init.sql +++ b/init.sql @@ -158,6 +158,7 @@ LANGUAGE plpgsql; */ CREATE OR REPLACE FUNCTION @extschema@.show_concurrent_part_tasks() RETURNS TABLE ( + userid REGROLE, pid INT, dbid OID, relid REGCLASS, @@ -188,7 +189,7 @@ RETURNS BOOL AS 'pg_pathman', 'stop_concurrent_part_task' LANGUAGE C STRICT; * Copy rows to partitions concurrently. */ CREATE OR REPLACE FUNCTION @extschema@._partition_data_concurrent( - p_relation regclass, + p_relation REGCLASS, p_min ANYELEMENT DEFAULT NULL::text, p_max ANYELEMENT DEFAULT NULL::text, p_limit INT DEFAULT NULL, diff --git a/src/pathman_workers.c b/src/pathman_workers.c index 5ff5579b08..136306057a 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -18,6 +18,7 @@ #include "pathman_workers.h" #include "relation_info.h" #include "utils.h" +#include "xact_handling.h" #include "access/htup_details.h" #include "access/xact.h" @@ -31,6 +32,7 @@ #include "storage/latch.h" #include "utils/builtins.h" #include "utils/datum.h" +#include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/typcache.h" #include "utils/resowner.h" @@ -351,6 +353,17 @@ bgw_main_spawn_partitions(Datum main_arg) DebugPrintDatum(value, args->value_type), MyProcPid); #endif + /* Check again if there's a conflicting lock */ + if (xact_conflicting_lock_exists(args->partitioned_table)) + { + elog(LOG, "%s: there's a conflicting lock on relation \"%s\"", + spawn_partitions_bgw, + 
get_rel_name_or_relid(args->partitioned_table)); + + dsm_detach(segment); + return; /* exit quickly */ + } + /* Create partitions and save the Oid of the last one */ args->result = create_partitions_internal(args->partitioned_table, value, /* unpacked Datum */ @@ -378,24 +391,11 @@ bgw_main_spawn_partitions(Datum main_arg) static void bgw_main_concurrent_part(Datum main_arg) { - ConcurrentPartSlot *args; - Oid types[2] = { OIDOID, INT4OID }; - Datum vals[2]; - bool nulls[2] = { false, false }; int rows; - int slot_idx = DatumGetInt32(main_arg); - MemoryContext worker_context = CurrentMemoryContext; - int failures_count = 0; bool failed; + int failures_count = 0; char *sql = NULL; - - /* Create resource owner */ - CurrentResourceOwner = ResourceOwnerCreate(NULL, "PartitionDataWorker"); - - args = &concurrent_part_slots[slot_idx]; - args->pid = MyProcPid; - vals[0] = args->relid; - vals[1] = 10000; + ConcurrentPartSlot *part_slot; /* Establish signal handlers before unblocking signals. */ pqsignal(SIGTERM, handle_sigterm); @@ -403,20 +403,39 @@ bgw_main_concurrent_part(Datum main_arg) /* We're now ready to receive signals */ BackgroundWorkerUnblockSignals(); + /* Create resource owner */ + CurrentResourceOwner = ResourceOwnerCreate(NULL, concurrent_part_bgw); + + /* Update concurrent part slot */ + part_slot = &concurrent_part_slots[DatumGetInt32(main_arg)]; + part_slot->pid = MyProcPid; + /* Establish connection and start transaction */ - BackgroundWorkerInitializeConnectionByOid(args->dbid, InvalidOid); + BackgroundWorkerInitializeConnectionByOid(part_slot->dbid, part_slot->userid); + /* Initialize pg_pathman's local config */ + StartTransactionCommand(); + bg_worker_load_config(concurrent_part_bgw); + CommitTransactionCommand(); + + /* Do the job */ do { + Oid types[2] = { OIDOID, INT4OID }; + Datum vals[2] = { part_slot->relid, part_slot->batch_size }; + bool nulls[2] = { false, false }; + + /* Reset loop variables */ failed = false; rows = 0; + + /* Start new 
transaction (syscache access etc.) */ StartTransactionCommand(); - bg_worker_load_config("PartitionDataWorker"); SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); - /* Do some preparation within the first iteration */ + /* Prepare the query if needed */ if (sql == NULL) { MemoryContext oldcontext; @@ -425,78 +444,104 @@ bgw_main_concurrent_part(Datum main_arg) * Allocate as SQL query in top memory context because current * context will be destroyed after transaction finishes */ - oldcontext = MemoryContextSwitchTo(worker_context); + oldcontext = MemoryContextSwitchTo(TopMemoryContext); sql = psprintf("SELECT %s._partition_data_concurrent($1::oid, p_limit:=$2)", - get_namespace_name(get_pathman_schema())); + get_namespace_name(get_pathman_schema())); MemoryContextSwitchTo(oldcontext); } + /* Exec ret = _partition_data_concurrent() */ PG_TRY(); { int ret; bool isnull; ret = SPI_execute_with_args(sql, 2, types, vals, nulls, false, 0); - if (ret > 0) + if (ret == SPI_OK_SELECT) { TupleDesc tupdesc = SPI_tuptable->tupdesc; HeapTuple tuple = SPI_tuptable->vals[0]; - Assert(SPI_processed == 1); + Assert(SPI_processed == 1); /* there should be 1 result at most */ rows = DatumGetInt32(SPI_getbinval(tuple, tupdesc, 1, &isnull)); + + Assert(!isnull); /* ... and ofc it must not be NULL */ } } PG_CATCH(); { ErrorData *error; + EmitErrorReport(); + error = CopyErrorData(); - elog(LOG, "Worker error: %s", error->message); + elog(LOG, "%s: %s", concurrent_part_bgw, error->message); FlushErrorState(); + FreeErrorData(error); /* * The most common exception we can catch here is a deadlock with * concurrent user queries. 
Check that attempts count doesn't exceed * some reasonable value */ - if (100 <= failures_count++) + if (failures_count++ > PART_WORKER_MAX_ATTEMPTS) { - pfree(sql); - args->worker_status = WS_FREE; + /* Mark slot as FREE */ + part_slot->worker_status = WS_FREE; + elog(LOG, - "The concurrent partitioning worker exiting because the " - "maximum attempts count exceeded. See the error message below"); - exit(1); + "Concurrent partitioning worker has canceled the task because " + "maximum amount of attempts (%d) had been exceeded. " + "See the error message below", + PART_WORKER_MAX_ATTEMPTS); + + return; /* exit quickly */ } + + /* Set 'failed' flag */ failed = true; } PG_END_TRY(); SPI_finish(); PopActiveSnapshot(); + if (failed) { - /* abort transaction and sleep for a second */ +#ifdef USE_ASSERT_CHECKING + elog(DEBUG2, "%s: could not relocate batch, total: %lu [%u]", + concurrent_part_bgw, part_slot->total_rows, MyProcPid); +#endif + + /* Abort transaction and sleep for a second */ AbortCurrentTransaction(); - DirectFunctionCall1(pg_sleep, Float8GetDatum(1)); + DirectFunctionCall1(pg_sleep, Float8GetDatum(part_slot->sleep_time)); } else { - /* Reset failures counter and commit transaction */ + /* Commit transaction and reset 'failures_count' */ CommitTransactionCommand(); failures_count = 0; - args->total_rows += rows; + + /* Add rows to total_rows */ + part_slot->total_rows += rows; + +#ifdef USE_ASSERT_CHECKING + elog(DEBUG2, "%s: relocated %d rows, total: %lu [%u]", + concurrent_part_bgw, rows, part_slot->total_rows, MyProcPid); +#endif } - /* If other backend requested to stop worker then quit */ - if (args->worker_status == WS_STOPPING) + /* If other backend requested to stop us, quit */ + if (part_slot->worker_status == WS_STOPPING) break; } - while(rows > 0 || failed); /* do while there is still rows to relocate */ + while(rows > 0 || failed); /* do while there's still rows to be relocated */ + /* Reclaim the resources */ pfree(sql); - args->worker_status = 
WS_FREE; + part_slot->worker_status = WS_FREE; } @@ -513,6 +558,8 @@ bgw_main_concurrent_part(Datum main_arg) Datum partition_table_concurrently(PG_FUNCTION_ARGS) { +#define tostr(str) ( #str ) + Oid relid = PG_GETARG_OID(0); ConcurrentPartSlot *my_slot = NULL; int empty_slot_idx = -1; @@ -550,7 +597,9 @@ partition_table_concurrently(PG_FUNCTION_ARGS) elog(ERROR, "No empty worker slots found"); /* Initialize concurrent part slot */ - InitConcurrentPartSlot(my_slot, WS_WORKING, MyDatabaseId, relid); + InitConcurrentPartSlot(my_slot, GetAuthenticatedUserId(), + WS_WORKING, MyDatabaseId, relid, + 1000, 1.0); /* Start worker (we should not wait) */ start_bg_worker(concurrent_part_bgw, @@ -560,8 +609,9 @@ partition_table_concurrently(PG_FUNCTION_ARGS) /* Tell user everything's fine */ elog(NOTICE, - "Worker started. You can stop it with the following command: " - "select stop_concurrent_part_task('%s');", + "Worker started. You can stop it " + "with the following command: select %s('%s');", + tostr(stop_concurrent_part_task), /* convert function's name to literal */ get_rel_name(relid)); PG_RETURN_VOID(); @@ -594,12 +644,20 @@ show_concurrent_part_tasks_internal(PG_FUNCTION_ARGS) userctx->cur_idx = 0; /* Create tuple descriptor */ - tupdesc = CreateTemplateTupleDesc(5, false); - TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pid", INT4OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 2, "dbid", OIDOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relid", REGCLASSOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 4, "processed", INT4OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 5, "status", TEXTOID, -1, 0); + tupdesc = CreateTemplateTupleDesc(Natts_pathman_cp_tasks, false); + + TupleDescInitEntry(tupdesc, Anum_pathman_cp_tasks_userid, + "userid", REGROLEOID, -1, 0); + TupleDescInitEntry(tupdesc, Anum_pathman_cp_tasks_pid, + "pid", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, Anum_pathman_cp_tasks_dbid, + "dbid", OIDOID, -1, 0); + 
TupleDescInitEntry(tupdesc, Anum_pathman_cp_tasks_relid, + "relid", REGCLASSOID, -1, 0); + TupleDescInitEntry(tupdesc, Anum_pathman_cp_tasks_processed, + "processed", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, Anum_pathman_cp_tasks_status, + "status", TEXTOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); funcctx->user_fctx = (void *) userctx; @@ -610,35 +668,39 @@ show_concurrent_part_tasks_internal(PG_FUNCTION_ARGS) funcctx = SRF_PERCALL_SETUP(); userctx = (active_workers_cxt *) funcctx->user_fctx; - /* - * Iterate through worker slots - */ + /* Iterate through worker slots */ for (i = userctx->cur_idx; i < PART_WORKER_SLOTS; i++) { - if (concurrent_part_slots[i].worker_status != WS_FREE) + ConcurrentPartSlot *cur_slot = &concurrent_part_slots[i]; + + if (cur_slot->worker_status != WS_FREE) { HeapTuple tuple; - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; + Datum values[Natts_pathman_cp_tasks]; + bool isnull[Natts_pathman_cp_tasks] = { 0, 0, 0, 0, 0, 0 }; - values[0] = concurrent_part_slots[i].pid; - values[1] = concurrent_part_slots[i].dbid; - values[2] = concurrent_part_slots[i].relid; - values[3] = concurrent_part_slots[i].total_rows; + values[Anum_pathman_cp_tasks_userid - 1] = cur_slot->userid; + values[Anum_pathman_cp_tasks_pid - 1] = cur_slot->pid; + values[Anum_pathman_cp_tasks_dbid - 1] = cur_slot->dbid; + values[Anum_pathman_cp_tasks_relid - 1] = cur_slot->relid; + values[Anum_pathman_cp_tasks_processed - 1] = cur_slot->total_rows; /* Now build a status string */ - switch(concurrent_part_slots[i].worker_status) + switch(cur_slot->worker_status) { case WS_WORKING: - values[4] = PointerGetDatum(pstrdup("working")); + values[Anum_pathman_cp_tasks_status - 1] = + PointerGetDatum(cstring_to_text("working")); break; case WS_STOPPING: - values[4] = PointerGetDatum(pstrdup("stopping")); + values[Anum_pathman_cp_tasks_status - 1] = + PointerGetDatum(cstring_to_text("stopping")); break; default: - values[4] = 
PointerGetDatum(pstrdup("[unknown]")); + values[Anum_pathman_cp_tasks_status - 1] = + PointerGetDatum(cstring_to_text("[unknown]")); } /* Form output tuple */ @@ -670,7 +732,7 @@ stop_concurrent_part_task(PG_FUNCTION_ARGS) concurrent_part_slots[i].dbid == MyDatabaseId) { concurrent_part_slots[i].worker_status = WS_STOPPING; - elog(NOTICE, "Worker will stop after current batch's finished"); + elog(NOTICE, "Worker will stop after it finishes current batch"); PG_RETURN_BOOL(true); } diff --git a/src/pathman_workers.h b/src/pathman_workers.h index 7b8d8d4ac3..38266cddac 100644 --- a/src/pathman_workers.h +++ b/src/pathman_workers.h @@ -25,13 +25,13 @@ */ typedef struct { - Oid userid; /* connect as a specified user */ + Oid userid; /* connect as a specified user */ - Oid result; /* target partition */ - Oid dbid; /* database which stores 'partitioned_table' */ + Oid result; /* target partition */ + Oid dbid; /* database which stores 'partitioned_table' */ Oid partitioned_table; - /* Type will help us to work with Datum */ + /* Needed to decode Datum from 'values' */ Oid value_type; Size value_size; bool value_byval; @@ -46,6 +46,8 @@ typedef struct */ typedef struct { + Oid userid; /* connect as a specified user */ + enum { WS_FREE = 0, /* slot is empty */ @@ -58,19 +60,42 @@ typedef struct Oid dbid; /* database which contains relation 'relid' */ Oid relid; /* table to be partitioned concurrently */ uint64 total_rows; /* total amount of rows processed */ + + int32 batch_size; /* number of rows in a batch */ + float8 sleep_time; /* how long should we sleep in case of error? 
*/ } ConcurrentPartSlot; -#define InitConcurrentPartSlot(slot, w_status, db, rel) \ +#define InitConcurrentPartSlot(slot, user, w_status, db, rel, batch_sz, sleep_t) \ do { \ + (slot)->userid = (user); \ (slot)->worker_status = (w_status); \ + (slot)->pid = 0; \ (slot)->dbid = (db); \ (slot)->relid = (rel); \ (slot)->total_rows = 0; \ + (slot)->batch_size = (batch_sz); \ + (slot)->sleep_time = (sleep_t); \ } while (0) /* Number of worker slots for concurrent partitioning */ -#define PART_WORKER_SLOTS 10 +#define PART_WORKER_SLOTS 10 + +/* Max number of attempts per batch */ +#define PART_WORKER_MAX_ATTEMPTS 100 + + +/* + * Definitions for the "pathman_concurrent_part_tasks" view + */ +#define PATHMAN_CONCURRENT_PART_TASKS "pathman_concurrent_part_tasks" +#define Natts_pathman_cp_tasks 6 +#define Anum_pathman_cp_tasks_userid 1 +#define Anum_pathman_cp_tasks_pid 2 +#define Anum_pathman_cp_tasks_dbid 3 +#define Anum_pathman_cp_tasks_relid 4 +#define Anum_pathman_cp_tasks_processed 5 +#define Anum_pathman_cp_tasks_status 6 /* diff --git a/src/utils.h b/src/utils.h index 179062c47a..cd8419c3b5 100644 --- a/src/utils.h +++ b/src/utils.h @@ -8,8 +8,8 @@ * ------------------------------------------------------------------------ */ -#ifndef UTILS_H -#define UTILS_H +#ifndef PATHMAN_UTILS_H +#define PATHMAN_UTILS_H #include "pathman.h" From f6cd9d5c7996d1fc3026f19312ea0446b76a2a93 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 29 Aug 2016 00:03:38 +0300 Subject: [PATCH 077/184] remove useless includes, remove PathmanRange & PathmanHash impl --- src/hooks.c | 5 +--- src/init.c | 7 +++-- src/pathman.h | 2 -- src/pl_funcs.c | 72 ++++---------------------------------------------- 4 files changed, 9 insertions(+), 77 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 8c19c11d5b..5101872c97 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -10,9 +10,9 @@ #include "hooks.h" #include "init.h" +#include "partition_filter.h" #include "runtimeappend.h" #include 
"runtime_merge_append.h" -#include "partition_filter.h" #include "utils.h" #include "miscadmin.h" @@ -447,13 +447,10 @@ pathman_planner_hook(Query *parse, int cursorOptions, ParamListInfo boundParams) void pathman_post_parse_analysis_hook(ParseState *pstate, Query *query) { - elog(DEBUG2, "Called parse hook [%u]", MyProcPid); - /* Invoke original hook if needed */ if (post_parse_analyze_hook_next) post_parse_analyze_hook_next(pstate, query); - /* Finish delayed invalidation jobs */ if (IsPathmanReady()) finish_delayed_invalidation(); diff --git a/src/init.c b/src/init.c index ff347d16f5..966daad88a 100644 --- a/src/init.c +++ b/src/init.c @@ -25,18 +25,17 @@ #include "catalog/pg_inherits.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_type.h" -#include "executor/spi.h" #include "miscadmin.h" #include "optimizer/clauses.h" #include "utils/datum.h" #include "utils/inval.h" -#include "utils/fmgroids.h" -#include "utils/syscache.h" #include "utils/builtins.h" -#include "utils/typcache.h" +#include "utils/fmgroids.h" #include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/typcache.h" /* Help user in case of emergency */ diff --git a/src/pathman.h b/src/pathman.h index 45cd11e213..2c66589560 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -51,8 +51,6 @@ /* type modifier (typmod) for 'range_interval' */ #define PATHMAN_CONFIG_interval_typmod -1 -#define PATHMAN_CONFIG_ID_SEQ "pathman_config_id_seq" - /* * Definitions for the "pathman_config_params" table */ diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 9a03e2de96..f4af705181 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -8,10 +8,10 @@ * ------------------------------------------------------------------------ */ -#include "pathman.h" #include "init.h" -#include "utils.h" +#include "pathman.h" #include "relation_info.h" +#include "utils.h" #include "xact_handling.h" #include "access/htup_details.h" @@ -20,13 +20,13 @@ #include 
"catalog/indexing.h" #include "commands/sequence.h" #include "miscadmin.h" +#include "utils/array.h" #include "utils/builtins.h" +#include +#include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/syscache.h" #include "utils/typcache.h" -#include "utils/array.h" -#include "utils/memutils.h" -#include /* declarations */ @@ -56,33 +56,6 @@ PG_FUNCTION_INFO_V1( lock_partitioned_relation ); PG_FUNCTION_INFO_V1( debug_capture ); -/* pathman_range type */ -typedef struct PathmanRange -{ - Oid type_oid; - bool by_val; - RangeEntry range; -} PathmanRange; - -typedef struct PathmanHash -{ - Oid child_oid; - uint32 hash; -} PathmanHash; - -typedef struct PathmanRangeListCtxt -{ - Oid type_oid; - bool by_val; - RangeEntry *ranges; - int nranges; - int pos; -} PathmanRangeListCtxt; - -PG_FUNCTION_INFO_V1( pathman_range_in ); -PG_FUNCTION_INFO_V1( pathman_range_out ); - - static void on_partitions_created_internal(Oid partitioned_table, bool add_callbacks); static void on_partitions_updated_internal(Oid partitioned_table, bool add_callbacks); static void on_partitions_removed_internal(Oid partitioned_table, bool add_callbacks); @@ -729,41 +702,6 @@ lock_partitioned_relation(PG_FUNCTION_ARGS) } -Datum -pathman_range_in(PG_FUNCTION_ARGS) -{ - elog(ERROR, "Not implemented"); -} - -Datum -pathman_range_out(PG_FUNCTION_ARGS) -{ - PathmanRange *rng = (PathmanRange *) PG_GETARG_POINTER(0); - char *result; - char *left, - *right; - Oid outputfunc; - bool typisvarlena; - - getTypeOutputInfo(rng->type_oid, &outputfunc, &typisvarlena); - - left = OidOutputFunctionCall( - outputfunc, - rng->by_val ? - (Datum) rng->range.min : - PointerGetDatum(&rng->range.min)); - - right = OidOutputFunctionCall( - outputfunc, - rng->by_val ? - (Datum) rng->range.max : - PointerGetDatum(&rng->range.max)); - - result = psprintf("[%s: %s)", left, right); - PG_RETURN_CSTRING(result); -} - - /* * NOTE: used for DEBUG, set breakpoint here. 
*/ From 0042c46fac968e961eaecef7b04ebec86e0b0906 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 29 Aug 2016 04:03:16 +0300 Subject: [PATCH 078/184] minor fixes & improvements for PL/pgSQL code --- expected/pg_pathman.out | 2 +- hash.sql | 4 +- init.sql | 4 +- range.sql | 214 +++++++++++++++++----------------------- 4 files changed, 96 insertions(+), 128 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index bbf2844572..b1240fadff 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -133,7 +133,7 @@ SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DAT ERROR: Partitioning key 'dt' must be NOT NULL ALTER TABLE test.range_rel ALTER COLUMN dt SET NOT NULL; SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DATE, '1 month'::INTERVAL, 2); -ERROR: Not enough partitions to fit all the values of 'dt' +ERROR: Not enough partitions to fit all values of 'dt' SELECT pathman.create_range_partitions('test.range_rel', 'DT', '2015-01-01'::DATE, '1 month'::INTERVAL); NOTICE: sequence "range_rel_seq" does not exist, skipping create_range_partitions diff --git a/hash.sql b/hash.sql index b5cc85a56b..f474e82747 100644 --- a/hash.sql +++ b/hash.sql @@ -53,7 +53,7 @@ BEGIN EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', v_child_relname, - @extschema@.get_schema_qualified_name(parent_relid)); + parent_relid::TEXT); EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (@extschema@.get_hash_part_idx(%s(%s), %s) = %s)', @@ -141,7 +141,7 @@ BEGIN attr := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; IF attr IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); + RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; END IF; SELECT string_agg(attname, ', '), diff --git a/init.sql b/init.sql index 100ca3bd0b..70dbafef6d 100644 --- a/init.sql +++ b/init.sql @@ -268,7 +268,7 @@ BEGIN 
/* Create partitions and copy rest of the data */ EXECUTE format('WITH part_data AS (DELETE FROM ONLY %1$s RETURNING *) INSERT INTO %1$s SELECT * FROM part_data', - @extschema@.get_schema_qualified_name(parent_relid)); + parent_relid::TEXT); /* Get number of inserted rows */ GET DIAGNOSTICS p_total = ROW_COUNT; @@ -317,7 +317,7 @@ BEGIN IF rel_persistence = 't'::CHAR THEN RAISE EXCEPTION 'Temporary table "%" cannot be partitioned', - quote_ident(p_relation::TEXT); + p_relation::TEXT; END IF; IF EXISTS (SELECT * FROM @extschema@.pathman_config diff --git a/range.sql b/range.sql index 789d7aa0e9..7d52264dbe 100644 --- a/range.sql +++ b/range.sql @@ -50,8 +50,6 @@ DECLARE v_rows_count INTEGER; v_max p_start_value%TYPE; v_cur_value p_start_value%TYPE := p_start_value; - v_plain_relname TEXT; - v_plain_schema TEXT; i INTEGER; BEGIN @@ -94,17 +92,15 @@ BEGIN pg_typeof(p_start_value)); END IF; - SELECT * INTO v_plain_schema, v_plain_relname - FROM @extschema@.get_plain_schema_and_relname(parent_relid); - /* Create sequence for child partitions names */ - PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); + PERFORM @extschema@.create_or_replace_sequence(schema, relname) + FROM @extschema@.get_plain_schema_and_relname(parent_relid); /* Insert new entry to pathman config */ INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) - VALUES (parent_relid, p_attribute, 2, p_interval::text); + VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); - /* create first partition */ + /* Create first partition */ FOR i IN 1..p_count LOOP EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s)', @@ -117,7 +113,7 @@ BEGIN /* Notify backend about changes */ PERFORM @extschema@.on_create_partitions(parent_relid); - /* Copy data */ + /* Relocate data if asked to */ IF partition_data = true THEN PERFORM @extschema@.disable_parent(parent_relid); PERFORM @extschema@.partition_data(parent_relid); @@ -148,8 
+144,6 @@ DECLARE v_rows_count INTEGER; v_max p_start_value%TYPE; v_cur_value p_start_value%TYPE := p_start_value; - v_plain_schema TEXT; - v_plain_relname TEXT; i INTEGER; BEGIN @@ -194,15 +188,13 @@ BEGIN p_start_value + p_interval * p_count); END IF; - SELECT * INTO v_plain_schema, v_plain_relname - FROM @extschema@.get_plain_schema_and_relname(parent_relid); - /* Create sequence for child partitions names */ - PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); + PERFORM @extschema@.create_or_replace_sequence(schema, relname) + FROM @extschema@.get_plain_schema_and_relname(parent_relid); /* Insert new entry to pathman config */ INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) - VALUES (parent_relid, p_attribute, 2, p_interval::text); + VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); /* create first partition */ FOR i IN 1..p_count @@ -216,7 +208,7 @@ BEGIN /* Notify backend about changes */ PERFORM @extschema@.on_create_partitions(parent_relid); - /* Copy data */ + /* Relocate data if asked to */ IF partition_data = true THEN PERFORM @extschema@.disable_parent(parent_relid); PERFORM @extschema@.partition_data(parent_relid); @@ -244,9 +236,7 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( RETURNS INTEGER AS $$ DECLARE - v_plain_schema TEXT; - v_plain_relname TEXT; - part_count INTEGER := 0; + part_count INTEGER := 0; BEGIN PERFORM @extschema@.validate_relname(parent_relid); @@ -257,21 +247,19 @@ BEGIN RAISE EXCEPTION 'Interval must be positive'; END IF; - SELECT * INTO v_plain_schema, v_plain_relname - FROM @extschema@.get_plain_schema_and_relname(parent_relid); - - /* Create sequence for child partitions names */ - PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); - /* Check boundaries */ PERFORM @extschema@.check_boundaries(parent_relid, p_attribute, p_start_value, p_end_value); + /* Create sequence for child partitions names */ + 
PERFORM @extschema@.create_or_replace_sequence(schema, relname) + FROM @extschema@.get_plain_schema_and_relname(parent_relid); + /* Insert new entry to pathman config */ INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) - VALUES (parent_relid, p_attribute, 2, p_interval::text); + VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); WHILE p_start_value <= p_end_value LOOP @@ -285,7 +273,7 @@ BEGIN /* Notify backend about changes */ PERFORM @extschema@.on_create_partitions(parent_relid); - /* Copy data */ + /* Relocate data if asked to */ IF partition_data = true THEN PERFORM @extschema@.disable_parent(parent_relid); PERFORM @extschema@.partition_data(parent_relid); @@ -313,30 +301,26 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( RETURNS INTEGER AS $$ DECLARE - v_plain_schema TEXT; - v_plain_relname TEXT; - part_count INTEGER := 0; + part_count INTEGER := 0; BEGIN PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); - SELECT * INTO v_plain_schema, v_plain_relname - FROM @extschema@.get_plain_schema_and_relname(parent_relid); - - /* Create sequence for child partitions names */ - PERFORM @extschema@.create_or_replace_sequence(v_plain_schema, v_plain_relname); - /* Check boundaries */ PERFORM @extschema@.check_boundaries(parent_relid, p_attribute, p_start_value, p_end_value); + /* Create sequence for child partitions names */ + PERFORM @extschema@.create_or_replace_sequence(schema, relname) + FROM @extschema@.get_plain_schema_and_relname(parent_relid); + /* Insert new entry to pathman config */ INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) - VALUES (parent_relid, p_attribute, 2, p_interval::text); + VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); WHILE p_start_value <= p_end_value LOOP @@ -351,7 +335,7 @@ BEGIN /* Notify backend about changes */ PERFORM 
@extschema@.on_create_partitions(parent_relid); - /* Copy data */ + /* Relocate data if asked to */ IF partition_data = true THEN PERFORM @extschema@.disable_parent(parent_relid); PERFORM @extschema@.partition_data(parent_relid); @@ -379,15 +363,16 @@ $$ DECLARE v_min p_start_value%TYPE; v_max p_start_value%TYPE; - v_count INTEGER; + v_count BIGINT; BEGIN /* Get min and max values */ - EXECUTE format('SELECT count(*), min(%s), max(%s) FROM %s WHERE NOT %s IS NULL', - p_attribute, p_attribute, parent_relid::text, p_attribute) + EXECUTE format('SELECT count(*), min(%1$s), max(%1$s) + FROM %2$s WHERE NOT %1$s IS NULL', + p_attribute, parent_relid::TEXT) INTO v_count, v_min, v_max; - /* check that column has NULL values */ + /* Check if column has NULL values */ IF v_count > 0 AND (v_min IS NULL OR v_max IS NULL) THEN RAISE EXCEPTION '''%'' column contains NULL values', p_attribute; END IF; @@ -399,8 +384,8 @@ BEGIN END IF; /* Check upper boundary */ - IF p_end_value <= v_max THEN - RAISE EXCEPTION 'Not enough partitions to fit all the values of ''%''', + IF p_end_value <= v_max THEN + RAISE EXCEPTION 'Not enough partitions to fit all values of ''%''', p_attribute; END IF; END @@ -421,7 +406,6 @@ DECLARE v_child_relname TEXT; v_plain_child_relname TEXT; v_attname TEXT; - v_sql TEXT; v_plain_schema TEXT; v_plain_relname TEXT; v_child_relname_exists BOOL; @@ -432,7 +416,7 @@ BEGIN WHERE partrel = parent_relid; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::text); + RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; END IF; SELECT * INTO v_plain_schema, v_plain_relname @@ -461,21 +445,17 @@ BEGIN v_child_relname := partition_name; END IF; - EXECUTE format( - 'CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', - v_child_relname, - @extschema@.get_schema_qualified_name(parent_relid)); - - EXECUTE format( - 'ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', - v_child_relname, - 
@extschema@.build_check_constraint_name( - v_child_relname::regclass, - v_attname), - @extschema@.build_range_condition( - v_attname, - p_start_value, - p_end_value)); + EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', + v_child_relname, + parent_relid::TEXT); + + EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', + v_child_relname, + @extschema@.build_check_constraint_name(v_child_relname::REGCLASS, + v_attname), + @extschema@.build_range_condition(v_attname, + p_start_value, + p_end_value)); RETURN v_child_relname; END @@ -511,8 +491,7 @@ BEGIN INTO v_attname, v_part_type; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', - quote_ident(v_parent_relid::text); + RAISE EXCEPTION 'Table "%" is not partitioned', v_parent_relid::TEXT; END IF; /* Check if this is a RANGE partition */ @@ -534,17 +513,16 @@ BEGIN END IF; /* Create new partition */ - v_new_partition := @extschema@.create_single_range_partition( - @extschema@.get_schema_qualified_name(v_parent_relid), - p_value, - p_range[2], - partition_name); + v_new_partition := @extschema@.create_single_range_partition(v_parent_relid, + p_value, + p_range[2], + partition_name); /* Copy data */ v_cond := @extschema@.build_range_condition(v_attname, p_value, p_range[2]); EXECUTE format('WITH part_data AS (DELETE FROM %s WHERE %s RETURNING *) INSERT INTO %s SELECT * FROM part_data', - p_partition, + p_partition::TEXT, v_cond, v_new_partition); @@ -553,11 +531,11 @@ BEGIN v_check_name := @extschema@.build_check_constraint_name(p_partition, v_attname); EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s', - p_partition::text, + p_partition::TEXT, v_check_name); EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', - p_partition::text, + p_partition::TEXT, v_check_name, v_cond); @@ -577,8 +555,8 @@ CREATE OR REPLACE FUNCTION @extschema@.merge_range_partitions( RETURNS VOID AS $$ DECLARE - v_parent_relid1 OID; - v_parent_relid2 OID; + v_parent_relid1 
REGCLASS; + v_parent_relid2 REGCLASS; v_attname TEXT; v_part_type INTEGER; v_atttype TEXT; @@ -601,8 +579,7 @@ BEGIN INTO v_attname, v_part_type; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', - quote_ident(v_parent_relid1::regclass::text); + RAISE EXCEPTION 'Table "%" is not partitioned', v_parent_relid1::TEXT; END IF; /* Check if this is a RANGE partition */ @@ -640,7 +617,6 @@ RETURNS ANYARRAY AS $$ DECLARE v_attname TEXT; - v_cond TEXT; v_check_name TEXT; BEGIN @@ -649,8 +625,7 @@ BEGIN INTO v_attname; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', - quote_ident(parent_relid::text); + RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; END IF; /* @@ -669,12 +644,12 @@ BEGIN /* Drop constraint on first partition... */ v_check_name := @extschema@.build_check_constraint_name(partition1, v_attname); EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s', - partition1::text, + partition1::TEXT, v_check_name); /* and create a new one */ EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', - partition1::text, + partition1::TEXT, v_check_name, @extschema@.build_range_condition(v_attname, least(p_range[1], p_range[3]), @@ -683,11 +658,11 @@ BEGIN /* Copy data from second partition to the first one */ EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) INSERT INTO %s SELECT * FROM part_data', - partition2::text, - partition1::text); + partition2::TEXT, + partition1::TEXT); /* Remove second partition */ - EXECUTE format('DROP TABLE %s', partition2::text); + EXECUTE format('DROP TABLE %s', partition2::TEXT); END $$ LANGUAGE plpgsql; @@ -713,7 +688,7 @@ BEGIN INTO v_attname, v_interval; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); + RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; END IF; v_atttype := @extschema@.get_attribute_type_name(parent_relid, v_attname); @@ -750,7 +725,7 @@ CREATE OR REPLACE FUNCTION 
@extschema@.append_partition_internal( RETURNS TEXT AS $$ DECLARE - v_part_name TEXT; + v_part_name TEXT; BEGIN IF @extschema@.partitions_count(parent_relid) = 0 THEN @@ -806,7 +781,7 @@ BEGIN INTO v_attname, v_interval; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); + RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; END IF; v_atttype := @extschema@.get_attribute_type_name(parent_relid, v_attname); @@ -890,6 +865,7 @@ RETURNS TEXT AS $$ DECLARE v_part_name TEXT; + BEGIN IF p_start_value >= p_end_value THEN RAISE EXCEPTION 'Failed to create partition: p_start_value is greater than p_end_value'; @@ -902,11 +878,10 @@ BEGIN END IF; /* Create new partition */ - v_part_name :=@extschema@.create_single_range_partition( - parent_relid, - p_start_value, - p_end_value, - partition_name); + v_part_name := @extschema@.create_single_range_partition(parent_relid, + p_start_value, + p_end_value, + partition_name); PERFORM @extschema@.on_update_partitions(parent_relid); RETURN v_part_name; @@ -926,20 +901,20 @@ CREATE OR REPLACE FUNCTION @extschema@.drop_range_partition( RETURNS TEXT AS $$ DECLARE - v_part_relid REGCLASS; - v_part_name TEXT := p_partition::TEXT; - v_count INTEGER; + parent_relid REGCLASS; + part_name TEXT; BEGIN - v_part_relid = @extschema@.get_parent_of_partition(p_partition); + parent_relid := @extschema@.get_parent_of_partition(p_partition); + part_name := p_partition::TEXT; /* save the name to be returned */ /* Drop table */ - EXECUTE format('DROP TABLE %s', p_partition::TEXT); + EXECUTE format('DROP TABLE %s', part_name); /* Invalidate cache */ - PERFORM @extschema@.on_update_partitions(v_part_relid); + PERFORM @extschema@.on_update_partitions(parent_relid); - RETURN v_part_name; + RETURN part_name; EXCEPTION WHEN others THEN RAISE EXCEPTION '%', SQLERRM; @@ -960,9 +935,6 @@ RETURNS TEXT AS $$ DECLARE v_attname TEXT; - v_cond TEXT; - v_plain_partname TEXT; - v_plain_schema TEXT; 
rel_persistence CHAR; BEGIN @@ -972,7 +944,7 @@ BEGIN IF rel_persistence = 't'::CHAR THEN RAISE EXCEPTION 'Temporary table "%" cannot be used as a partition', - quote_ident(p_partition::TEXT); + p_partition::TEXT; END IF; IF @extschema@.check_overlap(parent_relid, p_start_value, p_end_value) THEN @@ -986,19 +958,15 @@ BEGIN /* Set inheritance */ EXECUTE format('ALTER TABLE %s INHERIT %s', p_partition, parent_relid); - /* Set check constraint */ v_attname := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); + RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; END IF; - /* Plain partition name and schema */ - SELECT * INTO v_plain_schema, v_plain_partname - FROM @extschema@.get_plain_schema_and_relname(p_partition); - + /* Set check constraint */ EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', - p_partition, + p_partition::TEXT, @extschema@.build_check_constraint_name(p_partition, v_attname), @extschema@.build_range_condition(v_attname, p_start_value, @@ -1024,32 +992,32 @@ CREATE OR REPLACE FUNCTION @extschema@.detach_range_partition( RETURNS TEXT AS $$ DECLARE - v_attname text; - v_parent regclass; + v_attname TEXT; + parent_relid REGCLASS; BEGIN - v_parent = @extschema@.get_parent_of_partition(p_partition); + parent_relid = @extschema@.get_parent_of_partition(p_partition); v_attname := attname FROM @extschema@.pathman_config - WHERE partrel = v_parent; + WHERE partrel = parent_relid; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(v_parent::TEXT); + RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; END IF; /* Remove inheritance */ EXECUTE format('ALTER TABLE %s NO INHERIT %s', - p_partition, - v_parent); + p_partition::TEXT, + parent_relid::TEXT); /* Remove check constraint */ EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %s', - p_partition, + 
p_partition::TEXT, @extschema@.build_check_constraint_name(p_partition, v_attname)); /* Invalidate cache */ - PERFORM @extschema@.on_update_partitions(v_parent); + PERFORM @extschema@.on_update_partitions(parent_relid); RETURN p_partition; @@ -1114,7 +1082,7 @@ BEGIN attr := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; IF attr IS NULL THEN - RAISE EXCEPTION 'Table % is not partitioned', quote_ident(parent_relid::TEXT); + RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; END IF; SELECT string_agg(attname, ', '), @@ -1127,7 +1095,7 @@ BEGIN ' AND '), string_agg('$' || attnum, ', ') FROM pg_attribute - WHERE attrelid::regclass = parent_relid AND attnum > 0 + WHERE attrelid::REGCLASS = parent_relid AND attnum > 0 INTO att_names, old_fields, new_fields, @@ -1148,11 +1116,11 @@ BEGIN LOOP EXECUTE format(trigger, triggername, - @extschema@.get_schema_qualified_name(rec.inhrelid), + rec.inhrelid::REGCLASS::TEXT, funcname); END LOOP; - return funcname; + RETURN funcname; END $$ LANGUAGE plpgsql; From fd4405def3fe00c06c19615e7bd348bc4547bc63 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 29 Aug 2016 05:45:46 +0300 Subject: [PATCH 079/184] make sure PartitionFilter is enabled while performing [concurrent] partitioning, fixes for ConcurrentPartWorker: pythonish tests pass --- init.sql | 8 ++++--- src/pathman_workers.c | 48 +++++++++++++++++++++++++++----------- src/pathman_workers.h | 2 +- tests/partitioning_test.py | 6 +++-- 4 files changed, 44 insertions(+), 20 deletions(-) diff --git a/init.sql b/init.sql index 70dbafef6d..e270c807b1 100644 --- a/init.sql +++ b/init.sql @@ -247,7 +247,8 @@ BEGIN RETURN; END $$ -LANGUAGE plpgsql; +LANGUAGE plpgsql +SET pg_pathman.enable_partitionfilter = on; /* ensures that PartitionFilter is ON */ /* * Old school way to distribute rows to partitions. 
@@ -275,7 +276,8 @@ BEGIN RETURN; END $$ -LANGUAGE plpgsql; +LANGUAGE plpgsql +SET pg_pathman.enable_partitionfilter = on; /* ensures that PartitionFilter is ON */ /* * Disable pathman partitioning for specified relation. @@ -541,7 +543,7 @@ BEGIN RETURN v_part_count; END $$ LANGUAGE plpgsql -SET pg_pathman.enable_partitionfilter = off; +SET pg_pathman.enable_partitionfilter = off; /* ensures that PartitionFilter is OFF */ diff --git a/src/pathman_workers.c b/src/pathman_workers.c index 136306057a..5bb5c3b37c 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -421,9 +421,11 @@ bgw_main_concurrent_part(Datum main_arg) /* Do the job */ do { - Oid types[2] = { OIDOID, INT4OID }; - Datum vals[2] = { part_slot->relid, part_slot->batch_size }; - bool nulls[2] = { false, false }; + MemoryContext old_mcxt; + + Oid types[2] = { OIDOID, INT4OID }; + Datum vals[2] = { part_slot->relid, part_slot->batch_size }; + bool nulls[2] = { false, false }; /* Reset loop variables */ failed = false; @@ -432,22 +434,25 @@ bgw_main_concurrent_part(Datum main_arg) /* Start new transaction (syscache access etc.) 
*/ StartTransactionCommand(); + /* We'll need this to recover from errors */ + old_mcxt = CurrentMemoryContext; + SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); /* Prepare the query if needed */ if (sql == NULL) { - MemoryContext oldcontext; + MemoryContext current_mcxt; /* * Allocate as SQL query in top memory context because current * context will be destroyed after transaction finishes */ - oldcontext = MemoryContextSwitchTo(TopMemoryContext); + current_mcxt = MemoryContextSwitchTo(TopMemoryContext); sql = psprintf("SELECT %s._partition_data_concurrent($1::oid, p_limit:=$2)", get_namespace_name(get_pathman_schema())); - MemoryContextSwitchTo(oldcontext); + MemoryContextSwitchTo(current_mcxt); } /* Exec ret = _partition_data_concurrent() */ @@ -471,13 +476,25 @@ bgw_main_concurrent_part(Datum main_arg) } PG_CATCH(); { - ErrorData *error; - - EmitErrorReport(); + ErrorData *error; + char *sleep_time_str; + /* Switch to the original context & copy edata */ + MemoryContextSwitchTo(old_mcxt); error = CopyErrorData(); - elog(LOG, "%s: %s", concurrent_part_bgw, error->message); FlushErrorState(); + + /* Print message for this BGWorker to server log */ + sleep_time_str = datum_to_cstring(Float8GetDatum(part_slot->sleep_time), + FLOAT8OID); + ereport(LOG, + (errmsg("%s: %s", concurrent_part_bgw, error->message), + errdetail("Attempt: %d/%d, sleep time: %s", + failures_count + 1, + PART_WORKER_MAX_ATTEMPTS, + sleep_time_str))); + pfree(sleep_time_str); /* free the time string */ + FreeErrorData(error); /* @@ -485,7 +502,7 @@ bgw_main_concurrent_part(Datum main_arg) * concurrent user queries.
Check that attempts count doesn't exceed * some reasonable value */ - if (failures_count++ > PART_WORKER_MAX_ATTEMPTS) + if (failures_count++ >= PART_WORKER_MAX_ATTEMPTS) { /* Mark slot as FREE */ part_slot->worker_status = WS_FREE; @@ -510,8 +527,11 @@ bgw_main_concurrent_part(Datum main_arg) if (failed) { #ifdef USE_ASSERT_CHECKING - elog(DEBUG2, "%s: could not relocate batch, total: %lu [%u]", - concurrent_part_bgw, part_slot->total_rows, MyProcPid); + elog(DEBUG1, "%s: could not relocate batch (%d/%d), total: %lu [%u]", + concurrent_part_bgw, + failures_count, PART_WORKER_MAX_ATTEMPTS, /* current/max */ + part_slot->total_rows, + MyProcPid); #endif /* Abort transaction and sleep for a second */ @@ -528,7 +548,7 @@ bgw_main_concurrent_part(Datum main_arg) part_slot->total_rows += rows; #ifdef USE_ASSERT_CHECKING - elog(DEBUG2, "%s: relocated %d rows, total: %lu [%u]", + elog(DEBUG1, "%s: relocated %d rows, total: %lu [%u]", concurrent_part_bgw, rows, part_slot->total_rows, MyProcPid); #endif } diff --git a/src/pathman_workers.h b/src/pathman_workers.h index 38266cddac..49b17a954a 100644 --- a/src/pathman_workers.h +++ b/src/pathman_workers.h @@ -82,7 +82,7 @@ typedef struct #define PART_WORKER_SLOTS 10 /* Max number of attempts per batch */ -#define PART_WORKER_MAX_ATTEMPTS 100 +#define PART_WORKER_MAX_ATTEMPTS 60 /* diff --git a/tests/partitioning_test.py b/tests/partitioning_test.py index 0645c82df9..835c968d67 100644 --- a/tests/partitioning_test.py +++ b/tests/partitioning_test.py @@ -53,8 +53,10 @@ def test_concurrent(self): while True: # update some rows to check for deadlocks node.safe_psql('postgres', - '''update abc set t = 'test' - where id in (select (random() * 300000)::int from generate_series(1, 3000))''') + ''' + update abc set t = 'test' + where id in (select (random() * 300000)::int from generate_series(1, 3000)) + ''') count = node.execute('postgres', 'select count(*) from pathman_concurrent_part_tasks') From 
dbcce6eb26aeba4bd96062c20022cb22bcf14f47 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 29 Aug 2016 05:57:08 +0300 Subject: [PATCH 080/184] Update README.md --- README.md | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ba20f797b6..31da0545d8 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ create_hash_partitions(relation REGCLASS, partitions_count INTEGER, partition_name TEXT DEFAULT NULL) ``` -Performs HASH partitioning for `relation` by integer key `attribute`. The `partitions_count` parameter specifies the number of partitions to create; it cannot be changed afterwards. If `partition_data` is `true` then all the data will be automatically copied from the parent table to partitions. Note that data migration may took a while to finish and the table will be locked until transaction commits. See `partition_data_concurrent()` for a lock-free way to migrate data. +Performs HASH partitioning for `relation` by integer key `attribute`. The `partitions_count` parameter specifies the number of partitions to create; it cannot be changed afterwards. If `partition_data` is `true` then all the data will be automatically copied from the parent table to partitions. Note that data migration may took a while to finish and the table will be locked until transaction commits. See `partition_table_concurrently()` for a lock-free way to migrate data. ```plpgsql create_range_partitions(relation REGCLASS, @@ -106,9 +106,14 @@ Performs RANGE-partitioning from specified range for `relation` by partitioning ### Data migration ```plpgsql -partition_data_concurrent(relation REGCLASS) +partition_table_concurrently(relation REGCLASS) ``` -Starts a background worker to copy data from parent table to partitions. The worker utilize short transactions to copy small bunches of data (up to 10K rows per transaction) and thus doesn't significantly interfere with users activity. 
+Starts a background worker to move data from parent table to partitions. The worker utilizes short transactions to copy small batches of data (up to 10K rows per transaction) and thus doesn't significantly interfere with user's activity. + +```plpgsql +stop_concurrent_part_task(relation REGCLASS) +``` +Stops a background worker performing a concurrent partitioning task. Note: worker will exit after it finishes relocating a current batch. ### Triggers ```plpgsql @@ -344,6 +349,15 @@ SELECT tableoid::regclass AS partition, * FROM partitioned_table; - Though indices on a parent table aren't particularly useful (since it's empty), they act as prototypes for indices on partitions. For each index on the parent table, `pg_pathman` will create a similar index on every partition. +- All running concurrent partitioning tasks can be listed using the `pathman_concurrent_part_tasks` view: +```plpgsql +postgres=# SELECT * FROM pathman_concurrent_part_tasks; + userid | pid | dbid | relid | processed | status +--------+------+-------+-------+-----------+--------- + dmitry | 7367 | 16384 | test | 472000 | working +(1 row) +``` + ### HASH partitioning Consider an example of HASH partitioning. First create a table with some integer column: ``` From ed154fc7a749bf6429d3cc9a03eef5d2a8423ef1 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 29 Aug 2016 16:56:11 +0300 Subject: [PATCH 081/184] fixes regarding PATHMAN_CONFIG_PARAMS in refresh_pathman_relation_info() --- src/init.c | 9 +++++---- src/relation_info.c | 10 +++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/init.c b/src/init.c index 966daad88a..35ce722ef5 100644 --- a/src/init.c +++ b/src/init.c @@ -622,7 +622,7 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, } /* - * Return 'enable_parent' parameter of relation + * Return additional parameters from PATHMAN_CONFIG_PARAMS. 
*/ bool read_pathman_params(Oid relid, Datum *values, bool *isnull) @@ -632,7 +632,7 @@ read_pathman_params(Oid relid, Datum *values, bool *isnull) ScanKeyData key[1]; Snapshot snapshot; HeapTuple htup; - bool result = false; + bool row_found = false; ScanKeyInit(&key[0], Anum_pathman_config_params_partrel, @@ -643,11 +643,12 @@ read_pathman_params(Oid relid, Datum *values, bool *isnull) snapshot = RegisterSnapshot(GetLatestSnapshot()); scan = heap_beginscan(rel, snapshot, 1, key); + /* There should be just 1 row */ if ((htup = heap_getnext(scan, ForwardScanDirection)) != NULL) { /* Extract data if necessary */ heap_deform_tuple(htup, RelationGetDescr(rel), values, isnull); - result = true; + row_found = true; } /* Clean resources */ @@ -655,7 +656,7 @@ read_pathman_params(Oid relid, Datum *values, bool *isnull) UnregisterSnapshot(snapshot); heap_close(rel, AccessShareLock); - return result; + return row_found; } /* diff --git a/src/relation_info.c b/src/relation_info.c index 500934de7f..87db65af79 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -160,14 +160,18 @@ refresh_pathman_relation_info(Oid relid, pfree(prel_children); - /* - * Read additional parameters ('enable_parent' and 'auto' at the moment) - */ + /* Read additional parameters ('enable_parent' and 'auto' at the moment) */ if (read_pathman_params(relid, param_values, param_isnull)) { prel->enable_parent = param_values[Anum_pathman_config_params_enable_parent - 1]; prel->auto_partition = param_values[Anum_pathman_config_params_auto - 1]; } + /* Else set default values if they cannot be found */ + else + { + prel->enable_parent = false; + prel->auto_partition = true; + } /* We've successfully built a cache entry */ prel->valid = true; From fe964827f80dc94c73b319377d07c568495b1da5 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Mon, 29 Aug 2016 17:00:08 +0300 Subject: [PATCH 082/184] use atomic flag for worker slots --- src/init.c | 2 +- src/pathman_workers.c | 54 
++++++++++++++++++++++++++++++------------- src/pathman_workers.h | 3 ++- 3 files changed, 41 insertions(+), 18 deletions(-) diff --git a/src/init.c b/src/init.c index 966daad88a..3a36d27feb 100644 --- a/src/init.c +++ b/src/init.c @@ -622,7 +622,7 @@ pathman_config_contains_relation(Oid relid, Datum *values, bool *isnull, } /* - * Return 'enable_parent' parameter of relation + * Loads additional pathman parameters like 'enable_parent' or 'auto' */ bool read_pathman_params(Oid relid, Datum *values, bool *isnull) diff --git a/src/pathman_workers.c b/src/pathman_workers.c index 5bb5c3b37c..60bf6725a4 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -95,12 +95,19 @@ init_concurrent_part_task_slots(void) { bool found; Size size = estimate_concurrent_part_task_slots_size(); + int i; concurrent_part_slots = (ConcurrentPartSlot *) ShmemInitStruct("array of ConcurrentPartSlots", size, &found); /* Initialize 'concurrent_part_slots' if needed */ - if (!found) memset(concurrent_part_slots, 0, size); + if (!found) + { + memset(concurrent_part_slots, 0, size); + + for (i = 0; i < PART_WORKER_SLOTS; i++) + pg_atomic_init_flag_impl(&concurrent_part_slots[i].slot_used); + } } @@ -423,9 +430,9 @@ bgw_main_concurrent_part(Datum main_arg) { MemoryContext old_mcxt; - Oid types[2] = { OIDOID, INT4OID }; - Datum vals[2] = { part_slot->relid, part_slot->batch_size }; - bool nulls[2] = { false, false }; + Oid types[2] = { OIDOID, INT4OID }; + Datum vals[2] = { part_slot->relid, part_slot->batch_size }; + bool nulls[2] = { false, false }; /* Reset loop variables */ failed = false; @@ -506,6 +513,7 @@ bgw_main_concurrent_part(Datum main_arg) { /* Mark slot as FREE */ part_slot->worker_status = WS_FREE; + pg_atomic_clear_flag(&part_slot->slot_used); elog(LOG, "Concurrent partitioning worker has canceled the task because " @@ -561,7 +569,10 @@ bgw_main_concurrent_part(Datum main_arg) /* Reclaim the resources */ pfree(sql); + + /* Set slot free */ part_slot->worker_status = 
WS_FREE; + pg_atomic_clear_flag(&part_slot->slot_used); } @@ -596,16 +607,24 @@ partition_table_concurrently(PG_FUNCTION_ARGS) */ for (i = 0; i < PART_WORKER_SLOTS; i++) { - if (concurrent_part_slots[i].worker_status == WS_FREE) + /* + * Attempt to acquire the flag. If it has alread been used then skip + * this slot and try another one + */ + if (!pg_atomic_test_set_flag(&concurrent_part_slots[i].slot_used)) + continue; + + /* If atomic flag wasn't used then status should be WS_FREE */ + Assert(concurrent_part_slots[i].worker_status == WS_FREE); + + if (empty_slot_idx < 0) { - if (empty_slot_idx < 0) - { - my_slot = &concurrent_part_slots[i]; - empty_slot_idx = i; - } + my_slot = &concurrent_part_slots[i]; + empty_slot_idx = i; } - else if (concurrent_part_slots[i].relid == relid && - concurrent_part_slots[i].dbid == MyDatabaseId) + + if (concurrent_part_slots[i].relid == relid && + concurrent_part_slots[i].dbid == MyDatabaseId) { elog(ERROR, "Table \"%s\" is already being partitioned", @@ -745,13 +764,16 @@ stop_concurrent_part_task(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); int i; + ConcurrentPartSlot *slot; for (i = 0; i < PART_WORKER_SLOTS; i++) - if (concurrent_part_slots[i].worker_status != WS_FREE && - concurrent_part_slots[i].relid == relid && - concurrent_part_slots[i].dbid == MyDatabaseId) + slot = &concurrent_part_slots[i]; + + if (slot->worker_status != WS_FREE && + slot->relid == relid && + slot->dbid == MyDatabaseId) { - concurrent_part_slots[i].worker_status = WS_STOPPING; + slot->worker_status = WS_STOPPING; elog(NOTICE, "Worker will stop after it finishes current batch"); PG_RETURN_BOOL(true); diff --git a/src/pathman_workers.h b/src/pathman_workers.h index 49b17a954a..9332ac0e59 100644 --- a/src/pathman_workers.h +++ b/src/pathman_workers.h @@ -46,6 +46,7 @@ typedef struct */ typedef struct { + pg_atomic_flag slot_used; /* flag for atomic slot acquirement */ Oid userid; /* connect as a specified user */ enum @@ -57,7 +58,7 @@ typedef struct 
} worker_status; /* status of a particular worker */ pid_t pid; /* worker's PID */ - Oid dbid; /* database which contains relation 'relid' */ + Oid dbid; /* database which contains the relation */ Oid relid; /* table to be partitioned concurrently */ uint64 total_rows; /* total amount of rows processed */ From 1e8d342ce168d556f6024d1cae7ee529d9377cca Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Mon, 29 Aug 2016 22:45:19 +0300 Subject: [PATCH 083/184] Russian documentation updated --- README.md | 2 +- README.rus.md | 356 +++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 280 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 31da0545d8..18fbcd87a9 100644 --- a/README.md +++ b/README.md @@ -491,7 +491,7 @@ There are several user-accessible [GUC](https://www.postgresql.org/docs/9.5/stat To **permanently** disable `pg_pathman` for some previously partitioned table, use the `disable_partitioning()` function: ``` -SELECT disable_partitioning('range_rel'); +SELECT disable_pathman_for('range_rel'); ``` All sections and data will remain unchanged and will be handled by the standard PostgreSQL inheritance mechanism. diff --git a/README.rus.md b/README.rus.md index f8c45306a5..85c8ea70bc 100644 --- a/README.rus.md +++ b/README.rus.md @@ -1,10 +1,12 @@ +[![Build Status](https://travis-ci.org/postgrespro/pg_pathman.svg?branch=master)](https://travis-ci.org/postgrespro/pg_pathman) + # pg_pathman Модуль `pg_pathman` предоставляет оптимизированный механизм секционирования, а также функции для создания и управления секциями. ## Концепция pg_pathman -Секционирование -- это способ разбиения одной большой таблицы на множество меньших по размеру. Для каждой записи можно однозначно определить секцию, в которой она должна храниться посредством вычисления ключа. +**Секционирование** -- это способ разбиения одной большой таблицы на множество меньших по размеру. 
Для каждой записи можно однозначно определить секцию, в которой она должна храниться посредством вычисления ключа. Секционирование в postgres основано на механизме наследования. Каждому наследнику задается условие CHECK CONSTRAINT. Например: ``` @@ -29,16 +31,15 @@ WHERE id = 150 В текущей версии `pg_pathman` поддерживает следующие типы секционирования: -* RANGE - разбивает таблицу на секции по диапазонам ключевого аттрибута; для оптимизации построения плана используется метод бинарного поиска. -* HASH - данные равномерно распределяются по секциям в соответствии со значениями hash-функции, вычисленными по заданному целочисленному атрибуту. +* **RANGE** - разбивает таблицу на секции по диапазонам ключевого аттрибута; для оптимизации построения плана используется метод бинарного поиска. +* **HASH** - данные равномерно распределяются по секциям в соответствии со значениями hash-функции, вычисленными по заданному целочисленному атрибуту. + +More interesting features are yet to come. Stay tuned! ## Roadmap - * Выбор секций на этапе выполнения запроса (полезно для nested loop join, prepared statements); - * Оптимизация выдачи упорядоченных результатов из секционированных таблиц (полезно для merge join, order by); - * Оптимизация hash join для случая, когда обе таблицы секционированы по ключу join’а; * LIST-секционирование; - * HASH-секционирование по ключевому атрибуту с типом, отличным от INTEGER. + * Оптимизация hash join для случая, когда обе таблицы секционированы по ключу join’а. ## Установка @@ -55,113 +56,306 @@ shared_preload_libraries = 'pg_pathman' CREATE EXTENSION pg_pathman; ``` -## Функции pg_pathman +> **Важно:** Если вы хотите собрать `pg_pathman` для работы с кастомной сборкой PostgreSQL, не забудьте установить переменную окружения `PG_CONFIG` равной пути к исполняемому файлу pg_config. Узнать больше о сборке расширений для PostgreSQL можно по ссылке: [here](https://wiki.postgresql.org/wiki/Building_and_Installing_PostgreSQL_Extension_Modules). 
+ +## Функции `pg_pathman` ### Создание секций +```plpgsql +create_hash_partitions(relation REGCLASS, + attribute TEXT, + partitions_count INTEGER, + partition_name TEXT DEFAULT NULL) ``` -create_hash_partitions( - relation TEXT, - attribute TEXT, - partitions_count INTEGER) -``` -Выполняет HASH-секционирование таблицы `relation` по целочисленному полю `attribute`. Создает `partitions_count` дочерних секций, а также триггер на вставку. Данные из родительской таблицы будут автоматически скопированы в дочерние. +Выполняет HASH-секционирование таблицы `relation` по целочисленному полю `attribute`. Параметр `partitions_count` определяет, сколько секций будет создано. Если `partition_data` установлен в значение `true`, то данные из родительской таблицы будут автоматически распределены по секциям. Стоит иметь в виду, что миграция данных может занять некоторое время, а данные заблокированы. Для конкурентной миграции данных см. функцию `partition_table_concurrently()`. +```plpgsql +create_range_partitions(relation REGCLASS, + attribute TEXT, + start_value ANYELEMENT, + interval ANYELEMENT, + count INTEGER DEFAULT NULL + partition_data BOOLEAN DEFAULT true) + +create_range_partitions(relation TEXT, + attribute TEXT, + start_value ANYELEMENT, + interval INTERVAL, + count INTEGER DEFAULT NULL, + partition_data BOOLEAN DEFAULT true) ``` -create_range_partitions( - relation TEXT, - attribute TEXT, - start_value ANYELEMENT, - interval ANYELEMENT, - premake INTEGER DEFAULT NULL) +Выполняет RANGE-секционирование таблицы `relation` по полю `attribute`. Аргумент `start_value` задает начальное значение, `interval` -- диапазон значений внутри одной секции, `count` -- количество создаваемых секций (если не задано, то pathman попытается определить количество секций на основе значений аттрибута). 
+ +```plpgsql +create_partitions_from_range(relation REGCLASS, + attribute TEXT, + start_value ANYELEMENT, + end_value ANYELEMENT, + interval ANYELEMENT, + partition_data BOOLEAN DEFAULT true) -create_range_partitions( - relation TEXT, - attribute TEXT, - start_value ANYELEMENT, - interval INTERVAL, - premake INTEGER DEFAULT NULL) +create_partitions_from_range(relation REGCLASS, + attribute TEXT, + start_value ANYELEMENT, + end_value ANYELEMENT, + interval INTERVAL, + partition_data BOOLEAN DEFAULT true) ``` -Выполняет RANGE-секционирование таблицы `relation` по полю `attribute`. Аргумент `start_value` задает начальное значение, `interval` -- диапазон значений внутри одной секции, `premake` -- количество заранее создаваемых секций (если не задано, то pathman попытается определить количество секций на основе значений аттрибута). Данные из родительской таблицы будут автоматически скопированы в дочерние. +Выполняет RANGE-секционирование для заданного диапазона таблицы `relation` по полю `attribute`. + +### Миграция данных +```plpgsql +partition_table_concurrently(relation REGCLASS) ``` -create_partitions_from_range( - relation TEXT, - attribute TEXT, - start_value ANYELEMENT, - end_value ANYELEMENT, - interval ANYELEMENT) +Запускает новый процесс (background worker) для конкурентного перемещения данных из родительской таблицы в дочерние секции. Рабочий процесс использует короткие транзакции для перемещения небольших объемов данных (порядка 10 тысяч записей) и, таким образом, не оказывает существенного влияния на работу пользователей. -create_partitions_from_range( - relation TEXT, - attribute TEXT, - start_value ANYELEMENT, - end_value ANYELEMENT, - interval INTERVAL) +```plpgsql +stop_concurrent_part_task(relation REGCLASS) ``` -Выполняет RANGE-секционирование для заданного диапазона таблицы `relation` по полю `attribute`. Данные также будут скопированы в дочерние секции. +Останавливает процесс конкурентного партиционирования. 
Обратите внимание, что процесс завершается не мгновенно, а только по завершении текущей транзакции. ### Утилиты -``` -create_hash_update_trigger(parent TEXT) +```plpgsql +create_hash_update_trigger(parent REGCLASS) ``` Создает триггер на UPDATE для HASH секций. По-умолчанию триггер на обновление данных не создается, т.к. это создает дополнительные накладные расходы. Триггер полезен только в том случае, когда меняется значение ключевого аттрибута. -``` -create_range_update_trigger(parent TEXT) +```plpgsql +create_range_update_trigger(parent REGCLASS) ``` Аналогично предыдущей, но для RANGE секций. ### Управление секциями -``` -split_range_partition(partition TEXT, value ANYELEMENT) +```plpgsql +split_range_partition(partition REGCLASS, + value ANYELEMENT, + partition_name TEXT DEFAULT NULL,) ``` Разбивает RANGE секцию `partition` на две секции по значению `value`. -``` -merge_range_partitions(partition1 TEXT, partition2 TEXT) + +```plpgsql +merge_range_partitions(partition1 REGCLASS, partition2 REGCLASS) ``` Объединяет две смежные RANGE секции. Данные из `partition2` копируются в `partition1`, после чего секция `partition2` удаляется. -``` -append_range_partition(p_relation TEXT) + +```plpgsql +append_range_partition(p_relation REGCLASS, + partition_name TEXT DEFAULT NULL) ``` Добавляет новую RANGE секцию в конец списка секций. -``` -prepend_range_partition(p_relation TEXT) + +```plpgsql +prepend_range_partition(p_relation REGCLASS, + partition_name TEXT DEFAULT NULL) ``` Добавляет новую RANGE секцию в начало списка секций. -``` -add_range_partition( - relation TEXT, - start_value ANYELEMENT, - end_value ANYELEMENT) +```plpgsql +add_range_partition(relation REGCLASS, + start_value ANYELEMENT, + end_value ANYELEMENT, + partition_name TEXT DEFAULT NULL) ``` Добавляет новую RANGE секцию с заданным диапазоном к секционированной таблице `relation`. -``` +```plpgsql drop_range_partition(partition TEXT) ``` Удаляет RANGE секцию вместе с содержащимися в ней данными. 
-``` -attach_range_partition( - relation TEXT, - partition TEXT, - start_value ANYELEMENT, - end_value ANYELEMENT) +```plpgsql +attach_range_partition(relation TEXT, + partition TEXT, + start_value ANYELEMENT, + end_value ANYELEMENT) ``` Присоединяет существующую таблицу `partition` в качестве секции к ранее секционированной таблице `relation`. Структура присоединяемой таблицы должна в точности повторять структуру родительской. -``` +```plpgsql detach_range_partition(partition TEXT) ``` Отсоединяет секцию `partition`, после чего она становится независимой таблицей. +```plpgsql +disable_pathman_for(relation TEXT) +``` +Отключает механизм секционирования `pg_pathman` для заданной таблицы. При этом созданные ранее секции остаются без изменений. + +```plpgsql +drop_partitions(parent REGCLASS, + delete_data BOOLEAN DEFAULT FALSE) +``` +Удаляет все секции таблицы `parent`. Если параметр `delete_data` задан как `false` (по-умолчанию `false`), то данные из секций копируются в родительскую таблицу. + +### Дополнительные параметры + +```plpgsql +enable_parent(relation REGCLASS) +disable_parent(relation REGCLASS) ``` -disable_partitioning(relation TEXT) +Включает/исключает родительскую таблицу в план запроса. В оригинальном планировщике PostgreSQL родительская таблица всегда включается в план запроса, даже если она пуста. Это создает дополнительные накладные расходы. Выполните `disable_parent()`, если вы не собираетесь хранить какие-либо данные в родительской таблице. Значение по-умолчанию зависит от того, был ли установлен параметр `partition_data` при первоначальном разбиении таблицы (см. функции `create_range_partitions()` и `create_partitions_from_range()`). Если он был установлен в значение `true`, то все данные были перемещены в секции, а родительская таблица отключена. В противном случае родительская таблица по-умолчанию включена.
+ +```plpgsql +enable_auto(relation REGCLASS) +disable_auto(relation REGCLASS) +``` +Включает/выключает автоматическое создание секций (только для RANGE секционирования). По-умолчанию включено. + +## Custom plan nodes +`pg_pathman` вводит три новых узла плана (см. [custom plan nodes](https://wiki.postgresql.org/wiki/CustomScanAPI)), предназначенных для оптимизации времени выполнения: + +- `RuntimeAppend` (замещает узел типа `Append`) +- `RuntimeMergeAppend` (замещает узел типа `MergeAppend`) +- `PartitionFilter` (выполняет работу INSERT-триггера) + +`PartitionFilter` работает как прокси-узел для INSERT-запросов, распределяя новые записи по соответствующим секциям: + +``` +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_table +SELECT generate_series(1, 10), random(); + QUERY PLAN +----------------------------------------- + Insert on partitioned_table + -> Custom Scan (PartitionFilter) + -> Subquery Scan on "*SELECT*" + -> Result +(4 rows) +``` + +Узлы `RuntimeAppend` и `RuntimeMergeAppend` имеют между собой много общего: они нужны в случае, когда условие WHERE принимает форму: +``` +ПЕРЕМЕННАЯ ОПЕРАТОР ПАРАМЕТР +``` +Подобные выражения не могут быть оптимизированы во время планирования, т.к. значение параметра неизвестно до стадии выполнения. Проблема может быть решена путем встраивания дополнительной процедуры анализа в код `Append` узла, таким образом позволяя ему выбирать лишь необходимые субпланы из всего списка дочерних планов.
+ +---------- + +Есть по меньшей мере несколько ситуаций, которые демонстрируют полезность таких узлов: + +``` +/* создаем таблицу, которую хотим секционировать */ +CREATE TABLE partitioned_table(id INT NOT NULL, payload REAL); + +/* заполняем данными */ +INSERT INTO partitioned_table +SELECT generate_series(1, 1000), random(); + +/* выполняем секционирование */ +SELECT create_hash_partitions('partitioned_table', 'id', 100); + +/* создаем обычную таблицу */ +CREATE TABLE some_table AS SELECT generate_series(1, 100) AS VAL; ``` -Отключает механизм секционирования `pg_pathman` для заданной таблицы и удаляет триггер на вставку. При этом созданные ранее секции остаются без изменений. -## Примеры использования -### HASH + + - **`id = (select ... limit 1)`** +``` +EXPLAIN (COSTS OFF, ANALYZE) SELECT * FROM partitioned_table +WHERE id = (SELECT * FROM some_table LIMIT 1); + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Custom Scan (RuntimeAppend) (actual time=0.030..0.033 rows=1 loops=1) + InitPlan 1 (returns $0) + -> Limit (actual time=0.011..0.011 rows=1 loops=1) + -> Seq Scan on some_table (actual time=0.010..0.010 rows=1 loops=1) + -> Seq Scan on partitioned_table_70 partitioned_table (actual time=0.004..0.006 rows=1 loops=1) + Filter: (id = $0) + Rows Removed by Filter: 9 + Planning time: 1.131 ms + Execution time: 0.075 ms +(9 rows) + +/* выключаем узел RuntimeAppend */ +SET pg_pathman.enable_runtimeappend = f; + +EXPLAIN (COSTS OFF, ANALYZE) SELECT * FROM partitioned_table +WHERE id = (SELECT * FROM some_table LIMIT 1); + QUERY PLAN +---------------------------------------------------------------------------------- + Append (actual time=0.196..0.274 rows=1 loops=1) + InitPlan 1 (returns $0) + -> Limit (actual time=0.005..0.005 rows=1 loops=1) + -> Seq Scan on some_table (actual time=0.003..0.003 rows=1 loops=1) + -> Seq Scan on partitioned_table_0 (actual time=0.014..0.014 rows=0 loops=1) + 
Filter: (id = $0) + Rows Removed by Filter: 6 + -> Seq Scan on partitioned_table_1 (actual time=0.003..0.003 rows=0 loops=1) + Filter: (id = $0) + Rows Removed by Filter: 5 + ... /* more plans follow */ + Planning time: 1.140 ms + Execution time: 0.855 ms +(306 rows) +``` + + - **`id = ANY (select ...)`** +``` +EXPLAIN (COSTS OFF, ANALYZE) SELECT * FROM partitioned_table +WHERE id = any (SELECT * FROM some_table limit 4); + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Nested Loop (actual time=0.025..0.060 rows=4 loops=1) + -> Limit (actual time=0.009..0.011 rows=4 loops=1) + -> Seq Scan on some_table (actual time=0.008..0.010 rows=4 loops=1) + -> Custom Scan (RuntimeAppend) (actual time=0.002..0.004 rows=1 loops=4) + -> Seq Scan on partitioned_table_70 partitioned_table (actual time=0.001..0.001 rows=10 loops=1) + -> Seq Scan on partitioned_table_26 partitioned_table (actual time=0.002..0.003 rows=9 loops=1) + -> Seq Scan on partitioned_table_27 partitioned_table (actual time=0.001..0.002 rows=20 loops=1) + -> Seq Scan on partitioned_table_63 partitioned_table (actual time=0.001..0.002 rows=9 loops=1) + Planning time: 0.771 ms + Execution time: 0.101 ms +(10 rows) + +/* выключаем узел RuntimeAppend */ +SET pg_pathman.enable_runtimeappend = f; + +EXPLAIN (COSTS OFF, ANALYZE) SELECT * FROM partitioned_table +WHERE id = any (SELECT * FROM some_table limit 4); + QUERY PLAN +----------------------------------------------------------------------------------------- + Nested Loop Semi Join (actual time=0.531..1.526 rows=4 loops=1) + Join Filter: (partitioned_table.id = some_table.val) + Rows Removed by Join Filter: 3990 + -> Append (actual time=0.190..0.470 rows=1000 loops=1) + -> Seq Scan on partitioned_table (actual time=0.187..0.187 rows=0 loops=1) + -> Seq Scan on partitioned_table_0 (actual time=0.002..0.004 rows=6 loops=1) + -> Seq Scan on partitioned_table_1 (actual time=0.001..0.001 rows=5 
loops=1) + -> Seq Scan on partitioned_table_2 (actual time=0.002..0.004 rows=14 loops=1) +... /* 96 scans follow */ + -> Materialize (actual time=0.000..0.000 rows=4 loops=1000) + -> Limit (actual time=0.005..0.006 rows=4 loops=1) + -> Seq Scan on some_table (actual time=0.003..0.004 rows=4 loops=1) + Planning time: 2.169 ms + Execution time: 2.059 ms +(110 rows) +``` + + - **`NestLoop` involving a partitioned table**, which is omitted since it's occasionally shown above. + +---------- + +Узнать больше о работе RuntimeAppend можно в [блоге](http://akorotkov.github.io/blog/2016/06/15/pg_pathman-runtime-append/) Александра Короткова. + +## Примеры + +### Common tips +- You can easily add **_partition_** column containing the names of the underlying partitions using the system attribute called **_tableoid_**: +``` +SELECT tableoid::regclass AS partition, * FROM partitioned_table; +``` +- Несмотря на то, что индексы на родительской таблице не очень полезны (т.к. таблица пуста), они тем не менее выполняют роль прототипов для создания индексов в дочерних таблицах: `pg_pathman` автоматически создает аналогичные индексы для каждой новой секции. + +- Получить все текущие процессы конкурентного секционирования можно из представления `pathman_concurrent_part_tasks`: +```plpgsql +postgres=# SELECT * FROM pathman_concurrent_part_tasks; + userid | pid | dbid | relid | processed | status +--------+------+-------+-------+-----------+--------- + dmitry | 7367 | 16384 | test | 472000 | working +(1 row) +``` + +### HASH секционирование Рассмотрим пример секционирования таблицы, используя HASH-стратегию на примере таблицы товаров. ``` CREATE TABLE items ( @@ -200,7 +394,7 @@ EXPLAIN SELECT * FROM ONLY items; Seq Scan on items (cost=0.00..0.00 rows=1 width=45) ``` -### RANGE +### RANGE секционирование Рассмотрим пример разбиения таблицы по диапазону дат. 
Пусть у нас имеется таблица логов: ``` CREATE TABLE journal ( @@ -274,14 +468,22 @@ EXPLAIN SELECT * FROM journal WHERE dt >= '2015-06-01' AND dt < '2015-06-03'; ``` ### Деакцивация pg_pathman -Деактивировать механизм pg_pathman для некоторой ранее разделенной таблицы можно следующей командой disable_partitioning(): +Для включения и отключения модуля `pg_pathman` и отдельных его компонентов существует ряд [GUC](https://www.postgresql.org/docs/9.5/static/config-setting.html) переменных: + + - `pg_pathman.enable` --- полное отключение (или включение) модуля `pg_pathman` + - `pg_pathman.enable_runtimeappend` --- включение/отключение функционала `RuntimeAppend` + - `pg_pathman.enable_runtimemergeappend` --- включение/отключение функционала `RuntimeMergeAppend` + - `pg_pathman.enable_partitionfilter` --- включение/отключение функционала `PartitionFilter` + +Чтобы **безвозвратно** отключить механизм `pg_pathman` для отдельной таблицы, используйте функцию `disable_pathman_for()`. В результате этой операции структура таблиц останется прежней, но для планирования и выполнения запросов будет использоваться стандартный механизм PostgreSQL. ``` -SELECT disable_partitioning('journal'); +SELECT disable_pathman_for('range_rel'); ``` -Все созданные секции и данные останутся по прежнему доступны и будут обрабатываться стандартным планировщиком PostgreSQL. -## Авторы - -Ильдар Мусин Postgres Professional, Россия +## Обратная связь +Если у вас есть вопросы или предложения, а также если вы обнаружили ошибки, напишите нам в разделе [issues](https://github.com/postgrespro/pg_pathman/issues).
-Александр Коротков Postgres Professional, Россия +## Авторы +Ильдар Мусин Postgres Professional, Россия +Александр Коротков Postgres Professional, Россия +Дмитрий Иванов Postgres Professional, Россия From 9b804487fe3c12dbb0fd2f70754203eb8053da2e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2016 09:26:12 +0300 Subject: [PATCH 084/184] concurrent partitioning subsystem now uses spinlocks --- src/pathman_workers.c | 125 +++++++++++++++++++++++------------------- src/pathman_workers.h | 42 +++++++++++--- 2 files changed, 102 insertions(+), 65 deletions(-) diff --git a/src/pathman_workers.c b/src/pathman_workers.c index 60bf6725a4..5ced9e57e1 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -106,7 +106,7 @@ init_concurrent_part_task_slots(void) memset(concurrent_part_slots, 0, size); for (i = 0; i < PART_WORKER_SLOTS; i++) - pg_atomic_init_flag_impl(&concurrent_part_slots[i].slot_used); + SpinLockInit(&concurrent_part_slots[i].mutex); } } @@ -235,10 +235,10 @@ start_bg_worker(const char bgworker_name[BGW_MAXLEN], static dsm_segment * create_partitions_bg_worker_segment(Oid relid, Datum value, Oid value_type) { - TypeCacheEntry *typcache; - Size datum_size; - Size segment_size; - dsm_segment *segment; + TypeCacheEntry *typcache; + Size datum_size; + Size segment_size; + dsm_segment *segment; SpawnPartitionArgs *args; typcache = lookup_type_cache(value_type, 0); @@ -314,10 +314,10 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) static void bgw_main_spawn_partitions(Datum main_arg) { - dsm_handle handle = DatumGetUInt32(main_arg); - dsm_segment *segment; - SpawnPartitionArgs *args; - Datum value; + dsm_handle handle = DatumGetUInt32(main_arg); + dsm_segment *segment; + SpawnPartitionArgs *args; + Datum value; /* Establish signal handlers before unblocking signals. 
*/ pqsignal(SIGTERM, handle_sigterm); @@ -512,8 +512,7 @@ bgw_main_concurrent_part(Datum main_arg) if (failures_count++ >= PART_WORKER_MAX_ATTEMPTS) { /* Mark slot as FREE */ - part_slot->worker_status = WS_FREE; - pg_atomic_clear_flag(&part_slot->slot_used); + cps_set_status(part_slot, WS_FREE); elog(LOG, "Concurrent partitioning worker has canceled the task because " @@ -534,14 +533,6 @@ bgw_main_concurrent_part(Datum main_arg) if (failed) { -#ifdef USE_ASSERT_CHECKING - elog(DEBUG1, "%s: could not relocate batch (%d/%d), total: %lu [%u]", - concurrent_part_bgw, - failures_count, PART_WORKER_MAX_ATTEMPTS, /* current/max */ - part_slot->total_rows, - MyProcPid); -#endif - /* Abort transaction and sleep for a second */ AbortCurrentTransaction(); DirectFunctionCall1(pg_sleep, Float8GetDatum(part_slot->sleep_time)); @@ -553,16 +544,18 @@ bgw_main_concurrent_part(Datum main_arg) failures_count = 0; /* Add rows to total_rows */ + SpinLockAcquire(&part_slot->mutex); part_slot->total_rows += rows; - +/* Report debug message */ #ifdef USE_ASSERT_CHECKING elog(DEBUG1, "%s: relocated %d rows, total: %lu [%u]", concurrent_part_bgw, rows, part_slot->total_rows, MyProcPid); #endif + SpinLockRelease(&part_slot->mutex); } /* If other backend requested to stop us, quit */ - if (part_slot->worker_status == WS_STOPPING) + if (cps_check_status(part_slot) == WS_STOPPING) break; } while(rows > 0 || failed); /* do while there's still rows to be relocated */ @@ -570,9 +563,8 @@ bgw_main_concurrent_part(Datum main_arg) /* Reclaim the resources */ pfree(sql); - /* Set slot free */ - part_slot->worker_status = WS_FREE; - pg_atomic_clear_flag(&part_slot->slot_used); + /* Mark slot as FREE */ + cps_set_status(part_slot, WS_FREE); } @@ -589,12 +581,11 @@ bgw_main_concurrent_part(Datum main_arg) Datum partition_table_concurrently(PG_FUNCTION_ARGS) { -#define tostr(str) ( #str ) +#define tostr(str) ( #str ) /* convert function's name to literal */ - Oid relid = PG_GETARG_OID(0); - 
ConcurrentPartSlot *my_slot = NULL; - int empty_slot_idx = -1; - int i; + Oid relid = PG_GETARG_OID(0); + int empty_slot_idx = -1; + int i; /* Check if relation is a partitioned table */ shout_if_prel_is_invalid(relid, @@ -607,38 +598,43 @@ partition_table_concurrently(PG_FUNCTION_ARGS) */ for (i = 0; i < PART_WORKER_SLOTS; i++) { - /* - * Attempt to acquire the flag. If it has alread been used then skip - * this slot and try another one - */ - if (!pg_atomic_test_set_flag(&concurrent_part_slots[i].slot_used)) - continue; + ConcurrentPartSlot *cur_slot = &concurrent_part_slots[i]; + bool keep_this_lock = false; - /* If atomic flag wasn't used then status should be WS_FREE */ - Assert(concurrent_part_slots[i].worker_status == WS_FREE); + SpinLockAcquire(&cur_slot->mutex); if (empty_slot_idx < 0) { - my_slot = &concurrent_part_slots[i]; empty_slot_idx = i; + keep_this_lock = true; } - if (concurrent_part_slots[i].relid == relid && - concurrent_part_slots[i].dbid == MyDatabaseId) + if (cur_slot->relid == relid && + cur_slot->dbid == MyDatabaseId) { + if (empty_slot_idx >= 0) + SpinLockRelease(&cur_slot->mutex); + elog(ERROR, "Table \"%s\" is already being partitioned", get_rel_name(relid)); } + + if (!keep_this_lock) + SpinLockRelease(&cur_slot->mutex); } - if (my_slot == NULL) + if (empty_slot_idx < 0) elog(ERROR, "No empty worker slots found"); + else + { + /* Initialize concurrent part slot */ + InitConcurrentPartSlot(&concurrent_part_slots[empty_slot_idx], + GetAuthenticatedUserId(), WS_WORKING, + MyDatabaseId, relid, 1000, 1.0); - /* Initialize concurrent part slot */ - InitConcurrentPartSlot(my_slot, GetAuthenticatedUserId(), - WS_WORKING, MyDatabaseId, relid, - 1000, 1.0); + SpinLockRelease(&concurrent_part_slots[empty_slot_idx].mutex); + } /* Start worker (we should not wait) */ start_bg_worker(concurrent_part_bgw, @@ -712,11 +708,13 @@ show_concurrent_part_tasks_internal(PG_FUNCTION_ARGS) { ConcurrentPartSlot *cur_slot = &concurrent_part_slots[i]; + 
SpinLockAcquire(&cur_slot->mutex); + if (cur_slot->worker_status != WS_FREE) { HeapTuple tuple; Datum values[Natts_pathman_cp_tasks]; - bool isnull[Natts_pathman_cp_tasks] = { 0, 0, 0, 0, 0, 0 }; + bool isnull[Natts_pathman_cp_tasks] = { 0 }; values[Anum_pathman_cp_tasks_userid - 1] = cur_slot->userid; values[Anum_pathman_cp_tasks_pid - 1] = cur_slot->pid; @@ -750,6 +748,8 @@ show_concurrent_part_tasks_internal(PG_FUNCTION_ARGS) SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } + + SpinLockRelease(&cur_slot->mutex); } SRF_RETURN_DONE(funcctx); @@ -763,22 +763,35 @@ Datum stop_concurrent_part_task(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); + bool worker_found = false; int i; - ConcurrentPartSlot *slot; - for (i = 0; i < PART_WORKER_SLOTS; i++) - slot = &concurrent_part_slots[i]; + for (i = 0; i < PART_WORKER_SLOTS && !worker_found; i++) + { + ConcurrentPartSlot *cur_slot = &concurrent_part_slots[i]; + + SpinLockAcquire(&cur_slot->mutex); - if (slot->worker_status != WS_FREE && - slot->relid == relid && - slot->dbid == MyDatabaseId) + if (cur_slot->worker_status != WS_FREE && + cur_slot->relid == relid && + cur_slot->dbid == MyDatabaseId) { - slot->worker_status = WS_STOPPING; elog(NOTICE, "Worker will stop after it finishes current batch"); - PG_RETURN_BOOL(true); + cur_slot->worker_status = WS_STOPPING; + worker_found = true; } - elog(ERROR, "Cannot find worker for relation \"%s\"", - get_rel_name_or_relid(relid)); + SpinLockRelease(&cur_slot->mutex); + } + + if (worker_found) + PG_RETURN_BOOL(true); + else + { + elog(ERROR, "Cannot find worker for relation \"%s\"", + get_rel_name_or_relid(relid)); + + PG_RETURN_BOOL(false); /* keep compiler happy */ + } } diff --git a/src/pathman_workers.h b/src/pathman_workers.h index 9332ac0e59..57893d80a4 100644 --- a/src/pathman_workers.h +++ b/src/pathman_workers.h @@ -18,6 +18,7 @@ #define PATHMAN_WORKERS_H #include "postgres.h" +#include "storage/spin.h" /* @@ -41,22 +42,24 @@ typedef struct } SpawnPartitionArgs; 
+typedef enum +{ + WS_FREE = 0, /* slot is empty */ + WS_WORKING, /* occupied by live worker */ + WS_STOPPING /* worker is going to shutdown */ + +} ConcurrentPartSlotStatus; + /* * Store args and execution status of a single ConcurrentPartWorker. */ typedef struct { - pg_atomic_flag slot_used; /* flag for atomic slot acquirement */ - Oid userid; /* connect as a specified user */ + slock_t mutex; /* protect slot from race conditions */ - enum - { - WS_FREE = 0, /* slot is empty */ - WS_WORKING, /* occupied by live worker */ - WS_STOPPING /* worker is going to shutdown */ - - } worker_status; /* status of a particular worker */ + ConcurrentPartSlotStatus worker_status; /* status of a particular worker */ + Oid userid; /* connect as a specified user */ pid_t pid; /* worker's PID */ Oid dbid; /* database which contains the relation */ Oid relid; /* table to be partitioned concurrently */ @@ -78,6 +81,27 @@ typedef struct (slot)->sleep_time = (sleep_t); \ } while (0) +static inline ConcurrentPartSlotStatus +cps_check_status(ConcurrentPartSlot *slot) +{ + ConcurrentPartSlotStatus status; + + SpinLockAcquire(&slot->mutex); + status = slot->worker_status; + SpinLockRelease(&slot->mutex); + + return status; +} + +static inline void +cps_set_status(ConcurrentPartSlot *slot, ConcurrentPartSlotStatus status) +{ + SpinLockAcquire(&slot->mutex); + slot->worker_status = status; + SpinLockRelease(&slot->mutex); +} + + /* Number of worker slots for concurrent partitioning */ #define PART_WORKER_SLOTS 10 From 788d41cc6de9585ee54ce382bbcb899d91e0ac44 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2016 09:45:33 +0300 Subject: [PATCH 085/184] bugfixes for concurrent partitioning --- src/pathman_workers.c | 36 ++++++++++++++++++++++-------------- src/pathman_workers.h | 6 +++--- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/pathman_workers.c b/src/pathman_workers.c index 5ced9e57e1..23bc9b868d 100644 --- a/src/pathman_workers.c +++ 
b/src/pathman_workers.c @@ -512,7 +512,7 @@ bgw_main_concurrent_part(Datum main_arg) if (failures_count++ >= PART_WORKER_MAX_ATTEMPTS) { /* Mark slot as FREE */ - cps_set_status(part_slot, WS_FREE); + cps_set_status(part_slot, CPS_FREE); elog(LOG, "Concurrent partitioning worker has canceled the task because " @@ -555,7 +555,7 @@ bgw_main_concurrent_part(Datum main_arg) } /* If other backend requested to stop us, quit */ - if (cps_check_status(part_slot) == WS_STOPPING) + if (cps_check_status(part_slot) == CPS_STOPPING) break; } while(rows > 0 || failed); /* do while there's still rows to be relocated */ @@ -564,7 +564,7 @@ bgw_main_concurrent_part(Datum main_arg) pfree(sql); /* Mark slot as FREE */ - cps_set_status(part_slot, WS_FREE); + cps_set_status(part_slot, CPS_FREE); } @@ -603,7 +603,8 @@ partition_table_concurrently(PG_FUNCTION_ARGS) SpinLockAcquire(&cur_slot->mutex); - if (empty_slot_idx < 0) + /* Should we take this slot into account? */ + if (empty_slot_idx < 0 && cur_slot->worker_status == CPS_FREE) { empty_slot_idx = i; keep_this_lock = true; @@ -630,7 +631,7 @@ partition_table_concurrently(PG_FUNCTION_ARGS) { /* Initialize concurrent part slot */ InitConcurrentPartSlot(&concurrent_part_slots[empty_slot_idx], - GetAuthenticatedUserId(), WS_WORKING, + GetAuthenticatedUserId(), CPS_WORKING, MyDatabaseId, relid, 1000, 1.0); SpinLockRelease(&concurrent_part_slots[empty_slot_idx].mutex); @@ -707,12 +708,13 @@ show_concurrent_part_tasks_internal(PG_FUNCTION_ARGS) for (i = userctx->cur_idx; i < PART_WORKER_SLOTS; i++) { ConcurrentPartSlot *cur_slot = &concurrent_part_slots[i]; + HeapTuple htup = NULL; + HOLD_INTERRUPTS(); SpinLockAcquire(&cur_slot->mutex); - if (cur_slot->worker_status != WS_FREE) + if (cur_slot->worker_status != CPS_FREE) { - HeapTuple tuple; Datum values[Natts_pathman_cp_tasks]; bool isnull[Natts_pathman_cp_tasks] = { 0 }; @@ -725,12 +727,12 @@ show_concurrent_part_tasks_internal(PG_FUNCTION_ARGS) /* Now build a status string */ 
switch(cur_slot->worker_status) { - case WS_WORKING: + case CPS_WORKING: values[Anum_pathman_cp_tasks_status - 1] = PointerGetDatum(cstring_to_text("working")); break; - case WS_STOPPING: + case CPS_STOPPING: values[Anum_pathman_cp_tasks_status - 1] = PointerGetDatum(cstring_to_text("stopping")); break; @@ -741,15 +743,18 @@ show_concurrent_part_tasks_internal(PG_FUNCTION_ARGS) } /* Form output tuple */ - tuple = heap_form_tuple(funcctx->tuple_desc, values, isnull); + htup = heap_form_tuple(funcctx->tuple_desc, values, isnull); /* Switch to next worker */ userctx->cur_idx = i + 1; - - SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } SpinLockRelease(&cur_slot->mutex); + RESUME_INTERRUPTS(); + + /* Return tuple if needed */ + if (htup) + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(htup)); } SRF_RETURN_DONE(funcctx); @@ -770,19 +775,22 @@ stop_concurrent_part_task(PG_FUNCTION_ARGS) { ConcurrentPartSlot *cur_slot = &concurrent_part_slots[i]; + HOLD_INTERRUPTS(); SpinLockAcquire(&cur_slot->mutex); - if (cur_slot->worker_status != WS_FREE && + if (cur_slot->worker_status != CPS_FREE && cur_slot->relid == relid && cur_slot->dbid == MyDatabaseId) { elog(NOTICE, "Worker will stop after it finishes current batch"); - cur_slot->worker_status = WS_STOPPING; + /* Change worker's state & set 'worker_found' */ + cur_slot->worker_status = CPS_STOPPING; worker_found = true; } SpinLockRelease(&cur_slot->mutex); + RESUME_INTERRUPTS(); } if (worker_found) diff --git a/src/pathman_workers.h b/src/pathman_workers.h index 57893d80a4..3ea664d57c 100644 --- a/src/pathman_workers.h +++ b/src/pathman_workers.h @@ -44,9 +44,9 @@ typedef struct typedef enum { - WS_FREE = 0, /* slot is empty */ - WS_WORKING, /* occupied by live worker */ - WS_STOPPING /* worker is going to shutdown */ + CPS_FREE = 0, /* slot is empty */ + CPS_WORKING, /* occupied by live worker */ + CPS_STOPPING /* worker is going to shutdown */ } ConcurrentPartSlotStatus; From a3c3d0571abaa964386914cff3408ce9693648bc Mon 
Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 30 Aug 2016 10:27:34 +0300 Subject: [PATCH 086/184] English documentation fix --- README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 31da0545d8..c372299075 100644 --- a/README.md +++ b/README.md @@ -29,16 +29,14 @@ WHERE id = 150 Based on the partitioning type and condition's operator, `pg_pathman` searches for the corresponding partitions and builds the plan. Currently `pg_pathman` supports two partitioning schemes: * **RANGE** - maps rows to partitions using partitioning key ranges assigned to each partition. Optimization is achieved by using the binary search algorithm; -* **HASH** - maps rows to partitions using a generic hash function (only *integer* attributes are supported at the moment). +* **HASH** - maps rows to partitions using a generic hash function. More interesting features are yet to come. Stay tuned! ## Roadmap - * Replace INSERT triggers with a custom node (aka **PartitionFilter**) - * Implement [concurrent partitioning](https://github.com/postgrespro/pg_pathman/tree/concurrent_part) (much more responsive) - * Implement HASH partitioning for non-integer attributes - * Optimize hash join (both tables are partitioned by join key) - * Implement LIST partitioning scheme + + * Implement LIST partitioning scheme; + * Optimize hash join (both tables are partitioned by join key). 
## Installation guide To install `pg_pathman`, execute this in the module's directory: From cd60d482e60a5b5c401d6a0f5c1fe3ca84b9f3b1 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 30 Aug 2016 10:58:04 +0300 Subject: [PATCH 087/184] concurrent partitioning bugfix --- src/pathman_workers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pathman_workers.c b/src/pathman_workers.c index 23bc9b868d..05c5e51619 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -609,8 +609,7 @@ partition_table_concurrently(PG_FUNCTION_ARGS) empty_slot_idx = i; keep_this_lock = true; } - - if (cur_slot->relid == relid && + else if (cur_slot->relid == relid && cur_slot->dbid == MyDatabaseId) { if (empty_slot_idx >= 0) From f523d2ca3fb6a568b0a230c5cd4837784ed2ee87 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2016 11:11:47 +0300 Subject: [PATCH 088/184] don't forget to unlock current slot too --- src/pathman_workers.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/pathman_workers.c b/src/pathman_workers.c index 23bc9b868d..119cdc0535 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -584,12 +584,14 @@ partition_table_concurrently(PG_FUNCTION_ARGS) #define tostr(str) ( #str ) /* convert function's name to literal */ Oid relid = PG_GETARG_OID(0); - int empty_slot_idx = -1; + int empty_slot_idx = -1; /* do we have a slot for BGWorker? */ int i; /* Check if relation is a partitioned table */ shout_if_prel_is_invalid(relid, + /* We also lock the parent relation */ get_pathman_relation_info_after_lock(relid, true), + /* Partitioning type does not matter here */ PT_INDIFFERENT); /* @@ -601,30 +603,38 @@ partition_table_concurrently(PG_FUNCTION_ARGS) ConcurrentPartSlot *cur_slot = &concurrent_part_slots[i]; bool keep_this_lock = false; + /* Lock current slot */ SpinLockAcquire(&cur_slot->mutex); - /* Should we take this slot into account? 
*/ + /* Should we take this slot into account? (it should be FREE) */ if (empty_slot_idx < 0 && cur_slot->worker_status == CPS_FREE) { - empty_slot_idx = i; - keep_this_lock = true; + empty_slot_idx = i; /* yes, remember this slot */ + keep_this_lock = true; /* also don't unlock it */ } + /* Oops, looks like we already have BGWorker for this table */ if (cur_slot->relid == relid && cur_slot->dbid == MyDatabaseId) { - if (empty_slot_idx >= 0) - SpinLockRelease(&cur_slot->mutex); + /* Unlock current slot */ + SpinLockRelease(&cur_slot->mutex); + + /* Release borrowed slot for new BGWorker too */ + if (empty_slot_idx >= 0 && empty_slot_idx != i) + SpinLockRelease(&concurrent_part_slots[empty_slot_idx].mutex); elog(ERROR, "Table \"%s\" is already being partitioned", get_rel_name(relid)); } + /* Normally we don't want to keep it */ if (!keep_this_lock) SpinLockRelease(&cur_slot->mutex); } + /* Looks like we could not find an empty slot */ if (empty_slot_idx < 0) elog(ERROR, "No empty worker slots found"); else @@ -634,6 +644,7 @@ partition_table_concurrently(PG_FUNCTION_ARGS) GetAuthenticatedUserId(), CPS_WORKING, MyDatabaseId, relid, 1000, 1.0); + /* Now we can safely unlock slot for new BGWorker */ SpinLockRelease(&concurrent_part_slots[empty_slot_idx].mutex); } From d886aa7db8f14a7f14cb30d6c4de4a83887448ca Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 30 Aug 2016 12:01:19 +0300 Subject: [PATCH 089/184] fixed clang warning message --- src/nodes_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nodes_common.c b/src/nodes_common.c index a733070f53..f99273d762 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -262,7 +262,7 @@ get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel) result = repalloc(result, allocated * sizeof(Oid)); } - Assert(i < (uint32) abs(PrelChildrenCount(prel))); + Assert(i < PrelChildrenCount(prel)); result[used++] = children[i]; } } From a165a449b578503b8cf2f9f746e7802a694f7644 Mon Sep 
17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2016 14:14:20 +0300 Subject: [PATCH 090/184] fix annoying godforsaken bug in handle_binary_opexpr() --- src/pg_pathman.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 115be5a8f8..281bf15834 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -1194,6 +1194,7 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, return; /* exit on equal */ } + break; /* continue to function's end */ case PT_RANGE: { From c78ee4ed5e06e9d3c74249283009958dddfa5e8f Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2016 15:25:14 +0300 Subject: [PATCH 091/184] include parent into Runtime[Merge]Append's plan if asked to, introduce macro PrelParentRelid --- src/init.c | 4 ++-- src/nodes_common.c | 14 ++++++++++++++ src/pg_pathman.c | 8 ++++---- src/relation_info.h | 6 ++++-- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/init.c b/src/init.c index b00430ef4b..aa3e5a0ee6 100644 --- a/src/init.c +++ b/src/init.c @@ -373,7 +373,7 @@ fill_prel_with_partitions(const Oid *partitions, DisablePathman(); /* disable pg_pathman since config is broken */ ereport(ERROR, (errmsg("Unknown partitioning type for relation \"%s\"", - get_rel_name_or_relid(prel->key)), + get_rel_name_or_relid(PrelParentRelid(prel))), errhint(INIT_ERROR_HINT))); } } @@ -419,7 +419,7 @@ fill_prel_with_partitions(const Oid *partitions, DisablePathman(); /* disable pg_pathman since config is broken */ elog(ERROR, "pg_pathman's cache for relation \"%s\" " "has not been properly initialized", - get_rel_name_or_relid(prel->key)); + get_rel_name_or_relid(PrelParentRelid(prel))); } } #endif diff --git a/src/nodes_common.c b/src/nodes_common.c index f99273d762..26810a5875 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -248,6 +248,12 @@ get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel) Oid *result = (Oid *) palloc(allocated * sizeof(Oid)); Oid *children = 
PrelGetChildrenArray(prel); + /* If required, add parent to result */ + Assert(INITIAL_ALLOC_NUM >= 1); + if (prel->enable_parent) + result[used++] = PrelParentRelid(prel); + + /* Deal with selected partitions */ foreach (range_cell, ranges) { uint32 i; @@ -366,6 +372,10 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, { Plan *child_plan = (Plan *) lfirst(lc2); RelOptInfo *child_rel = ((Path *) lfirst(lc1))->parent; + Oid child_relid; + + /* Fetch relid of the 'child_rel' */ + child_relid = root->simple_rte_array[child_rel->relid]->relid; /* Replace rel's tlist with a matching one */ if (!cscan->scan.plan.targetlist) @@ -380,6 +390,10 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, if (!cscan->custom_scan_tlist) cscan->custom_scan_tlist = replace_tlist_varnos(child_plan->targetlist, rel); + + /* If this is a plan for parent table, fill it with quals */ + if (PrelParentRelid(prel) == child_relid) + child_plan->qual = get_actual_clauses(clauses); } } diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 281bf15834..30deb8a83f 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -900,13 +900,13 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) elog(ERROR, "Could not connect using SPI"); /* while (value >= MAX) ... */ - spawn_partitions(prel->key, value, max_rvalue, prel->atttype, - &interval_type_cmp, interval_binary, + spawn_partitions(PrelParentRelid(prel), value, max_rvalue, + prel->atttype, &interval_type_cmp, interval_binary, interval_type, true, &partid); /* while (value < MIN) ... 
*/ - spawn_partitions(prel->key, value, min_rvalue, prel->atttype, - &interval_type_cmp, interval_binary, + spawn_partitions(PrelParentRelid(prel), value, min_rvalue, + prel->atttype, &interval_type_cmp, interval_binary, interval_type, false, &partid); SPI_finish(); /* close SPI connection */ diff --git a/src/relation_info.h b/src/relation_info.h index a4f290789d..1ed9993338 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -96,6 +96,8 @@ typedef enum * PartRelationInfo field access macros. */ +#define PrelParentRelid(prel) ( (prel)->key ) + #define PrelGetChildrenArray(prel) ( (prel)->children ) #define PrelGetRangesArray(prel) ( (prel)->ranges ) @@ -111,7 +113,7 @@ PrelLastChild(const PartRelationInfo *prel) if (PrelChildrenCount(prel) == 0) elog(ERROR, "pg_pathman's cache entry for relation %u has 0 children", - prel->key); + PrelParentRelid(prel)); return PrelChildrenCount(prel) - 1; /* last partition */ } @@ -161,7 +163,7 @@ FreeChildrenArray(PartRelationInfo *prel) Oid child = (prel)->children[i]; /* If it's *always been* relid's partition, free cache */ - if (prel->key == get_parent_of_partition(child, NULL)) + if (PrelParentRelid(prel) == get_parent_of_partition(child, NULL)) forget_parent_of_partition(child, NULL); } From 92209e40d5f133f035fecbcf41140f39d08fce23 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2016 15:58:03 +0300 Subject: [PATCH 092/184] fix outdated function parameter types in README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index abcda08311..fcd329bc2c 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ create_range_partitions(relation REGCLASS, count INTEGER DEFAULT NULL partition_data BOOLEAN DEFAULT true) -create_range_partitions(relation TEXT, +create_range_partitions(relation REGCLASS, attribute TEXT, start_value ANYELEMENT, interval INTERVAL, @@ -162,15 +162,15 @@ drop_range_partition(partition TEXT) Drop RANGE partition and all 
its data. ```plpgsql -attach_range_partition(relation TEXT, - partition TEXT, +attach_range_partition(relation REGCLASS, + partition REGCLASS, start_value ANYELEMENT, end_value ANYELEMENT) ``` Attach partition to the existing RANGE-partitioned relation. The attached table must have exactly the same structure as the parent table, including the dropped columns. ```plpgsql -detach_range_partition(partition TEXT) +detach_range_partition(partition REGCLASS) ``` Detach partition from the existing RANGE-partitioned relation. From 6f763b65939174f1d9acfa6bdf40509aab6eba8c Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2016 16:00:10 +0300 Subject: [PATCH 093/184] fix outdated function parameter types in README.rus.md --- README.rus.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rus.md b/README.rus.md index 85c8ea70bc..05c55fce50 100644 --- a/README.rus.md +++ b/README.rus.md @@ -77,7 +77,7 @@ create_range_partitions(relation REGCLASS, count INTEGER DEFAULT NULL partition_data BOOLEAN DEFAULT true) -create_range_partitions(relation TEXT, +create_range_partitions(relation REGCLASS, attribute TEXT, start_value ANYELEMENT, interval INTERVAL, @@ -164,20 +164,20 @@ drop_range_partition(partition TEXT) Удаляет RANGE секцию вместе с содержащимися в ней данными. ```plpgsql -attach_range_partition(relation TEXT, - partition TEXT, +attach_range_partition(relation REGCLASS, + partition REGCLASS, start_value ANYELEMENT, end_value ANYELEMENT) ``` Присоединяет существующую таблицу `partition` в качестве секции к ранее секционированной таблице `relation`. Структура присоединяемой таблицы должна в точности повторять структуру родительской. ```plpgsql -detach_range_partition(partition TEXT) +detach_range_partition(partition REGCLASS) ``` Отсоединяет секцию `partition`, после чего она становится независимой таблицей. 
```plpgsql -disable_pathman_for(relation TEXT) +disable_pathman_for(relation REGCLASS) ``` Отключает механизм секционирования `pg_pathman` для заданной таблицы. При этом созданные ранее секции остаются без изменений. From d643c7463e767564330767c498dc68ab62bdc9db Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2016 16:25:37 +0300 Subject: [PATCH 094/184] place lock_partitioned_relation() in various places over the range.sql --- range.sql | 140 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 48 deletions(-) diff --git a/range.sql b/range.sql index 7d52264dbe..a1700baa56 100644 --- a/range.sql +++ b/range.sql @@ -34,6 +34,47 @@ END $$ LANGUAGE plpgsql; +/* + * Check RANGE partition boundaries. + */ +CREATE OR REPLACE FUNCTION @extschema@.check_boundaries( + parent_relid REGCLASS, + p_attribute TEXT, + p_start_value ANYELEMENT, + p_end_value ANYELEMENT) +RETURNS VOID AS +$$ +DECLARE + v_min p_start_value%TYPE; + v_max p_start_value%TYPE; + v_count BIGINT; + +BEGIN + /* Get min and max values */ + EXECUTE format('SELECT count(*), min(%1$s), max(%1$s) + FROM %2$s WHERE NOT %1$s IS NULL', + p_attribute, parent_relid::TEXT) + INTO v_count, v_min, v_max; + + /* Check if column has NULL values */ + IF v_count > 0 AND (v_min IS NULL OR v_max IS NULL) THEN + RAISE EXCEPTION '''%'' column contains NULL values', p_attribute; + END IF; + + /* Check lower boundary */ + IF p_start_value > v_min THEN + RAISE EXCEPTION 'Start value is less than minimum value of ''%''', + p_attribute; + END IF; + + /* Check upper boundary */ + IF p_end_value <= v_max THEN + RAISE EXCEPTION 'Not enough partitions to fit all values of ''%''', + p_attribute; + END IF; +END +$$ LANGUAGE plpgsql; + /* * Creates RANGE partitions for specified relation based on datetime attribute */ @@ -53,6 +94,9 @@ DECLARE i INTEGER; BEGIN + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); + PERFORM 
@extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); @@ -147,6 +191,9 @@ DECLARE i INTEGER; BEGIN + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); @@ -239,6 +286,9 @@ DECLARE part_count INTEGER := 0; BEGIN + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); @@ -304,6 +354,9 @@ DECLARE part_count INTEGER := 0; BEGIN + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); @@ -351,48 +404,8 @@ END $$ LANGUAGE plpgsql; /* - * Check RANGE partition boundaries. 
- */ -CREATE OR REPLACE FUNCTION @extschema@.check_boundaries( - parent_relid REGCLASS, - p_attribute TEXT, - p_start_value ANYELEMENT, - p_end_value ANYELEMENT) -RETURNS VOID AS -$$ -DECLARE - v_min p_start_value%TYPE; - v_max p_start_value%TYPE; - v_count BIGINT; - -BEGIN - /* Get min and max values */ - EXECUTE format('SELECT count(*), min(%1$s), max(%1$s) - FROM %2$s WHERE NOT %1$s IS NULL', - p_attribute, parent_relid::TEXT) - INTO v_count, v_min, v_max; - - /* Check if column has NULL values */ - IF v_count > 0 AND (v_min IS NULL OR v_max IS NULL) THEN - RAISE EXCEPTION '''%'' column contains NULL values', p_attribute; - END IF; - - /* Check lower boundary */ - IF p_start_value > v_min THEN - RAISE EXCEPTION 'Start value is less than minimum value of ''%''', - p_attribute; - END IF; - - /* Check upper boundary */ - IF p_end_value <= v_max THEN - RAISE EXCEPTION 'Not enough partitions to fit all values of ''%''', - p_attribute; - END IF; -END -$$ LANGUAGE plpgsql; - -/* - * Creates new RANGE partition. Returns partition name + * Creates new RANGE partition. Returns partition name. + * NOTE: This function SHOULD NOT take xact_handling lock (BGWs in 9.5). */ CREATE OR REPLACE FUNCTION @extschema@.create_single_range_partition( parent_relid REGCLASS, @@ -485,6 +498,9 @@ BEGIN v_part_relname := @extschema@.validate_relname(p_partition); v_parent_relid = @extschema@.get_parent_of_partition(p_partition); + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(v_parent_relid); + SELECT attname, parttype FROM @extschema@.pathman_config WHERE partrel = v_parent_relid @@ -573,6 +589,9 @@ BEGIN RAISE EXCEPTION 'Cannot merge partitions with different parents'; END IF; + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(v_parent_relid1); + SELECT attname, parttype FROM @extschema@.pathman_config WHERE partrel = v_parent_relid1 @@ -604,8 +623,8 @@ LANGUAGE plpgsql; * Merge two partitions. 
All data will be copied to the first one. Second * partition will be destroyed. * - * Notes: dummy field is used to pass the element type to the function - * (it is necessary because of pseudo-types used in function) + * NOTE: dummy field is used to pass the element type to the function + * (it is necessary because of pseudo-types used in function). */ CREATE OR REPLACE FUNCTION @extschema@.merge_range_partitions_internal( parent_relid REGCLASS, @@ -668,7 +687,7 @@ $$ LANGUAGE plpgsql; /* - * Append new partition + * Append new partition. */ CREATE OR REPLACE FUNCTION @extschema@.append_range_partition( parent_relid REGCLASS, @@ -682,6 +701,9 @@ DECLARE v_interval TEXT; BEGIN + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); + SELECT attname, range_interval FROM @extschema@.pathman_config WHERE partrel = parent_relid @@ -715,7 +737,12 @@ END $$ LANGUAGE plpgsql; - +/* + * Spawn logic for append_partition(). We have to + * separate this in order to pass the 'p_range'. + * + * NOTE: we don't take a xact_handling lock here. + */ CREATE OR REPLACE FUNCTION @extschema@.append_partition_internal( parent_relid REGCLASS, p_atttype TEXT, @@ -761,7 +788,7 @@ LANGUAGE plpgsql; /* - * Prepend new partition + * Prepend new partition. */ CREATE OR REPLACE FUNCTION @extschema@.prepend_range_partition( parent_relid REGCLASS, @@ -808,7 +835,12 @@ END $$ LANGUAGE plpgsql; - +/* + * Spawn logic for prepend_partition(). We have to + * separate this in order to pass the 'p_range'. + * + * NOTE: we don't take a xact_handling lock here. 
+ */ CREATE OR REPLACE FUNCTION @extschema@.prepend_partition_internal( parent_relid REGCLASS, p_atttype TEXT, @@ -867,6 +899,9 @@ DECLARE v_part_name TEXT; BEGIN + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); + IF p_start_value >= p_end_value THEN RAISE EXCEPTION 'Failed to create partition: p_start_value is greater than p_end_value'; END IF; @@ -908,6 +943,9 @@ BEGIN parent_relid := @extschema@.get_parent_of_partition(p_partition); part_name := p_partition::TEXT; /* save the name to be returned */ + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); + /* Drop table */ EXECUTE format('DROP TABLE %s', part_name); @@ -938,6 +976,9 @@ DECLARE rel_persistence CHAR; BEGIN + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); + /* Ignore temporary tables */ SELECT relpersistence FROM pg_catalog.pg_class WHERE oid = p_partition INTO rel_persistence; @@ -998,6 +1039,9 @@ DECLARE BEGIN parent_relid = @extschema@.get_parent_of_partition(p_partition); + /* Acquire exclusive lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); + v_attname := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; From d3076c409eb60f093f890278b61cb46c6e092b81 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2016 17:57:06 +0400 Subject: [PATCH 095/184] Update roadmap in README.md This feature was requested by github user @1803 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index fcd329bc2c..7d3606c7b1 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ More interesting features are yet to come. Stay tuned! 
## Roadmap + * Provide a way to create user-defined partition creation\destruction callbacks (issue [#22](https://github.com/postgrespro/pg_pathman/issues/22)) * Implement LIST partitioning scheme; * Optimize hash join (both tables are partitioned by join key). From 6f83fe1d8932820884e3484ff15a2deb162a853c Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2016 17:03:16 +0300 Subject: [PATCH 096/184] fixes for README.md & README.rus.md --- README.md | 4 ++-- README.rus.md | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7d3606c7b1..0c5c36ce44 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ Done! Now it's time to setup your partitioning schemes. create_hash_partitions(relation REGCLASS, attribute TEXT, partitions_count INTEGER, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL) ``` Performs HASH partitioning for `relation` by integer key `attribute`. The `partitions_count` parameter specifies the number of partitions to create; it cannot be changed afterwards. If `partition_data` is `true` then all the data will be automatically copied from the parent table to partitions. Note that data migration may took a while to finish and the table will be locked until transaction commits. See `partition_table_concurrently()` for a lock-free way to migrate data. @@ -350,7 +350,7 @@ SELECT tableoid::regclass AS partition, * FROM partitioned_table; - All running concurrent partitioning tasks can be listed using the `pathman_concurrent_part_tasks` view: ```plpgsql -postgres=# SELECT * FROM pathman_concurrent_part_tasks; +SELECT * FROM pathman_concurrent_part_tasks; userid | pid | dbid | relid | processed | status --------+------+-------+-------+-----------+--------- dmitry | 7367 | 16384 | test | 472000 | working diff --git a/README.rus.md b/README.rus.md index 05c55fce50..a06f25ceca 100644 --- a/README.rus.md +++ b/README.rus.md @@ -38,6 +38,7 @@ More interesting features are yet to come. 
Stay tuned! ## Roadmap + * Предоставить возможность установки пользовательских колбеков на создание\уничтожение партиции (issue [#22](https://github.com/postgrespro/pg_pathman/issues/22)) * LIST-секционирование; * Оптимизация hash join для случая, когда обе таблицы секционированы по ключу join’а. @@ -65,7 +66,7 @@ CREATE EXTENSION pg_pathman; create_hash_partitions(relation REGCLASS, attribute TEXT, partitions_count INTEGER, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL) ``` Выполняет HASH-секционирование таблицы `relation` по целочисленному полю `attribute`. Параметр `partitions_count` определяет, сколько секций будет создано. Если `partition_data` установлен в значение `true`, то данные из родительской таблицы будут автоматически распределены по секциям. Стоит иметь в виду, что миграция данных может занять некоторое время, а данные заблокированы. Для конкурентной миграции данных см. функцию `partition_table_concurrently()`. @@ -348,7 +349,7 @@ SELECT tableoid::regclass AS partition, * FROM partitioned_table; - Получить все текущие процессы конкурентного секционирования можно из представления `pathman_concurrent_part_tasks`: ```plpgsql -postgres=# SELECT * FROM pathman_concurrent_part_tasks; +SELECT * FROM pathman_concurrent_part_tasks; userid | pid | dbid | relid | processed | status --------+------+-------+-------+-----------+--------- dmitry | 7367 | 16384 | test | 472000 | working From e1552663e2ebaad5e1433a1e56759a9537ca715c Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 30 Aug 2016 18:58:53 +0300 Subject: [PATCH 097/184] disable auto partition propagation in concurrent partitioning --- src/hooks.c | 1 + src/init.c | 11 +++++++ src/init.h | 16 ++++++++++ src/partition_filter.c | 7 ++++- src/pathman_workers.c | 9 ++++-- src/pg_pathman.c | 66 ++++++++++++++++++++++++++---------------- 6 files changed, 81 insertions(+), 29 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 5101872c97..1698d3d8cf 100644 --- a/src/hooks.c +++ 
b/src/hooks.c @@ -521,6 +521,7 @@ pathman_relcache_hook(Datum arg, Oid relid) case PPS_ENTRY_NOT_FOUND: { /* NOTE: Remove NOT_USED when it's time */ + delay_invalidation_parent_rel(partitioned_table); #ifdef NOT_USED elog(DEBUG2, "Invalidation message for relation %u [%u]", relid, MyProcPid); diff --git a/src/init.c b/src/init.c index b00430ef4b..1bc53cad4d 100644 --- a/src/init.c +++ b/src/init.c @@ -118,6 +118,17 @@ init_main_pathman_toggle(void) NULL, pg_pathman_enable_assign_hook, NULL); + + DefineCustomBoolVariable("pg_pathman.enable_auto_partition", + "Enables auto partition propagation", + NULL, + &pg_pathman_init_state.auto_partition, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); } /* diff --git a/src/init.h b/src/init.h index 2d31f618b6..9375976db1 100644 --- a/src/init.h +++ b/src/init.h @@ -26,6 +26,7 @@ typedef struct { bool pg_pathman_enable; /* GUC variable implementation */ + bool auto_partition; /* GUC variable for auto partition propagation */ bool initialization_needed; /* do we need to perform init? */ } PathmanInitState; @@ -52,6 +53,21 @@ extern PathmanInitState pg_pathman_init_state; */ #define IsPathmanReady() ( IsPathmanInitialized() && IsPathmanEnabled() ) +/* + * Check if auto partition propagation enabled + */ +#define IsAutoPartitionEnabled() ( pg_pathman_init_state.auto_partition ) + +/* + * Enable/disable auto partition propagation. Note that this only works if + * partitioned relation supports this. See enable_auto() and disable_auto() + * functions. + */ +#define SetAutoPartitionEnabled(value) \ + do { \ + pg_pathman_init_state.auto_partition = value; \ + } while (0) + /* * Emergency disable mechanism. 
*/ diff --git a/src/partition_filter.c b/src/partition_filter.c index bab93e8306..6c38c10998 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -11,6 +11,7 @@ #include "partition_filter.h" #include "nodes_common.h" #include "utils.h" +#include "init.h" #include "utils/guc.h" #include "utils/memutils.h" @@ -204,7 +205,11 @@ partition_filter_exec(CustomScanState *node) elog(ERROR, "PartitionFilter selected more than one partition"); else if (nparts == 0) { - if (prel->auto_partition) + /* + * If auto partition propagation is enabled then try to create + * new partitions for the key + */ + if (prel->auto_partition && IsAutoPartitionEnabled()) { selected_partid = create_partitions(state->partitioned_table, state->temp_const.constvalue, diff --git a/src/pathman_workers.c b/src/pathman_workers.c index e601e8be23..fcc64bace0 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -417,6 +417,9 @@ bgw_main_concurrent_part(Datum main_arg) part_slot = &concurrent_part_slots[DatumGetInt32(main_arg)]; part_slot->pid = MyProcPid; + /* Disable auto partition propagation */ + SetAutoPartitionEnabled(false); + /* Establish connection and start transaction */ BackgroundWorkerInitializeConnectionByOid(part_slot->dbid, part_slot->userid); @@ -494,10 +497,11 @@ bgw_main_concurrent_part(Datum main_arg) /* Print messsage for this BGWorker to server log */ sleep_time_str = datum_to_cstring(Float8GetDatum(part_slot->sleep_time), FLOAT8OID); + failures_count++; ereport(LOG, (errmsg("%s: %s", concurrent_part_bgw, error->message), errdetail("Attempt: %d/%d, sleep time: %s", - failures_count + 1, + failures_count, PART_WORKER_MAX_ATTEMPTS, sleep_time_str))); pfree(sleep_time_str); /* free the time string */ @@ -509,7 +513,7 @@ bgw_main_concurrent_part(Datum main_arg) * concurrent user queries. 
Check that attempts count doesn't exceed * some reasonable value */ - if (failures_count++ >= PART_WORKER_MAX_ATTEMPTS) + if (failures_count >= PART_WORKER_MAX_ATTEMPTS) { /* Mark slot as FREE */ cps_set_status(part_slot, CPS_FREE); @@ -593,7 +597,6 @@ partition_table_concurrently(PG_FUNCTION_ARGS) get_pathman_relation_info_after_lock(relid, true), /* Partitioning type does not matter here */ PT_INDIFFERENT); - /* * Look for an empty slot and also check that a concurrent * partitioning operation for this table hasn't been started yet diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 281bf15834..e2de816548 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -446,45 +446,61 @@ append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, memcpy(childrel->attr_widths, rel->attr_widths, (rel->max_attr - rel->min_attr + 1) * sizeof(int32)); - - /* Copy restrictions */ + /* + * Copy restrictions. If it's not the parent table then copy only those + * restrictions that reference to this partition + */ childrel->baserestrictinfo = NIL; - forboth(lc, wrappers, lc2, rel->baserestrictinfo) + if (rte->relid != childOid) { - bool alwaysTrue; - WrapperNode *wrap = (WrapperNode *) lfirst(lc); - Node *new_clause = wrapper_make_expression(wrap, index, &alwaysTrue); - RestrictInfo *old_rinfo = (RestrictInfo *) lfirst(lc2); - - if (alwaysTrue) + forboth(lc, wrappers, lc2, rel->baserestrictinfo) { - continue; - } - Assert(new_clause); + bool alwaysTrue; + WrapperNode *wrap = (WrapperNode *) lfirst(lc); + Node *new_clause = wrapper_make_expression(wrap, index, &alwaysTrue); + RestrictInfo *old_rinfo = (RestrictInfo *) lfirst(lc2); - if (and_clause((Node *) new_clause)) - { - ListCell *alc; + if (alwaysTrue) + { + continue; + } + Assert(new_clause); - foreach(alc, ((BoolExpr *) new_clause)->args) + if (and_clause((Node *) new_clause)) { - Node *arg = (Node *) lfirst(alc); - RestrictInfo *new_rinfo = rebuild_restrictinfo(arg, old_rinfo); + ListCell *alc; + + foreach(alc, 
((BoolExpr *) new_clause)->args) + { + Node *arg = (Node *) lfirst(alc); + RestrictInfo *new_rinfo = rebuild_restrictinfo(arg, old_rinfo); + change_varnos((Node *)new_rinfo, rel->relid, childrel->relid); + childrel->baserestrictinfo = lappend(childrel->baserestrictinfo, + new_rinfo); + } + } + else + { + RestrictInfo *new_rinfo = rebuild_restrictinfo(new_clause, old_rinfo); + + /* Replace old relids with new ones */ change_varnos((Node *)new_rinfo, rel->relid, childrel->relid); + childrel->baserestrictinfo = lappend(childrel->baserestrictinfo, - new_rinfo); + (void *) new_rinfo); } } - else + } + /* If it's the parent table then copy all restrictions */ + else + { + foreach(lc, rel->baserestrictinfo) { - RestrictInfo *new_rinfo = rebuild_restrictinfo(new_clause, old_rinfo); - - /* Replace old relids with new ones */ - change_varnos((Node *)new_rinfo, rel->relid, childrel->relid); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); childrel->baserestrictinfo = lappend(childrel->baserestrictinfo, - (void *) new_rinfo); + (RestrictInfo *) copyObject(rinfo)); } } From 43b0b6e11ef9635e7e0dfe18a3c15aa01926ac9d Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Wed, 31 Aug 2016 14:03:25 +0300 Subject: [PATCH 098/184] change varnos for parent table in AppendPath --- src/pg_pathman.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pg_pathman.c b/src/pg_pathman.c index f1c6d4e3b1..0d2c03ed76 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -498,9 +498,11 @@ append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, foreach(lc, rel->baserestrictinfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *new_rinfo = (RestrictInfo *) copyObject(rinfo); + change_varnos((Node *)new_rinfo, rel->relid, childrel->relid); childrel->baserestrictinfo = lappend(childrel->baserestrictinfo, - (RestrictInfo *) copyObject(rinfo)); + (void *) new_rinfo); } } From 027fe76c09c2488ec478109191c53d9e74df27a9 Mon Sep 17 00:00:00 2001 From: 
Dmitry Ivanov Date: Wed, 31 Aug 2016 14:11:06 +0300 Subject: [PATCH 099/184] don't modify parent's clauses in create_append_plan_common() --- src/nodes_common.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/nodes_common.c b/src/nodes_common.c index 26810a5875..a291ccb0bb 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -372,10 +372,6 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, { Plan *child_plan = (Plan *) lfirst(lc2); RelOptInfo *child_rel = ((Path *) lfirst(lc1))->parent; - Oid child_relid; - - /* Fetch relid of the 'child_rel' */ - child_relid = root->simple_rte_array[child_rel->relid]->relid; /* Replace rel's tlist with a matching one */ if (!cscan->scan.plan.targetlist) @@ -390,10 +386,6 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, if (!cscan->custom_scan_tlist) cscan->custom_scan_tlist = replace_tlist_varnos(child_plan->targetlist, rel); - - /* If this is a plan for parent table, fill it with quals */ - if (PrelParentRelid(prel) == child_relid) - child_plan->qual = get_actual_clauses(clauses); } } From 173dd507b05d6556c52c4245235c87822180ba0b Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 31 Aug 2016 16:51:44 +0300 Subject: [PATCH 100/184] disable some of the concurrent data modification operations for partitioned tables --- hash.sql | 5 +++++ init.sql | 8 ++++++++ range.sql | 27 +++++++++++++++++++++++++++ src/pl_funcs.c | 12 ++++++++++++ src/xact_handling.c | 18 ++++++++++++++++++ src/xact_handling.h | 4 ++++ 6 files changed, 74 insertions(+) diff --git a/hash.sql b/hash.sql index f474e82747..9dd8d0aac5 100644 --- a/hash.sql +++ b/hash.sql @@ -29,6 +29,11 @@ BEGIN /* Acquire exclusive lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); + IF partition_data = true THEN + /* Acquire data modification lock */ + PERFORM @extschema@.lock_relation_modification(parent_relid); + END IF; + PERFORM @extschema@.validate_relname(parent_relid); attribute := 
lower(attribute); PERFORM @extschema@.common_relation_checks(parent_relid, attribute); diff --git a/init.sql b/init.sql index e673b25a15..826c740a22 100644 --- a/init.sql +++ b/init.sql @@ -656,6 +656,14 @@ LANGUAGE C STRICT; LANGUAGE C STRICT; +/* + * Lock relation to restrict concurrent modification of data. + */ + CREATE OR REPLACE FUNCTION @extschema@.lock_relation_modification( + REGCLASS) + RETURNS VOID AS 'pg_pathman', 'lock_relation_modification' + LANGUAGE C STRICT; + /* * DEBUG: Place this inside some plpgsql fuction and set breakpoint. */ diff --git a/range.sql b/range.sql index a1700baa56..da87d03263 100644 --- a/range.sql +++ b/range.sql @@ -97,6 +97,11 @@ BEGIN /* Acquire exclusive lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); + IF partition_data = true THEN + /* Acquire data modification lock */ + PERFORM @extschema@.lock_relation_modification(parent_relid); + END IF; + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); @@ -194,6 +199,11 @@ BEGIN /* Acquire exclusive lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); + IF partition_data = true THEN + /* Acquire data modification lock */ + PERFORM @extschema@.lock_relation_modification(parent_relid); + END IF; + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); @@ -289,6 +299,11 @@ BEGIN /* Acquire exclusive lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); + IF partition_data = true THEN + /* Acquire data modification lock */ + PERFORM @extschema@.lock_relation_modification(parent_relid); + END IF; + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); @@ -357,6 +372,11 @@ BEGIN /* Acquire 
exclusive lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); + IF partition_data = true THEN + /* Acquire data modification lock */ + PERFORM @extschema@.lock_relation_modification(parent_relid); + END IF; + PERFORM @extschema@.validate_relname(parent_relid); p_attribute := lower(p_attribute); PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); @@ -501,6 +521,9 @@ BEGIN /* Acquire exclusive lock on parent */ PERFORM @extschema@.lock_partitioned_relation(v_parent_relid); + /* Acquire data modification lock */ + PERFORM @extschema@.lock_relation_modification(p_partition); + SELECT attname, parttype FROM @extschema@.pathman_config WHERE partrel = v_parent_relid @@ -585,6 +608,10 @@ BEGIN v_parent_relid1 := @extschema@.get_parent_of_partition(partition1); v_parent_relid2 := @extschema@.get_parent_of_partition(partition2); + /* Acquire data modification lock */ + PERFORM @extschema@.lock_relation_modification(partition1); + PERFORM @extschema@.lock_relation_modification(partition2); + IF v_parent_relid1 != v_parent_relid2 THEN RAISE EXCEPTION 'Cannot merge partitions with different parents'; END IF; diff --git a/src/pl_funcs.c b/src/pl_funcs.c index f4af705181..b325819044 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -53,6 +53,7 @@ PG_FUNCTION_INFO_V1( is_attribute_nullable ); PG_FUNCTION_INFO_V1( add_to_pathman_config ); PG_FUNCTION_INFO_V1( invalidate_relcache ); PG_FUNCTION_INFO_V1( lock_partitioned_relation ); +PG_FUNCTION_INFO_V1( lock_relation_modification ); PG_FUNCTION_INFO_V1( debug_capture ); @@ -701,6 +702,17 @@ lock_partitioned_relation(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +Datum +lock_relation_modification(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + + /* Lock partitioned relation till transaction's end */ + xact_lock_rel_data(relid); + + PG_RETURN_VOID(); +} + /* * NOTE: used for DEBUG, set breakpoint here. 
diff --git a/src/xact_handling.c b/src/xact_handling.c index 7358493307..05a30c0075 100644 --- a/src/xact_handling.c +++ b/src/xact_handling.c @@ -39,6 +39,24 @@ xact_unlock_partitioned_rel(Oid relid) UnlockRelationOid(relid, ShareUpdateExclusiveLock); } +/* + * Lock certain relation's data (INSERT | UPDATE | DELETE). + */ +void +xact_lock_rel_data(Oid relid) +{ + LockRelationOid(relid, RowExclusiveLock); +} + +/* + * Unlock relation's data. + */ +void +xact_unlock_rel_data(Oid relid) +{ + UnlockRelationOid(relid, RowExclusiveLock); +} + /* * Check whether we already hold a lock that * might conflict with partition spawning BGW. diff --git a/src/xact_handling.h b/src/xact_handling.h index 6b9bb1aa4f..b51cbe1892 100644 --- a/src/xact_handling.h +++ b/src/xact_handling.h @@ -24,6 +24,10 @@ extern List *locked_by_me; */ void xact_lock_partitioned_rel(Oid relid); void xact_unlock_partitioned_rel(Oid relid); + +void xact_lock_rel_data(Oid relid); +void xact_unlock_rel_data(Oid relid); + bool xact_conflicting_lock_exists(Oid relid); #endif From e82348b525468a966a66730e2562c32f43cff6cc Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 31 Aug 2016 18:21:06 +0300 Subject: [PATCH 101/184] check transaction isolation level for blocking partitioning operations --- init.sql | 10 +++++++++- range.sql | 27 +++++++++++++++++++++------ src/pathman_workers.c | 2 +- src/pg_pathman.c | 2 +- src/pl_funcs.c | 20 ++++++++++++++++++++ src/xact_handling.c | 38 +++++++++++++++++++++++++++++++++++--- src/xact_handling.h | 12 ++++++------ 7 files changed, 93 insertions(+), 18 deletions(-) diff --git a/init.sql b/init.sql index 826c740a22..2d60234d16 100644 --- a/init.sql +++ b/init.sql @@ -655,7 +655,6 @@ LANGUAGE C STRICT; RETURNS VOID AS 'pg_pathman', 'lock_partitioned_relation' LANGUAGE C STRICT; - /* * Lock relation to restrict concurrent modification of data. 
*/ @@ -664,6 +663,15 @@ LANGUAGE C STRICT; RETURNS VOID AS 'pg_pathman', 'lock_relation_modification' LANGUAGE C STRICT; +/* + * Check if we can distribute data without bad consequences. + */ + CREATE OR REPLACE FUNCTION @extschema@.common_blocking_partitioning_checks( + REGCLASS) + RETURNS VOID AS 'pg_pathman', 'common_blocking_partitioning_checks' + LANGUAGE C STRICT; + + /* * DEBUG: Place this inside some plpgsql fuction and set breakpoint. */ diff --git a/range.sql b/range.sql index da87d03263..7ba22376b2 100644 --- a/range.sql +++ b/range.sql @@ -98,7 +98,10 @@ BEGIN PERFORM @extschema@.lock_partitioned_relation(parent_relid); IF partition_data = true THEN - /* Acquire data modification lock */ + /* Perform some checks regarding the blocking partitioning */ + PERFORM @extschema@.common_blocking_partitioning_checks(parent_relid); + + /* Acquire data modification lock (prevent further modifications) */ PERFORM @extschema@.lock_relation_modification(parent_relid); END IF; @@ -200,7 +203,10 @@ BEGIN PERFORM @extschema@.lock_partitioned_relation(parent_relid); IF partition_data = true THEN - /* Acquire data modification lock */ + /* Perform some checks regarding the blocking partitioning */ + PERFORM @extschema@.common_blocking_partitioning_checks(parent_relid); + + /* Acquire data modification lock (prevent further modifications) */ PERFORM @extschema@.lock_relation_modification(parent_relid); END IF; @@ -300,7 +306,10 @@ BEGIN PERFORM @extschema@.lock_partitioned_relation(parent_relid); IF partition_data = true THEN - /* Acquire data modification lock */ + /* Perform some checks regarding the blocking partitioning */ + PERFORM @extschema@.common_blocking_partitioning_checks(parent_relid); + + /* Acquire data modification lock (prevent further modifications) */ PERFORM @extschema@.lock_relation_modification(parent_relid); END IF; @@ -373,7 +382,10 @@ BEGIN PERFORM @extschema@.lock_partitioned_relation(parent_relid); IF partition_data = true THEN - /* Acquire data 
modification lock */ + /* Perform some checks regarding the blocking partitioning */ + PERFORM @extschema@.common_blocking_partitioning_checks(parent_relid); + + /* Acquire data modification lock (prevent further modifications) */ PERFORM @extschema@.lock_relation_modification(parent_relid); END IF; @@ -521,7 +533,8 @@ BEGIN /* Acquire exclusive lock on parent */ PERFORM @extschema@.lock_partitioned_relation(v_parent_relid); - /* Acquire data modification lock */ + /* Acquire data modification lock (prevent further modifications) */ + PERFORM @extschema@.common_blocking_partitioning_checks(p_partition); PERFORM @extschema@.lock_relation_modification(p_partition); SELECT attname, parttype @@ -608,8 +621,10 @@ BEGIN v_parent_relid1 := @extschema@.get_parent_of_partition(partition1); v_parent_relid2 := @extschema@.get_parent_of_partition(partition2); - /* Acquire data modification lock */ + /* Acquire data modification locks (prevent further modifications) */ + PERFORM @extschema@.common_blocking_partitioning_checks(partition1); PERFORM @extschema@.lock_relation_modification(partition1); + PERFORM @extschema@.common_blocking_partitioning_checks(partition2); PERFORM @extschema@.lock_relation_modification(partition2); IF v_parent_relid1 != v_parent_relid2 THEN diff --git a/src/pathman_workers.c b/src/pathman_workers.c index fcc64bace0..2df35b2fc5 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -361,7 +361,7 @@ bgw_main_spawn_partitions(Datum main_arg) #endif /* Check again if there's a conflicting lock */ - if (xact_conflicting_lock_exists(args->partitioned_table)) + if (xact_bgw_conflicting_lock_exists(args->partitioned_table)) { elog(LOG, "%s: there's a conflicting lock on relation \"%s\"", spawn_partitions_bgw, diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 0d2c03ed76..8f35b1528b 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -976,7 +976,7 @@ create_partitions(Oid relid, Datum value, Oid value_type) * If table has been partitioned 
in some previous xact AND * we don't hold any conflicting locks, run BGWorker. */ - if (part_in_prev_xact && !xact_conflicting_lock_exists(relid)) + if (part_in_prev_xact && !xact_bgw_conflicting_lock_exists(relid)) { elog(DEBUG2, "create_partitions(): chose BGWorker [%u]", MyProcPid); last_partition = create_partitions_bg_worker(relid, value, value_type); diff --git a/src/pl_funcs.c b/src/pl_funcs.c index b325819044..c040d98f02 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -54,6 +54,7 @@ PG_FUNCTION_INFO_V1( add_to_pathman_config ); PG_FUNCTION_INFO_V1( invalidate_relcache ); PG_FUNCTION_INFO_V1( lock_partitioned_relation ); PG_FUNCTION_INFO_V1( lock_relation_modification ); +PG_FUNCTION_INFO_V1( common_blocking_partitioning_checks ); PG_FUNCTION_INFO_V1( debug_capture ); @@ -713,6 +714,25 @@ lock_relation_modification(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +Datum +common_blocking_partitioning_checks(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + + if (!xact_is_level_read_committed()) + ereport(ERROR, + (errmsg("Cannot perform blocking partitioning operation"), + errdetail("Expected READ COMMITTED isolation level"))); + + if (xact_is_table_being_modified(relid)) + ereport(ERROR, + (errmsg("Cannot perform blocking partitioning operation"), + errdetail("Table \"%s\" is being modified concurrently", + get_rel_name_or_relid(relid)))); + + PG_RETURN_VOID(); +} + /* * NOTE: used for DEBUG, set breakpoint here. 
diff --git a/src/xact_handling.c b/src/xact_handling.c index 05a30c0075..9b6a0b1a28 100644 --- a/src/xact_handling.c +++ b/src/xact_handling.c @@ -11,6 +11,7 @@ #include "xact_handling.h" #include "postgres.h" +#include "access/xact.h" #include "catalog/catalog.h" #include "miscadmin.h" #include "storage/lmgr.h" @@ -45,7 +46,7 @@ xact_unlock_partitioned_rel(Oid relid) void xact_lock_rel_data(Oid relid) { - LockRelationOid(relid, RowExclusiveLock); + LockRelationOid(relid, ShareLock); } /* @@ -54,7 +55,7 @@ xact_lock_rel_data(Oid relid) void xact_unlock_rel_data(Oid relid) { - UnlockRelationOid(relid, RowExclusiveLock); + UnlockRelationOid(relid, ShareLock); } /* @@ -62,7 +63,7 @@ xact_unlock_rel_data(Oid relid) * might conflict with partition spawning BGW. */ bool -xact_conflicting_lock_exists(Oid relid) +xact_bgw_conflicting_lock_exists(Oid relid) { LOCKMODE lockmode; @@ -78,6 +79,37 @@ xact_conflicting_lock_exists(Oid relid) return false; } +/* + * Check if table is being modified + * concurrently in a separate transaction. + */ +bool +xact_is_table_being_modified(Oid relid) +{ + /* + * Check if someone has already started a + * transaction and modified table's contents. + */ + if (ConditionalLockRelationOid(relid, ExclusiveLock)) + { + UnlockRelationOid(relid, ExclusiveLock); + return false; + } + + return true; +} + +/* + * Check if current transaction's level is READ COMMITTED. + */ +bool +xact_is_level_read_committed(void) +{ + if (XactIsoLevel <= XACT_READ_COMMITTED) + return true; + + return false; +} /* * Do we hold the specified lock? diff --git a/src/xact_handling.h b/src/xact_handling.h index b51cbe1892..2903978de7 100644 --- a/src/xact_handling.h +++ b/src/xact_handling.h @@ -14,11 +14,6 @@ #include "pathman.h" -/* - * List of partitioned relations locked by this backend (plain Oids). - */ -extern List *locked_by_me; - /* * Transaction locks. 
*/ @@ -28,6 +23,11 @@ void xact_unlock_partitioned_rel(Oid relid); void xact_lock_rel_data(Oid relid); void xact_unlock_rel_data(Oid relid); -bool xact_conflicting_lock_exists(Oid relid); +/* + * Utility checks. + */ +bool xact_bgw_conflicting_lock_exists(Oid relid); +bool xact_is_table_being_modified(Oid relid); +bool xact_is_level_read_committed(void); #endif From d80733858e9f6b36cf2bfbeb10dc7656269c34ee Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 31 Aug 2016 18:49:03 +0300 Subject: [PATCH 102/184] remove exception blocks in PL/pgSQL functions --- hash.sql | 3 --- range.sql | 30 ------------------------------ 2 files changed, 33 deletions(-) diff --git a/hash.sql b/hash.sql index 9dd8d0aac5..a22021859c 100644 --- a/hash.sql +++ b/hash.sql @@ -83,9 +83,6 @@ BEGIN END IF; RETURN partitions_count; - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql SET client_min_messages = WARNING; diff --git a/range.sql b/range.sql index 7ba22376b2..134ac88695 100644 --- a/range.sql +++ b/range.sql @@ -174,9 +174,6 @@ BEGIN END IF; RETURN p_count; - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql; @@ -280,9 +277,6 @@ BEGIN END IF; RETURN p_count; - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql; @@ -356,9 +350,6 @@ BEGIN END IF; RETURN part_count; /* number of created partitions */ - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql; @@ -429,9 +420,6 @@ BEGIN END IF; RETURN part_count; /* number of created partitions */ - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql; @@ -772,9 +760,6 @@ BEGIN /* Invalidate cache */ PERFORM @extschema@.on_update_partitions(parent_relid); RETURN v_part_name; - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql; @@ -870,9 +855,6 @@ BEGIN /* Invalidate cache */ PERFORM @extschema@.on_update_partitions(parent_relid); RETURN 
v_part_name; - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql; @@ -962,9 +944,6 @@ BEGIN PERFORM @extschema@.on_update_partitions(parent_relid); RETURN v_part_name; - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql; @@ -995,9 +974,6 @@ BEGIN PERFORM @extschema@.on_update_partitions(parent_relid); RETURN part_name; - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql; @@ -1059,9 +1035,6 @@ BEGIN PERFORM @extschema@.on_update_partitions(parent_relid); RETURN p_partition; - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql; @@ -1106,9 +1079,6 @@ BEGIN PERFORM @extschema@.on_update_partitions(parent_relid); RETURN p_partition; - -EXCEPTION WHEN others THEN - RAISE EXCEPTION '%', SQLERRM; END $$ LANGUAGE plpgsql; From 11459316e7ddb857589eca299dc73e4282bd6176 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 31 Aug 2016 19:35:59 +0300 Subject: [PATCH 103/184] bugfix: don't take parent's plan into account in PartitionFilter --- src/nodes_common.c | 7 ++++--- src/nodes_common.h | 3 ++- src/partition_filter.c | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/nodes_common.c b/src/nodes_common.c index a291ccb0bb..4bc3e762da 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -240,7 +240,8 @@ unpack_runtimeappend_private(RuntimeAppendState *scan_state, CustomScan *cscan) /* Transform partition ranges into plain array of partition Oids */ Oid * -get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel) +get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel, + bool include_parent) { ListCell *range_cell; uint32 allocated = INITIAL_ALLOC_NUM; @@ -250,7 +251,7 @@ get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel) /* If required, add parent to result */ Assert(INITIAL_ALLOC_NUM >= 1); - if (prel->enable_parent) + if (include_parent) result[used++] = 
PrelParentRelid(prel); /* Deal with selected partitions */ @@ -521,7 +522,7 @@ rescan_append_common(CustomScanState *node) } /* Get Oids of the required partitions */ - parts = get_partition_oids(ranges, &nparts, prel); + parts = get_partition_oids(ranges, &nparts, prel, prel->enable_parent); /* Select new plans for this run using 'parts' */ if (scan_state->cur_plans) diff --git a/src/nodes_common.h b/src/nodes_common.h index d66b02e27e..ef3cb3df5a 100644 --- a/src/nodes_common.h +++ b/src/nodes_common.h @@ -60,7 +60,8 @@ clear_plan_states(CustomScanState *scan_state) } } -Oid * get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel); +Oid * get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel, + bool include_parent); Path * create_append_path_common(PlannerInfo *root, AppendPath *inner_append, diff --git a/src/partition_filter.c b/src/partition_filter.c index 6c38c10998..71e1b8946e 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -199,7 +199,7 @@ partition_filter_exec(CustomScanState *node) old_cxt = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); ranges = walk_expr_tree((Expr *) &state->temp_const, &wcxt)->rangeset; - parts = get_partition_oids(ranges, &nparts, prel); + parts = get_partition_oids(ranges, &nparts, prel, false); if (nparts > 1) elog(ERROR, "PartitionFilter selected more than one partition"); @@ -222,7 +222,7 @@ partition_filter_exec(CustomScanState *node) elog(ERROR, "There is no suitable partition for key '%s'", datum_to_cstring(state->temp_const.constvalue, - state->temp_const.consttype)); + state->temp_const.consttype)); } else selected_partid = parts[0]; From e455306502946fc3a645fc173e0caf28d6225698 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Wed, 31 Aug 2016 20:01:11 +0300 Subject: [PATCH 104/184] lock tests --- tests/partitioning_test.py | 91 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/tests/partitioning_test.py 
b/tests/partitioning_test.py index 835c968d67..e8a8d03b33 100644 --- a/tests/partitioning_test.py +++ b/tests/partitioning_test.py @@ -52,6 +52,7 @@ def test_concurrent(self): while True: # update some rows to check for deadlocks + # import ipdb; ipdb.set_trace() node.safe_psql('postgres', ''' update abc set t = 'test' @@ -135,5 +136,95 @@ def test_replication(self): 0 ) + def test_locks(self): + """Test that a session trying to create new partitions waits for other + sessions if they are doing the same""" + + import threading + import time + + class Flag: + def __init__(self, value): + self.flag = value + + def set(self, value): + self.flag = value + + def get(self): + return self.flag + + # There is one flag for each thread which shows whether the thread has done + # its work + flags = [Flag(False) for i in xrange(3)] + + # All threads synchronize through this lock + lock = threading.Lock() + + # Define thread function + def add_partition(node, flag, query): + """ We expect that this query will wait until another session + commits or rolls back""" + node.safe_psql('postgres', query) + with lock: + flag.set(True) + + # Initialize master server + node = get_new_node('master') + node.init() + node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + node.safe_psql( + 'postgres', + 'create extension pg_pathman; ' + + 'create table abc(id serial, t text); ' + + 'insert into abc select generate_series(1, 100000); ' + + 'select create_range_partitions(\'abc\', \'id\', 1, 50000);' + ) + + # Start transaction that will create partition + con = node.connect() + con.begin() + con.execute('select append_range_partition(\'abc\')') + + # Start threads that are supposed to add new partitions and wait some time + query = [ + 'select prepend_range_partition(\'abc\')', + 'select append_range_partition(\'abc\')', + 'select add_range_partition(\'abc\', 500000, 550000)', + ] + threads = [] + for i in range(3): + thread = \ + 
threading.Thread(target=add_partition, args=(node, flags[i], query[i])) + threads.append(thread) + thread.start() + time.sleep(3) + + # These threads should wait until the current transaction has finished + with lock: + for i in range(3): + self.assertEqual(flags[i].get(), False) + + # Commit the transaction. From this point on, other sessions can create partitions + con.commit() + + # Now wait until each thread finishes + for i in range(3): + threads[i].join() + + # Check the flags: each should be True, which means that its thread has finished + with lock: + for i in range(3): + self.assertEqual(flags[i].get(), True) + + # Check that all partitions are created + self.assertEqual( + node.safe_psql( + 'postgres', + 'select count(*) from pg_inherits where inhparent=\'abc\'::regclass' + ), + '6\n' + ) + if __name__ == "__main__": unittest.main() From ea3ad7327ab767329bbc5ac0af3db62eb639353c Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 31 Aug 2016 20:08:41 +0300 Subject: [PATCH 105/184] simplify initial partitioning locks --- hash.sql | 8 ++--- init.sql | 12 ++------ range.sql | 75 ++++++++++++++++++--------------------------- src/pl_funcs.c | 32 +++++++++---------- src/relation_info.c | 2 +- src/xact_handling.c | 58 +++++++++++++++++------------------ src/xact_handling.h | 7 ++--- 7 files changed, 84 insertions(+), 110 deletions(-) diff --git a/hash.sql b/hash.sql index a22021859c..2df89fd731 100644 --- a/hash.sql +++ b/hash.sql @@ -26,12 +26,12 @@ DECLARE v_hashfunc TEXT; BEGIN - /* Acquire exclusive lock on parent */ - PERFORM @extschema@.lock_partitioned_relation(parent_relid); - IF partition_data = true THEN /* Acquire data modification lock */ - PERFORM @extschema@.lock_relation_modification(parent_relid); + PERFORM @extschema@.prevent_relation_modification(parent_relid); + ELSE + /* Acquire lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); END IF; PERFORM @extschema@.validate_relname(parent_relid); diff --git a/init.sql b/init.sql index 
2d60234d16..5ca451ca54 100644 --- a/init.sql +++ b/init.sql @@ -658,17 +658,9 @@ LANGUAGE C STRICT; /* * Lock relation to restrict concurrent modification of data. */ - CREATE OR REPLACE FUNCTION @extschema@.lock_relation_modification( + CREATE OR REPLACE FUNCTION @extschema@.prevent_relation_modification( REGCLASS) - RETURNS VOID AS 'pg_pathman', 'lock_relation_modification' - LANGUAGE C STRICT; - -/* - * Check if we can distribute data without bad consequences. - */ - CREATE OR REPLACE FUNCTION @extschema@.common_blocking_partitioning_checks( - REGCLASS) - RETURNS VOID AS 'pg_pathman', 'common_blocking_partitioning_checks' + RETURNS VOID AS 'pg_pathman', 'prevent_relation_modification' LANGUAGE C STRICT; diff --git a/range.sql b/range.sql index 134ac88695..8c1511c033 100644 --- a/range.sql +++ b/range.sql @@ -94,15 +94,12 @@ DECLARE i INTEGER; BEGIN - /* Acquire exclusive lock on parent */ - PERFORM @extschema@.lock_partitioned_relation(parent_relid); - IF partition_data = true THEN - /* Perform some checks regarding the blocking partitioning */ - PERFORM @extschema@.common_blocking_partitioning_checks(parent_relid); - - /* Acquire data modification lock (prevent further modifications) */ - PERFORM @extschema@.lock_relation_modification(parent_relid); + /* Acquire data modification lock */ + PERFORM @extschema@.prevent_relation_modification(parent_relid); + ELSE + /* Acquire lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); END IF; PERFORM @extschema@.validate_relname(parent_relid); @@ -196,15 +193,12 @@ DECLARE i INTEGER; BEGIN - /* Acquire exclusive lock on parent */ - PERFORM @extschema@.lock_partitioned_relation(parent_relid); - IF partition_data = true THEN - /* Perform some checks regarding the blocking partitioning */ - PERFORM @extschema@.common_blocking_partitioning_checks(parent_relid); - - /* Acquire data modification lock (prevent further modifications) */ - PERFORM @extschema@.lock_relation_modification(parent_relid); + 
/* Acquire data modification lock */ + PERFORM @extschema@.prevent_relation_modification(parent_relid); + ELSE + /* Acquire lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); END IF; PERFORM @extschema@.validate_relname(parent_relid); @@ -296,15 +290,12 @@ DECLARE part_count INTEGER := 0; BEGIN - /* Acquire exclusive lock on parent */ - PERFORM @extschema@.lock_partitioned_relation(parent_relid); - IF partition_data = true THEN - /* Perform some checks regarding the blocking partitioning */ - PERFORM @extschema@.common_blocking_partitioning_checks(parent_relid); - - /* Acquire data modification lock (prevent further modifications) */ - PERFORM @extschema@.lock_relation_modification(parent_relid); + /* Acquire data modification lock */ + PERFORM @extschema@.prevent_relation_modification(parent_relid); + ELSE + /* Acquire lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); END IF; PERFORM @extschema@.validate_relname(parent_relid); @@ -369,15 +360,12 @@ DECLARE part_count INTEGER := 0; BEGIN - /* Acquire exclusive lock on parent */ - PERFORM @extschema@.lock_partitioned_relation(parent_relid); - IF partition_data = true THEN - /* Perform some checks regarding the blocking partitioning */ - PERFORM @extschema@.common_blocking_partitioning_checks(parent_relid); - - /* Acquire data modification lock (prevent further modifications) */ - PERFORM @extschema@.lock_relation_modification(parent_relid); + /* Acquire data modification lock */ + PERFORM @extschema@.prevent_relation_modification(parent_relid); + ELSE + /* Acquire lock on parent */ + PERFORM @extschema@.lock_partitioned_relation(parent_relid); END IF; PERFORM @extschema@.validate_relname(parent_relid); @@ -518,12 +506,11 @@ BEGIN v_part_relname := @extschema@.validate_relname(p_partition); v_parent_relid = @extschema@.get_parent_of_partition(p_partition); - /* Acquire exclusive lock on parent */ + /* Acquire lock on parent */ PERFORM 
@extschema@.lock_partitioned_relation(v_parent_relid); /* Acquire data modification lock (prevent further modifications) */ - PERFORM @extschema@.common_blocking_partitioning_checks(p_partition); - PERFORM @extschema@.lock_relation_modification(p_partition); + PERFORM @extschema@.prevent_relation_modification(p_partition); SELECT attname, parttype FROM @extschema@.pathman_config @@ -610,16 +597,14 @@ BEGIN v_parent_relid2 := @extschema@.get_parent_of_partition(partition2); /* Acquire data modification locks (prevent further modifications) */ - PERFORM @extschema@.common_blocking_partitioning_checks(partition1); - PERFORM @extschema@.lock_relation_modification(partition1); - PERFORM @extschema@.common_blocking_partitioning_checks(partition2); - PERFORM @extschema@.lock_relation_modification(partition2); + PERFORM @extschema@.prevent_relation_modification(partition1); + PERFORM @extschema@.prevent_relation_modification(partition2); IF v_parent_relid1 != v_parent_relid2 THEN RAISE EXCEPTION 'Cannot merge partitions with different parents'; END IF; - /* Acquire exclusive lock on parent */ + /* Acquire lock on parent */ PERFORM @extschema@.lock_partitioned_relation(v_parent_relid1); SELECT attname, parttype @@ -731,7 +716,7 @@ DECLARE v_interval TEXT; BEGIN - /* Acquire exclusive lock on parent */ + /* Acquire lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); SELECT attname, range_interval @@ -923,7 +908,7 @@ DECLARE v_part_name TEXT; BEGIN - /* Acquire exclusive lock on parent */ + /* Acquire lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); IF p_start_value >= p_end_value THEN @@ -964,7 +949,7 @@ BEGIN parent_relid := @extschema@.get_parent_of_partition(p_partition); part_name := p_partition::TEXT; /* save the name to be returned */ - /* Acquire exclusive lock on parent */ + /* Acquire lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); /* Drop table */ @@ -994,7 +979,7 @@ DECLARE 
rel_persistence CHAR; BEGIN - /* Acquire exclusive lock on parent */ + /* Acquire lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); /* Ignore temporary tables */ @@ -1054,7 +1039,7 @@ DECLARE BEGIN parent_relid = @extschema@.get_parent_of_partition(p_partition); - /* Acquire exclusive lock on parent */ + /* Acquire lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); v_attname := attname diff --git a/src/pl_funcs.c b/src/pl_funcs.c index c040d98f02..56d44a01a2 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -53,8 +53,7 @@ PG_FUNCTION_INFO_V1( is_attribute_nullable ); PG_FUNCTION_INFO_V1( add_to_pathman_config ); PG_FUNCTION_INFO_V1( invalidate_relcache ); PG_FUNCTION_INFO_V1( lock_partitioned_relation ); -PG_FUNCTION_INFO_V1( lock_relation_modification ); -PG_FUNCTION_INFO_V1( common_blocking_partitioning_checks ); +PG_FUNCTION_INFO_V1( prevent_relation_modification ); PG_FUNCTION_INFO_V1( debug_capture ); @@ -698,33 +697,34 @@ lock_partitioned_relation(PG_FUNCTION_ARGS) Oid relid = PG_GETARG_OID(0); /* Lock partitioned relation till transaction's end */ - xact_lock_partitioned_rel(relid); - - PG_RETURN_VOID(); -} - -Datum -lock_relation_modification(PG_FUNCTION_ARGS) -{ - Oid relid = PG_GETARG_OID(0); - - /* Lock partitioned relation till transaction's end */ - xact_lock_rel_data(relid); + xact_lock_partitioned_rel(relid, false); PG_RETURN_VOID(); } +/* + * Lock relation exclusively & check for current isolation level. + */ Datum -common_blocking_partitioning_checks(PG_FUNCTION_ARGS) +prevent_relation_modification(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); + /* + * Check that isolation level is READ COMMITTED. + * Else we won't be able to see new rows + * which could slip through locks. 
+ */ if (!xact_is_level_read_committed()) ereport(ERROR, (errmsg("Cannot perform blocking partitioning operation"), errdetail("Expected READ COMMITTED isolation level"))); - if (xact_is_table_being_modified(relid)) + /* + * Check if table is being modified + * concurrently in a separate transaction. + */ + if (!xact_lock_rel_exclusive(relid, true)) ereport(ERROR, (errmsg("Cannot perform blocking partitioning operation"), errdetail("Table \"%s\" is being modified concurrently", diff --git a/src/relation_info.c b/src/relation_info.c index 87db65af79..aaa3fd6036 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -265,7 +265,7 @@ get_pathman_relation_info_after_lock(Oid relid, bool unlock_if_not_found) const PartRelationInfo *prel; /* Restrict concurrent partition creation (it's dangerous) */ - xact_lock_partitioned_rel(relid); + xact_lock_partitioned_rel(relid, false); prel = get_pathman_relation_info(relid); if (!prel && unlock_if_not_found) diff --git a/src/xact_handling.c b/src/xact_handling.c index 9b6a0b1a28..14c819a9f3 100644 --- a/src/xact_handling.c +++ b/src/xact_handling.c @@ -24,11 +24,19 @@ static inline bool do_we_hold_the_lock(Oid relid, LOCKMODE lockmode); /* * Lock certain partitioned relation to disable concurrent access. */ -void -xact_lock_partitioned_rel(Oid relid) +bool +xact_lock_partitioned_rel(Oid relid, bool nowait) { - /* Share exclusive lock conflicts with itself */ - LockRelationOid(relid, ShareUpdateExclusiveLock); + if (nowait) + { + if (ConditionalLockRelationOid(relid, ShareUpdateExclusiveLock)) + return true; + return false; + } + else + LockRelationOid(relid, ShareUpdateExclusiveLock); + + return true; } /* @@ -41,21 +49,30 @@ xact_unlock_partitioned_rel(Oid relid) } /* - * Lock certain relation's data (INSERT | UPDATE | DELETE). + * Lock relation exclusively (SELECTs are possible). 
*/ -void -xact_lock_rel_data(Oid relid) +bool +xact_lock_rel_exclusive(Oid relid, bool nowait) { - LockRelationOid(relid, ShareLock); + if (nowait) + { + if (ConditionalLockRelationOid(relid, ExclusiveLock)) + return true; + return false; + } + else + LockRelationOid(relid, ExclusiveLock); + + return true; } /* - * Unlock relation's data. + * Unlock relation (exclusive lock). */ void -xact_unlock_rel_data(Oid relid) +xact_unlock_rel_exclusive(Oid relid) { - UnlockRelationOid(relid, ShareLock); + UnlockRelationOid(relid, ExclusiveLock); } /* @@ -79,25 +96,6 @@ xact_bgw_conflicting_lock_exists(Oid relid) return false; } -/* - * Check if table is being modified - * concurrently in a separate transaction. - */ -bool -xact_is_table_being_modified(Oid relid) -{ - /* - * Check if someone has already started a - * transaction and modified table's contents. - */ - if (ConditionalLockRelationOid(relid, ExclusiveLock)) - { - UnlockRelationOid(relid, ExclusiveLock); - return false; - } - - return true; -} /* * Check if current transaction's level is READ COMMITTED. diff --git a/src/xact_handling.h b/src/xact_handling.h index 2903978de7..7bd8098997 100644 --- a/src/xact_handling.h +++ b/src/xact_handling.h @@ -17,17 +17,16 @@ /* * Transaction locks. */ -void xact_lock_partitioned_rel(Oid relid); +bool xact_lock_partitioned_rel(Oid relid, bool nowait); void xact_unlock_partitioned_rel(Oid relid); -void xact_lock_rel_data(Oid relid); -void xact_unlock_rel_data(Oid relid); +bool xact_lock_rel_exclusive(Oid relid, bool nowait); +void xact_unlock_rel_exclusive(Oid relid); /* * Utility checks. 
*/ bool xact_bgw_conflicting_lock_exists(Oid relid); -bool xact_is_table_being_modified(Oid relid); bool xact_is_level_read_committed(void); #endif From 852a7ee782b1e1187237588192f212da5845fdfd Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 31 Aug 2016 20:22:52 +0300 Subject: [PATCH 106/184] RuntimeAppend now caches prel->enable_parent (just in case) --- src/nodes_common.c | 45 ++++++++++++++++++++++++-------------- src/runtime_merge_append.c | 41 ++++++++++++++++++++-------------- src/runtimeappend.h | 11 +++++----- 3 files changed, 58 insertions(+), 39 deletions(-) diff --git a/src/nodes_common.c b/src/nodes_common.c index 4bc3e762da..d092f625bf 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -173,12 +173,13 @@ append_part_attr_to_tlist(List *tlist, Index relno, const PartRelationInfo *prel } static void -pack_runtimeappend_private(CustomScan *cscan, RuntimeAppendPath *path) +pack_runtimeappend_private(CustomScan *cscan, RuntimeAppendPath *path, + bool enable_parent) { ChildScanCommon *children = path->children; int nchildren = path->nchildren; - List *custom_private = NIL; - List *custom_oids = NIL; + List *custom_private = NIL, + *custom_oids = NIL; int i; for (i = 0; i < nchildren; i++) @@ -188,31 +189,39 @@ pack_runtimeappend_private(CustomScan *cscan, RuntimeAppendPath *path) pfree(children[i]); } - /* Save main table and partition relids as first element of 'custom_private' */ + /* Save parent & partition Oids and a flag as first element of 'custom_private' */ custom_private = lappend(custom_private, - list_make2(list_make1_oid(path->relid), - custom_oids)); + list_make3(list_make1_oid(path->relid), + custom_oids, /* list of Oids */ + list_make1_int(enable_parent))); + /* Store freshly built 'custom_private' */ cscan->custom_private = custom_private; } static void unpack_runtimeappend_private(RuntimeAppendState *scan_state, CustomScan *cscan) { - ListCell *oid_cell; - ListCell *plan_cell; - List *runtimeappend_private = 
linitial(cscan->custom_private); - List *custom_oids = (List *) lsecond(runtimeappend_private); - int nchildren = list_length(custom_oids); + ListCell *oid_cell, + *plan_cell; + List *runtimeappend_private = linitial(cscan->custom_private), + *custom_oids; /* Oids of partitions */ + int custom_oids_count; /* number of partitions */ + HTAB *children_table; HASHCTL *children_table_config = &scan_state->children_table_config; int i; + /* Extract Oids list from packed data */ + custom_oids = (List *) lsecond(runtimeappend_private); + custom_oids_count = list_length(custom_oids); + memset(children_table_config, 0, sizeof(HASHCTL)); children_table_config->keysize = sizeof(Oid); children_table_config->entrysize = sizeof(ChildScanCommonData); - children_table = hash_create("Plan storage", nchildren, + children_table = hash_create("RuntimeAppend plan storage", + custom_oids_count, children_table_config, HASH_ELEM | HASH_BLOBS); @@ -233,8 +242,10 @@ unpack_runtimeappend_private(RuntimeAppendState *scan_state, CustomScan *cscan) child->original_order = i++; /* will be used in EXPLAIN */ } + /* Finally fill 'scan_state' with unpacked elements */ scan_state->children_table = children_table; scan_state->relid = linitial_oid(linitial(runtimeappend_private)); + scan_state->enable_parent = (bool) linitial_int(lthird(runtimeappend_private)); } @@ -400,7 +411,8 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, cscan->custom_plans = custom_plans; cscan->methods = scan_methods; - pack_runtimeappend_private(cscan, rpath); + /* Cache 'prel->enable_parent' as well */ + pack_runtimeappend_private(cscan, rpath, prel->enable_parent); return &cscan->scan.plan; } @@ -502,6 +514,7 @@ rescan_append_common(CustomScanState *node) const PartRelationInfo *prel; List *ranges; ListCell *lc; + WalkerContext wcxt; Oid *parts; int nparts; @@ -511,18 +524,18 @@ rescan_append_common(CustomScanState *node) /* First we select all available partitions... 
*/ ranges = list_make1_irange(make_irange(0, PrelLastChild(prel), false)); - InitWalkerContext(&scan_state->wcxt, prel, econtext, false); + InitWalkerContext(&wcxt, prel, econtext, false); foreach (lc, scan_state->custom_exprs) { WrapperNode *wn; /* ... then we cut off irrelevant ones using the provided clauses */ - wn = walk_expr_tree((Expr *) lfirst(lc), &scan_state->wcxt); + wn = walk_expr_tree((Expr *) lfirst(lc), &wcxt); ranges = irange_list_intersect(ranges, wn->rangeset); } /* Get Oids of the required partitions */ - parts = get_partition_oids(ranges, &nparts, prel, prel->enable_parent); + parts = get_partition_oids(ranges, &nparts, prel, scan_state->enable_parent); /* Select new plans for this run using 'parts' */ if (scan_state->cur_plans) diff --git a/src/runtime_merge_append.c b/src/runtime_merge_append.c index 3de234b3bc..ad6389336f 100644 --- a/src/runtime_merge_append.c +++ b/src/runtime_merge_append.c @@ -113,18 +113,18 @@ static void pack_runtimemergeappend_private(CustomScan *cscan, MergeAppendGuts *mag) { List *runtimemergeappend_private = NIL; - List *sortColIdx = NIL, - *sortOperators = NIL, - *collations = NIL, - *nullsFirst = NIL; + List *sortColIdx = NIL, + *sortOperators = NIL, + *collations = NIL, + *nullsFirst = NIL; int i; for (i = 0; i < mag->numCols; i++) { - sortColIdx = lappend_int(sortColIdx, mag->sortColIdx[i]); - sortOperators = lappend_oid(sortOperators, mag->sortOperators[i]); - collations = lappend_oid(collations, mag->collations[i]); - nullsFirst = lappend_int(nullsFirst, mag->nullsFirst[i]); + sortColIdx = lappend_int(sortColIdx, mag->sortColIdx[i]); + sortOperators = lappend_oid(sortOperators, mag->sortOperators[i]); + collations = lappend_oid(collations, mag->collations[i]); + nullsFirst = lappend_int(nullsFirst, mag->nullsFirst[i]); } runtimemergeappend_private = list_make2(makeInteger(mag->numCols), @@ -133,7 +133,14 @@ pack_runtimemergeappend_private(CustomScan *cscan, MergeAppendGuts *mag) collations, nullsFirst)); - /* 
Append RuntimeMergeAppend's data to the 'custom_private' */ + /* + * Append RuntimeMergeAppend's data to the 'custom_private' (2nd). + * + * This way some sort of hierarchy is maintained in 'custom_private': + * inherited structure (in this case RuntimeAppend) is stored first, + * so we can think of pack\unpack functions as 'constructors' to some + * extent. + */ cscan->custom_private = lappend(cscan->custom_private, runtimemergeappend_private); } @@ -168,15 +175,15 @@ unpack_runtimemergeappend_private(RuntimeMergeAppendState *scan_state, runtimemergeappend_private = lsecond(cscan->custom_private); scan_state->numCols = intVal(linitial(runtimemergeappend_private)); - sortColIdx = linitial(lsecond(runtimemergeappend_private)); - sortOperators = lsecond(lsecond(runtimemergeappend_private)); - collations = lthird(lsecond(runtimemergeappend_private)); - nullsFirst = lfourth(lsecond(runtimemergeappend_private)); + sortColIdx = linitial(lsecond(runtimemergeappend_private)); + sortOperators = lsecond(lsecond(runtimemergeappend_private)); + collations = lthird(lsecond(runtimemergeappend_private)); + nullsFirst = lfourth(lsecond(runtimemergeappend_private)); - FillStateField(sortColIdx, AttrNumber, lfirst_int); - FillStateField(sortOperators, Oid, lfirst_oid); - FillStateField(collations, Oid, lfirst_oid); - FillStateField(nullsFirst, bool, lfirst_int); + FillStateField(sortColIdx, AttrNumber, lfirst_int); + FillStateField(sortOperators, Oid, lfirst_oid); + FillStateField(collations, Oid, lfirst_oid); + FillStateField(nullsFirst, bool, lfirst_int); } void diff --git a/src/runtimeappend.h b/src/runtimeappend.h index c5a6aa6de4..55c1320e99 100644 --- a/src/runtimeappend.h +++ b/src/runtimeappend.h @@ -23,9 +23,9 @@ typedef struct { CustomPath cpath; - Oid relid; /* relid of the partitioned table */ + Oid relid; /* relid of the partitioned table */ - ChildScanCommon *children; /* all available plans */ + ChildScanCommon *children; /* all available plans */ int nchildren; } 
RuntimeAppendPath; @@ -46,15 +46,14 @@ typedef struct ChildScanCommon *cur_plans; int ncur_plans; + /* Should we include parent table? Cached for prepared statements */ + bool enable_parent; + /* Index of the selected plan state */ int running_idx; /* Last saved tuple (for SRF projections) */ TupleTableSlot *slot; - - /* Cached walker context */ - WalkerContext wcxt; - bool wcxt_cached; } RuntimeAppendState; From 271e4a03e1717f3d1bc09712164e6337286e7ac1 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Wed, 31 Aug 2016 23:23:35 +0300 Subject: [PATCH 107/184] version 1.0_rc1 --- pg_pathman.control | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pg_pathman.control b/pg_pathman.control index d42ea8c09d..7831b8759d 100644 --- a/pg_pathman.control +++ b/pg_pathman.control @@ -1,4 +1,4 @@ # pg_pathman extension -comment 'Partitioning tool' -default_version = '0.1' +comment 'Partitioning tool ver. 1.0_rc1' +default_version = '1.0_rc1' module_pathname='$libdir/pg_pathman' From 40c2a421a6e2875ecb32a11c1d39dfeef1b1d8ee Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Wed, 31 Aug 2016 23:27:46 +0300 Subject: [PATCH 108/184] change version in Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 60f3f65c0b..3fa2b737cf 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ OBJS = src/init.o src/relation_info.o src/utils.o src/partition_filter.o src/run src/pathman_workers.o src/hooks.o src/nodes_common.o src/xact_handling.o $(WIN32RES) EXTENSION = pg_pathman -EXTVERSION = 0.1 +EXTVERSION = 1.0_rc1 DATA_built = $(EXTENSION)--$(EXTVERSION).sql PGFILEDESC = "pg_pathman - partitioning tool" From 32a9b2e8dcd844e68c383ff525ee50d74bf16756 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Thu, 1 Sep 2016 11:36:45 +0300 Subject: [PATCH 109/184] fix a compilation warning --- src/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hooks.c b/src/hooks.c index 1698d3d8cf..728a628bea 
100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -279,7 +279,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb /* Add parent if needed */ if (prel->enable_parent) - append_child_relation(root, rel, rti, rte, i, rte->relid, NULL); + append_child_relation(root, rel, rti, rte, 0, rte->relid, NULL); /* * Iterate all indexes in rangeset and append corresponding child From 7804c9b282ad8bf309e9b38582480dfd38efbddd Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 1 Sep 2016 16:28:08 +0300 Subject: [PATCH 110/184] do not perform finish_delayed_invalidation() & load_config() on transaction-related utility statements --- src/hooks.c | 9 +++++++++ src/xact_handling.c | 36 ++++++++++++++++++++++++++++++++++++ src/xact_handling.h | 4 ++++ 3 files changed, 49 insertions(+) diff --git a/src/hooks.c b/src/hooks.c index 728a628bea..c29820edb9 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -14,6 +14,7 @@ #include "runtimeappend.h" #include "runtime_merge_append.h" #include "utils.h" +#include "xact_handling.h" #include "miscadmin.h" #include "optimizer/cost.h" @@ -451,6 +452,14 @@ pathman_post_parse_analysis_hook(ParseState *pstate, Query *query) if (post_parse_analyze_hook_next) post_parse_analyze_hook_next(pstate, query); + /* We shouldn't do anything on BEGIN or SET ISOLATION LEVEL stmts */ + if (query->commandType == CMD_UTILITY && + (xact_is_transaction_stmt(query->utilityStmt) || + xact_is_set_transaction_stmt(query->utilityStmt))) + { + return; + } + /* Finish delayed invalidation jobs */ if (IsPathmanReady()) finish_delayed_invalidation(); diff --git a/src/xact_handling.c b/src/xact_handling.c index 14c819a9f3..408fe4402d 100644 --- a/src/xact_handling.c +++ b/src/xact_handling.c @@ -109,6 +109,42 @@ xact_is_level_read_committed(void) return false; } +/* + * Check if 'stmt' is BEGIN\ROLLBACK etc transaction statement. 
+ */ +bool +xact_is_transaction_stmt(Node *stmt) +{ + if (!stmt) + return false; + + if (IsA(stmt, TransactionStmt)) + return true; + + return false; +} + +/* + * Check if 'stmt' is SET TRANSACTION statement. + */ +bool +xact_is_set_transaction_stmt(Node *stmt) +{ + if (!stmt) + return false; + + if (IsA(stmt, VariableSetStmt)) + { + VariableSetStmt *var_set_stmt = (VariableSetStmt *) stmt; + + /* special case for SET TRANSACTION ... */ + if (var_set_stmt->kind == VAR_SET_MULTI) + return true; + } + + return false; +} + /* * Do we hold the specified lock? */ diff --git a/src/xact_handling.h b/src/xact_handling.h index 7bd8098997..b5f8ed3c6c 100644 --- a/src/xact_handling.h +++ b/src/xact_handling.h @@ -13,6 +13,8 @@ #include "pathman.h" +#include "postgres.h" + /* * Transaction locks. @@ -28,5 +30,7 @@ void xact_unlock_rel_exclusive(Oid relid); */ bool xact_bgw_conflicting_lock_exists(Oid relid); bool xact_is_level_read_committed(void); +bool xact_is_transaction_stmt(Node *stmt); +bool xact_is_set_transaction_stmt(Node *stmt); #endif From c521f583496994c2004c6e6387d9724659d44029 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 1 Sep 2016 17:07:58 +0300 Subject: [PATCH 111/184] remove useless check from bgw_main_spawn_partitions() --- src/pathman_workers.c | 12 ------------ src/xact_handling.c | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/src/pathman_workers.c b/src/pathman_workers.c index 2df35b2fc5..c038dea976 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -18,7 +18,6 @@ #include "pathman_workers.h" #include "relation_info.h" #include "utils.h" -#include "xact_handling.h" #include "access/htup_details.h" #include "access/xact.h" @@ -360,17 +359,6 @@ bgw_main_spawn_partitions(Datum main_arg) DebugPrintDatum(value, args->value_type), MyProcPid); #endif - /* Check again if there's a conflicting lock */ - if (xact_bgw_conflicting_lock_exists(args->partitioned_table)) - { - elog(LOG, "%s: there's a conflicting lock on 
relation \"%s\"", - spawn_partitions_bgw, - get_rel_name_or_relid(args->partitioned_table)); - - dsm_detach(segment); - return; /* exit quickly */ - } - /* Create partitions and save the Oid of the last one */ args->result = create_partitions_internal(args->partitioned_table, value, /* unpacked Datum */ diff --git a/src/xact_handling.c b/src/xact_handling.c index 408fe4402d..44d9195bd0 100644 --- a/src/xact_handling.c +++ b/src/xact_handling.c @@ -168,7 +168,7 @@ do_we_hold_the_lock(Oid relid, LOCKMODE lockmode) return false; default: - return false; /* should not happen */ + return false; } } From c7a8abf277dc77f628ae7afb9639c26419de65de Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 1 Sep 2016 18:13:44 +0300 Subject: [PATCH 112/184] slightly improve messages in init.sql --- expected/pg_pathman.out | 2 +- init.sql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index b1240fadff..4fa5aa5602 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1690,7 +1690,7 @@ INSERT INTO messages SELECT g, md5(g::text) FROM generate_series(1, 10) as g; INSERT INTO replies SELECT g, g, md5(g::text) FROM generate_series(1, 10) as g; SELECT create_range_partitions('messages', 'id', 1, 100, 2); WARNING: Foreign key 'replies_message_id_fkey' references to the relation 'messages' -ERROR: Relation 'messages' is referenced from other relations +ERROR: Relation "messages" is referenced from other relations ALTER TABLE replies DROP CONSTRAINT replies_message_id_fkey; SELECT create_range_partitions('messages', 'id', 1, 100, 2); NOTICE: sequence "messages_seq" does not exist, skipping diff --git a/init.sql b/init.sql index 5ca451ca54..09fb7d5440 100644 --- a/init.sql +++ b/init.sql @@ -341,7 +341,7 @@ BEGIN END LOOP; IF is_referenced THEN - RAISE EXCEPTION 'Relation ''%'' is referenced from other relations', p_relation; + RAISE EXCEPTION 'Relation "%" is referenced from other relations', 
p_relation; END IF; RETURN TRUE; @@ -515,7 +515,7 @@ BEGIN DELETE FROM @extschema@.pathman_config_params WHERE partrel = parent_relid; IF conf_num_del = 0 THEN - RAISE EXCEPTION 'table % has no partitions', parent_relid::text; + RAISE EXCEPTION 'Relation "%" has no partitions', parent_relid::text; END IF; FOR v_rec IN (SELECT inhrelid::regclass::text AS tbl From 4227a98f375814c6c06d5bccd3c7c7572f36b56a Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Thu, 1 Sep 2016 18:31:00 +0300 Subject: [PATCH 113/184] travis-ci python tests --- tests/partitioning_test.py | 289 ++++++++++++++++++++----------------- travis/pg-travis-test.sh | 24 ++- 2 files changed, 176 insertions(+), 137 deletions(-) diff --git a/tests/partitioning_test.py b/tests/partitioning_test.py index e8a8d03b33..6dd589b8db 100644 --- a/tests/partitioning_test.py +++ b/tests/partitioning_test.py @@ -40,101 +40,115 @@ def catchup_replica(self, master, replica): 'FROM pg_stat_replication WHERE application_name = \'%s\'' % replica.name) + def printlog(self, logfile): + with open(logfile, 'r') as log: + for line in log.readlines(): + print line + def test_concurrent(self): """Tests concurrent partitioning""" node = get_new_node('test') - node.init() - node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') - node.start() - self.init_test_data(node) - - node.psql('postgres', 'select partition_table_concurrently(\'abc\')') - - while True: - # update some rows to check for deadlocks - # import ipdb; ipdb.set_trace() - node.safe_psql('postgres', - ''' - update abc set t = 'test' - where id in (select (random() * 300000)::int from generate_series(1, 3000)) - ''') - - count = node.execute('postgres', 'select count(*) from pathman_concurrent_part_tasks') - - # if there is no active workers then it means work is done - if count[0][0] == 0: - break - time.sleep(1) - - data = node.execute('postgres', 'select count(*) from only abc') - self.assertEqual(data[0][0], 0) - data = 
node.execute('postgres', 'select count(*) from abc') - self.assertEqual(data[0][0], 300000) - - node.stop() + try: + node.init() + node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + self.init_test_data(node) + + node.psql('postgres', 'select partition_table_concurrently(\'abc\')') + + while True: + # update some rows to check for deadlocks + # import ipdb; ipdb.set_trace() + node.safe_psql('postgres', + ''' + update abc set t = 'test' + where id in (select (random() * 300000)::int from generate_series(1, 3000)) + ''') + + count = node.execute('postgres', 'select count(*) from pathman_concurrent_part_tasks') + + # if there is no active workers then it means work is done + if count[0][0] == 0: + break + time.sleep(1) + + data = node.execute('postgres', 'select count(*) from only abc') + self.assertEqual(data[0][0], 0) + data = node.execute('postgres', 'select count(*) from abc') + self.assertEqual(data[0][0], 300000) + + node.stop() + except Exception, e: + self.printlog(node.logs_dir + '/postgresql.log') + raise e def test_replication(self): """Tests how pg_pathman works with replication""" node = get_new_node('master') replica = get_new_node('repl') - # initialize master server - node.init(allows_streaming=True) - node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') - node.start() - node.backup('my_backup') - - # initialize replica from backup - replica.init_from_backup(node, 'my_backup', has_streaming=True) - replica.start() - - # initialize pg_pathman extension and some test data - self.init_test_data(node) - - # wait until replica catches up - self.catchup_replica(node, replica) - - # check that results are equal - self.assertEqual( - node.psql('postgres', 'explain (costs off) select * from abc'), - replica.psql('postgres', 'explain (costs off) select * from abc') - ) - - # enable parent and see if it is enabled in replica - node.psql('postgres', 'select enable_parent(\'abc\'') - - 
self.catchup_replica(node, replica) - self.assertEqual( - node.psql('postgres', 'explain (costs off) select * from abc'), - replica.psql('postgres', 'explain (costs off) select * from abc') - ) - self.assertEqual( - node.psql('postgres', 'select * from abc'), - replica.psql('postgres', 'select * from abc') - ) - self.assertEqual( - node.execute('postgres', 'select count(*) from abc')[0][0], - 300000 - ) - - # check that direct UPDATE in pathman_config_params invalidates - # cache - node.psql( - 'postgres', - 'update pathman_config_params set enable_parent = false') - self.catchup_replica(node, replica) - self.assertEqual( - node.psql('postgres', 'explain (costs off) select * from abc'), - replica.psql('postgres', 'explain (costs off) select * from abc') - ) - self.assertEqual( - node.psql('postgres', 'select * from abc'), - replica.psql('postgres', 'select * from abc') - ) - self.assertEqual( - node.execute('postgres', 'select count(*) from abc')[0][0], - 0 - ) + try: + # initialize master server + node.init(allows_streaming=True) + node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + node.backup('my_backup') + + # initialize replica from backup + replica.init_from_backup(node, 'my_backup', has_streaming=True) + replica.start() + + # initialize pg_pathman extension and some test data + self.init_test_data(node) + + # wait until replica catches up + self.catchup_replica(node, replica) + + # check that results are equal + self.assertEqual( + node.psql('postgres', 'explain (costs off) select * from abc'), + replica.psql('postgres', 'explain (costs off) select * from abc') + ) + + # enable parent and see if it is enabled in replica + node.psql('postgres', 'select enable_parent(\'abc\'') + + self.catchup_replica(node, replica) + self.assertEqual( + node.psql('postgres', 'explain (costs off) select * from abc'), + replica.psql('postgres', 'explain (costs off) select * from abc') + ) + self.assertEqual( + node.psql('postgres', 
'select * from abc'), + replica.psql('postgres', 'select * from abc') + ) + self.assertEqual( + node.execute('postgres', 'select count(*) from abc')[0][0], + 300000 + ) + + # check that direct UPDATE in pathman_config_params invalidates + # cache + node.psql( + 'postgres', + 'update pathman_config_params set enable_parent = false') + self.catchup_replica(node, replica) + self.assertEqual( + node.psql('postgres', 'explain (costs off) select * from abc'), + replica.psql('postgres', 'explain (costs off) select * from abc') + ) + self.assertEqual( + node.psql('postgres', 'select * from abc'), + replica.psql('postgres', 'select * from abc') + ) + self.assertEqual( + node.execute('postgres', 'select count(*) from abc')[0][0], + 0 + ) + except Exception, e: + self.printlog(node.logs_dir + '/postgresql.log') + self.printlog(replica.logs_dir + '/postgresql.log') + raise e def test_locks(self): """Test that a session trying to create new partitions waits for other @@ -170,61 +184,66 @@ def add_partition(node, flag, query): # Initialize master server node = get_new_node('master') - node.init() - node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') - node.start() - node.safe_psql( - 'postgres', - 'create extension pg_pathman; ' - + 'create table abc(id serial, t text); ' - + 'insert into abc select generate_series(1, 100000); ' - + 'select create_range_partitions(\'abc\', \'id\', 1, 50000);' - ) - - # Start transaction that will create partition - con = node.connect() - con.begin() - con.execute('select append_range_partition(\'abc\')') - - # Start threads that suppose to add new partitions and wait some time - query = [ - 'select prepend_range_partition(\'abc\')', - 'select append_range_partition(\'abc\')', - 'select add_range_partition(\'abc\', 500000, 550000)', - ] - threads = [] - for i in range(3): - thread = \ - threading.Thread(target=add_partition, args=(node, flags[i], query[i])) - threads.append(thread) - thread.start() - time.sleep(3) - - 
# This threads should wait until current transaction finished - with lock: + + try: + node.init() + node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + node.safe_psql( + 'postgres', + 'create extension pg_pathman; ' + + 'create table abc(id serial, t text); ' + + 'insert into abc select generate_series(1, 100000); ' + + 'select create_range_partitions(\'abc\', \'id\', 1, 50000);' + ) + + # Start transaction that will create partition + con = node.connect() + con.begin() + con.execute('select append_range_partition(\'abc\')') + + # Start threads that suppose to add new partitions and wait some time + query = [ + 'select prepend_range_partition(\'abc\')', + 'select append_range_partition(\'abc\')', + 'select add_range_partition(\'abc\', 500000, 550000)', + ] + threads = [] for i in range(3): - self.assertEqual(flags[i].get(), False) + thread = \ + threading.Thread(target=add_partition, args=(node, flags[i], query[i])) + threads.append(thread) + thread.start() + time.sleep(3) - # Commit transaction. Since then other sessions can create partitions - con.commit() + # This threads should wait until current transaction finished + with lock: + for i in range(3): + self.assertEqual(flags[i].get(), False) - # Now wait until each thread finishes - for i in range(3): - threads[i].join() + # Commit transaction. 
Since then other sessions can create partitions + con.commit() - # Check flags, it should be true which means that threads are finished - with lock: + # Now wait until each thread finishes for i in range(3): - self.assertEqual(flags[i].get(), True) + threads[i].join() - # Check that all partitions are created - self.assertEqual( - node.safe_psql( - 'postgres', - 'select count(*) from pg_inherits where inhparent=\'abc\'::regclass' - ), - '6\n' - ) + # Check flags, it should be true which means that threads are finished + with lock: + for i in range(3): + self.assertEqual(flags[i].get(), True) + + # Check that all partitions are created + self.assertEqual( + node.safe_psql( + 'postgres', + 'select count(*) from pg_inherits where inhparent=\'abc\'::regclass' + ), + '6\n' + ) + except Exception, e: + self.printlog(node.logs_dir + '/postgresql.log') + raise e if __name__ == "__main__": unittest.main() diff --git a/travis/pg-travis-test.sh b/travis/pg-travis-test.sh index 1ad78bd2d4..44552ae35c 100644 --- a/travis/pg-travis-test.sh +++ b/travis/pg-travis-test.sh @@ -6,7 +6,8 @@ sudo apt-get update # required packages -packages="postgresql-$PGVERSION postgresql-server-dev-$PGVERSION postgresql-common" +apt_packages="postgresql-$PGVERSION postgresql-server-dev-$PGVERSION postgresql-common python-pip python-dev build-essential" +pip_packages="testgres" # exit code status=0 @@ -25,7 +26,7 @@ echo 'exit 0' | sudo tee /etc/init.d/postgresql sudo chmod a+x /etc/init.d/postgresql # install required packages -sudo apt-get -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -y install -qq $packages +sudo apt-get -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -y install -qq $apt_packages # create cluster 'test' sudo pg_createcluster --start $PGVERSION test -p 55435 -- -A trust @@ -73,4 +74,23 @@ PGPORT=55435 make installcheck USE_PGXS=1 PGUSER=postgres PG_CONFIG=$config_path # show diff if it exists if test -f regression.diffs; then 
cat regression.diffs; fi + +set +u + +# create a virtual environment and activate it +virtualenv /tmp/envs/pg_pathman +source /tmp/envs/pg_pathman/bin/activate + +# install pip packages +pip install $pip_packages + +# set permission to write postgres locks +sudo chmod a+w /var/run/postgresql/ + +# run python tests +cd tests +PG_CONFIG=$config_path python -m unittest partitioning_test || status=$? + +set -u + exit $status From d00a9ae250ab38ec73ec071a04eb8ae7eeae401e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 6 Sep 2016 00:40:59 +0300 Subject: [PATCH 114/184] fixed build_check_constraint_name_internal() (don't use Oids) --- src/init.c | 2 +- src/pl_funcs.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/init.c b/src/init.c index 940bfddff9..60eff1adb3 100644 --- a/src/init.c +++ b/src/init.c @@ -552,7 +552,7 @@ find_inheritance_children_array(Oid parentrelId, LOCKMODE lockmode, uint32 *size char * build_check_constraint_name_internal(Oid relid, AttrNumber attno) { - return psprintf("pathman_%u_%u_check", relid, attno); + return psprintf("pathman_%s_%u_check", get_rel_name(relid), attno); } /* diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 56d44a01a2..a647a73075 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -519,7 +519,7 @@ build_check_constraint_name_attnum(PG_FUNCTION_ARGS) result = build_check_constraint_name_internal(relid, attnum); - PG_RETURN_TEXT_P(cstring_to_text(result)); + PG_RETURN_TEXT_P(cstring_to_text(quote_identifier(result))); } Datum @@ -539,7 +539,7 @@ build_check_constraint_name_attname(PG_FUNCTION_ARGS) result = build_check_constraint_name_internal(relid, attnum); - PG_RETURN_TEXT_P(cstring_to_text(result)); + PG_RETURN_TEXT_P(cstring_to_text(quote_identifier(result))); } Datum From e7da26f1666e0b684f08100f42e0763f5fac6733 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 6 Sep 2016 01:18:12 +0300 Subject: [PATCH 115/184] version 1.0 --- Makefile | 2 +- pg_pathman.control | 4 ++-- 2 files 
changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 3fa2b737cf..4224a99b2c 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ OBJS = src/init.o src/relation_info.o src/utils.o src/partition_filter.o src/run src/pathman_workers.o src/hooks.o src/nodes_common.o src/xact_handling.o $(WIN32RES) EXTENSION = pg_pathman -EXTVERSION = 1.0_rc1 +EXTVERSION = 1.0 DATA_built = $(EXTENSION)--$(EXTVERSION).sql PGFILEDESC = "pg_pathman - partitioning tool" diff --git a/pg_pathman.control b/pg_pathman.control index 7831b8759d..ecc4ef641f 100644 --- a/pg_pathman.control +++ b/pg_pathman.control @@ -1,4 +1,4 @@ # pg_pathman extension -comment 'Partitioning tool ver. 1.0_rc1' -default_version = '1.0_rc1' +comment 'Partitioning tool ver. 1.0' +default_version = '1.0' module_pathname='$libdir/pg_pathman' From ddde0e9d0ed8c4021eaa64902abdbb89bfff32b8 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 6 Sep 2016 01:55:55 +0300 Subject: [PATCH 116/184] add PGXN version badge to README.md & README.rus.md --- README.md | 1 + README.rus.md | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index 0c5c36ce44..ba404b0dc9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ [![Build Status](https://travis-ci.org/postgrespro/pg_pathman.svg?branch=master)](https://travis-ci.org/postgrespro/pg_pathman) +[![PGXN version](https://badge.fury.io/pg/pg_pathman.svg)](https://badge.fury.io/pg/pg_pathman) # pg_pathman diff --git a/README.rus.md b/README.rus.md index a06f25ceca..f4b31bad7a 100644 --- a/README.rus.md +++ b/README.rus.md @@ -1,4 +1,5 @@ [![Build Status](https://travis-ci.org/postgrespro/pg_pathman.svg?branch=master)](https://travis-ci.org/postgrespro/pg_pathman) +[![PGXN version](https://badge.fury.io/pg/pg_pathman.svg)](https://badge.fury.io/pg/pg_pathman) # pg_pathman From 1bcb666c60d1be3d6fa9e1269ecf3c6641cab7a2 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 6 Sep 2016 17:13:57 +0300 Subject: [PATCH 117/184] don't call 
parent's triggers on children (this does not work with foreign keys anyway) --- src/partition_filter.c | 43 ++++++------------------ src/utils.c | 75 ------------------------------------------ src/utils.h | 3 -- 3 files changed, 10 insertions(+), 111 deletions(-) diff --git a/src/partition_filter.c b/src/partition_filter.c index 71e1b8946e..85e0649004 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -232,7 +232,9 @@ partition_filter_exec(CustomScanState *node) ResetExprContext(econtext); /* Replace parent table with a suitable partition */ + old_cxt = MemoryContextSwitchTo(estate->es_query_cxt); estate->es_result_relation_info = getResultRelInfo(selected_partid, state); + MemoryContextSwitchTo(old_cxt); return slot; } @@ -285,24 +287,16 @@ getResultRelInfo(Oid partid, PartitionFilterState *state) #define CopyToResultRelInfo(field_name) \ ( resultRelInfo->field_name = state->savedRelInfo->field_name ) -#define ResizeTriggerField(field_name, field_type) \ - do { \ - if (resultRelInfo->field_name) \ - pfree(resultRelInfo->field_name); \ - resultRelInfo->field_name = (field_type *) \ - palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(field_type)); \ - } while (0) - - ResultRelInfoHolder *resultRelInfoHandle; + ResultRelInfoHolder *resultRelInfoHolder; bool found; - resultRelInfoHandle = hash_search(state->result_rels_table, + resultRelInfoHolder = hash_search(state->result_rels_table, (const void *) &partid, HASH_ENTER, &found); + /* If not found, create & cache new ResultRelInfo */ if (!found) { - bool grown_up; ResultRelInfo *resultRelInfo = (ResultRelInfo *) palloc(sizeof(ResultRelInfo)); InitResultRelInfo(resultRelInfo, @@ -312,24 +306,6 @@ getResultRelInfo(Oid partid, PartitionFilterState *state) ExecOpenIndices(resultRelInfo, state->onConflictAction != ONCONFLICT_NONE); - resultRelInfo->ri_TrigDesc = append_trigger_descs(resultRelInfo->ri_TrigDesc, - state->savedRelInfo->ri_TrigDesc, - &grown_up); - if (grown_up) - { - 
ResizeTriggerField(ri_TrigFunctions, FmgrInfo); - ResizeTriggerField(ri_TrigWhenExprs, List *); - - if (resultRelInfo->ri_TrigInstrument) - { - pfree(resultRelInfo->ri_TrigInstrument); - - resultRelInfo->ri_TrigInstrument = - InstrAlloc(resultRelInfo->ri_TrigDesc->numtriggers, - state->css.ss.ps.state->es_instrument); - } - } - /* Copy necessary fields from saved ResultRelInfo */ CopyToResultRelInfo(ri_WithCheckOptions); CopyToResultRelInfo(ri_WithCheckOptionExprs); @@ -341,14 +317,15 @@ getResultRelInfo(Oid partid, PartitionFilterState *state) /* ri_ConstraintExprs will be initialized by ExecRelCheck() */ resultRelInfo->ri_ConstraintExprs = NULL; - resultRelInfoHandle->partid = partid; - resultRelInfoHandle->resultRelInfo = resultRelInfo; - /* Make 'range table index' point to the parent relation */ resultRelInfo->ri_RangeTableIndex = state->savedRelInfo->ri_RangeTableIndex; + + /* Now fill the ResultRelInfo holder */ + resultRelInfoHolder->partid = partid; + resultRelInfoHolder->resultRelInfo = resultRelInfo; } - return resultRelInfoHandle->resultRelInfo; + return resultRelInfoHolder->resultRelInfo; } /* diff --git a/src/utils.c b/src/utils.c index 4176cdd857..e3bc1df434 100644 --- a/src/utils.c +++ b/src/utils.c @@ -279,81 +279,6 @@ check_rinfo_for_partitioned_attr(List *rinfo, Index varno, AttrNumber varattno) return false; } -/* - * Append trigger info contained in 'more' to 'src', both remain unmodified. - * - * This allows us to execute some of main table's triggers on children. - * See ExecInsert() for more details. 
- */ -TriggerDesc * -append_trigger_descs(TriggerDesc *src, TriggerDesc *more, bool *grown_up) -{ -#define CopyToTriggerDesc(bool_field_name) \ - ( new_desc->bool_field_name |= (src->bool_field_name || more->bool_field_name) ) - - TriggerDesc *new_desc = (TriggerDesc *) palloc0(sizeof(TriggerDesc)); - Trigger *cur_trigger; - int i; - - /* Quick choices */ - if (!src && !more) - { - *grown_up = false; - return NULL; - } - else if (!src) - { - *grown_up = true; /* expand space for new triggers */ - return more; - } - else if (!more) - { - *grown_up = false; /* no new triggers will be added */ - return src; - } - - *grown_up = true; - new_desc->numtriggers = src->numtriggers + more->numtriggers; - new_desc->triggers = (Trigger *) palloc(new_desc->numtriggers * sizeof(Trigger)); - - cur_trigger = new_desc->triggers; - - /* Copy triggers from 'a' */ - for (i = 0; i < src->numtriggers; i++) - memcpy(cur_trigger++, &(src->triggers[i]), sizeof(Trigger)); - - /* Copy triggers from 'b' */ - for (i = 0; i < more->numtriggers; i++) - memcpy(cur_trigger++, &(more->triggers[i]), sizeof(Trigger)); - - /* Copy insert bool flags */ - CopyToTriggerDesc(trig_insert_before_row); - CopyToTriggerDesc(trig_insert_after_row); - CopyToTriggerDesc(trig_insert_instead_row); - CopyToTriggerDesc(trig_insert_before_statement); - CopyToTriggerDesc(trig_insert_after_statement); - - /* Copy update bool flags */ - CopyToTriggerDesc(trig_update_before_row); - CopyToTriggerDesc(trig_update_after_row); - CopyToTriggerDesc(trig_update_instead_row); - CopyToTriggerDesc(trig_update_before_statement); - CopyToTriggerDesc(trig_update_after_statement); - - /* Copy delete bool flags */ - CopyToTriggerDesc(trig_delete_before_row); - CopyToTriggerDesc(trig_delete_after_row); - CopyToTriggerDesc(trig_delete_instead_row); - CopyToTriggerDesc(trig_delete_before_statement); - CopyToTriggerDesc(trig_delete_after_statement); - - /* Copy truncate bool flags */ - CopyToTriggerDesc(trig_truncate_before_statement); - 
CopyToTriggerDesc(trig_truncate_after_statement); - - return new_desc; -} - /* * Get BTORDER_PROC for two types described by Oids */ diff --git a/src/utils.h b/src/utils.h index cd8419c3b5..94272bf024 100644 --- a/src/utils.h +++ b/src/utils.h @@ -36,9 +36,6 @@ List * build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); void change_varnos(Node *node, Oid old_varno, Oid new_varno); -TriggerDesc * append_trigger_descs(TriggerDesc *src, - TriggerDesc *more, - bool *grown_up); /* * Rowmark processing. From 562cab6c78bdd4fc4a4234d825bb2a4075dbf938 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 6 Sep 2016 18:42:49 +0300 Subject: [PATCH 118/184] introduce function copy_foreign_keys(), generate fkeys on children --- hash.sql | 4 +++- init.sql | 25 +++++++++++++++++++++++++ range.sql | 2 ++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/hash.sql b/hash.sql index 2df89fd731..fd113aad3e 100644 --- a/hash.sql +++ b/hash.sql @@ -63,12 +63,14 @@ BEGIN EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (@extschema@.get_hash_part_idx(%s(%s), %s) = %s)', v_child_relname, - @extschema@.build_check_constraint_name(v_child_relname::regclass, + @extschema@.build_check_constraint_name(v_child_relname::REGCLASS, attribute), v_hashfunc, attribute, partitions_count, partnum); + + PERFORM @extschema@.copy_foreign_keys(parent_relid, v_child_relname::REGCLASS); END LOOP; /* Notify backend about changes */ diff --git a/init.sql b/init.sql index 09fb7d5440..8d9f6e6b8b 100644 --- a/init.sql +++ b/init.sql @@ -546,6 +546,31 @@ $$ LANGUAGE plpgsql SET pg_pathman.enable_partitionfilter = off; /* ensures that PartitionFilter is OFF */ +/* + * Copy all of parent's foreign keys. 
+ */ +CREATE OR REPLACE FUNCTION @extschema@.copy_foreign_keys( + parent_relid REGCLASS, + partition REGCLASS) +RETURNS VOID AS +$$ +DECLARE + rec RECORD; + +BEGIN + PERFORM @extschema@.validate_relname(parent_relid); + PERFORM @extschema@.validate_relname(partition); + + FOR rec IN (SELECT oid as conid FROM pg_catalog.pg_constraint + WHERE conrelid = parent_relid AND contype = 'f') + LOOP + EXECUTE format('ALTER TABLE %s ADD %s', + partition::TEXT, + pg_get_constraintdef(rec.conid)); + END LOOP; +END +$$ LANGUAGE plpgsql; + /* * Create DDL trigger to call pathman_ddl_trigger_func(). diff --git a/range.sql b/range.sql index 8c1511c033..d837df0c10 100644 --- a/range.sql +++ b/range.sql @@ -478,6 +478,8 @@ BEGIN p_start_value, p_end_value)); + PERFORM @extschema@.copy_foreign_keys(parent_relid, v_child_relname::REGCLASS); + RETURN v_child_relname; END $$ LANGUAGE plpgsql From a9101d1cda25e401b53834b7df156f62542059c5 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 6 Sep 2016 19:03:56 +0300 Subject: [PATCH 119/184] add regression tests for copied foreign keys --- expected/pg_pathman.out | 59 +++++++++++++++++++++++++++++++++++++++++ sql/pg_pathman.sql | 20 ++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 4fa5aa5602..00570b92db 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1707,3 +1707,62 @@ EXPLAIN (COSTS OFF) SELECT * FROM messages; -> Seq Scan on messages_2 (3 rows) +/* Check primary keys generation */ +CREATE TABLE test_ref(comment TEXT UNIQUE); +INSERT INTO test_ref VALUES('test'); +CREATE TABLE test_fkey( + id INT NOT NULL, + comment TEXT, + FOREIGN KEY (comment) REFERENCES test_ref(comment)); +INSERT INTO test_fkey SELECT generate_series(1, 1000), 'test'; +SELECT create_range_partitions('test_fkey', 'id', 1, 100); +NOTICE: sequence "test_fkey_seq" does not exist, skipping + create_range_partitions +------------------------- + 10 +(1 row) + +INSERT INTO test_fkey 
VALUES(1, 'wrong'); +ERROR: insert or update on table "test_fkey_1" violates foreign key constraint "test_fkey_1_comment_fkey" +SELECT drop_partitions('test_fkey'); +NOTICE: function public.test_fkey_upd_trig_func() does not exist, skipping +NOTICE: 100 rows copied from test_fkey_10 +NOTICE: 100 rows copied from test_fkey_9 +NOTICE: 100 rows copied from test_fkey_8 +NOTICE: 100 rows copied from test_fkey_7 +NOTICE: 100 rows copied from test_fkey_6 +NOTICE: 100 rows copied from test_fkey_5 +NOTICE: 100 rows copied from test_fkey_4 +NOTICE: 100 rows copied from test_fkey_3 +NOTICE: 100 rows copied from test_fkey_2 +NOTICE: 100 rows copied from test_fkey_1 + drop_partitions +----------------- + 10 +(1 row) + +SELECT create_hash_partitions('test_fkey', 'id', 10); + create_hash_partitions +------------------------ + 10 +(1 row) + +INSERT INTO test_fkey VALUES(1, 'wrong'); +ERROR: insert or update on table "test_fkey_0" violates foreign key constraint "test_fkey_0_comment_fkey" +SELECT drop_partitions('test_fkey'); +NOTICE: function public.test_fkey_upd_trig_func() does not exist, skipping +NOTICE: 94 rows copied from test_fkey_9 +NOTICE: 108 rows copied from test_fkey_8 +NOTICE: 118 rows copied from test_fkey_7 +NOTICE: 95 rows copied from test_fkey_6 +NOTICE: 90 rows copied from test_fkey_5 +NOTICE: 101 rows copied from test_fkey_4 +NOTICE: 116 rows copied from test_fkey_3 +NOTICE: 90 rows copied from test_fkey_2 +NOTICE: 90 rows copied from test_fkey_1 +NOTICE: 98 rows copied from test_fkey_0 + drop_partitions +----------------- + 10 +(1 row) + diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 1cf5118d4e..6ae75f303d 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -644,3 +644,23 @@ SELECT create_range_partitions('messages', 'id', 1, 100, 2); ALTER TABLE replies DROP CONSTRAINT replies_message_id_fkey; SELECT create_range_partitions('messages', 'id', 1, 100, 2); EXPLAIN (COSTS OFF) SELECT * FROM messages; + +/* Check primary keys generation */ 
+CREATE TABLE test_ref(comment TEXT UNIQUE); +INSERT INTO test_ref VALUES('test'); + +CREATE TABLE test_fkey( + id INT NOT NULL, + comment TEXT, + FOREIGN KEY (comment) REFERENCES test_ref(comment)); + +INSERT INTO test_fkey SELECT generate_series(1, 1000), 'test'; + +SELECT create_range_partitions('test_fkey', 'id', 1, 100); +INSERT INTO test_fkey VALUES(1, 'wrong'); +INSERT INTO test_fkey VALUES(1, 'test'); +SELECT drop_partitions('test_fkey'); + +SELECT create_hash_partitions('test_fkey', 'id', 10); +INSERT INTO test_fkey VALUES(1, 'test'); +SELECT drop_partitions('test_fkey'); From fe6e10cb8cd2acb2365830e29639428b8e647949 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 6 Sep 2016 19:12:45 +0300 Subject: [PATCH 120/184] fix regression tests for copied foreign keys --- expected/pg_pathman.out | 6 ++++-- sql/pg_pathman.sql | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 00570b92db..ade6d8b711 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1724,6 +1724,7 @@ NOTICE: sequence "test_fkey_seq" does not exist, skipping INSERT INTO test_fkey VALUES(1, 'wrong'); ERROR: insert or update on table "test_fkey_1" violates foreign key constraint "test_fkey_1_comment_fkey" +INSERT INTO test_fkey VALUES(1, 'test'); SELECT drop_partitions('test_fkey'); NOTICE: function public.test_fkey_upd_trig_func() does not exist, skipping NOTICE: 100 rows copied from test_fkey_10 @@ -1735,7 +1736,7 @@ NOTICE: 100 rows copied from test_fkey_5 NOTICE: 100 rows copied from test_fkey_4 NOTICE: 100 rows copied from test_fkey_3 NOTICE: 100 rows copied from test_fkey_2 -NOTICE: 100 rows copied from test_fkey_1 +NOTICE: 101 rows copied from test_fkey_1 drop_partitions ----------------- 10 @@ -1749,6 +1750,7 @@ SELECT create_hash_partitions('test_fkey', 'id', 10); INSERT INTO test_fkey VALUES(1, 'wrong'); ERROR: insert or update on table "test_fkey_0" violates foreign key constraint 
"test_fkey_0_comment_fkey" +INSERT INTO test_fkey VALUES(1, 'test'); SELECT drop_partitions('test_fkey'); NOTICE: function public.test_fkey_upd_trig_func() does not exist, skipping NOTICE: 94 rows copied from test_fkey_9 @@ -1760,7 +1762,7 @@ NOTICE: 101 rows copied from test_fkey_4 NOTICE: 116 rows copied from test_fkey_3 NOTICE: 90 rows copied from test_fkey_2 NOTICE: 90 rows copied from test_fkey_1 -NOTICE: 98 rows copied from test_fkey_0 +NOTICE: 100 rows copied from test_fkey_0 drop_partitions ----------------- 10 diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 6ae75f303d..6a8a424188 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -662,5 +662,6 @@ INSERT INTO test_fkey VALUES(1, 'test'); SELECT drop_partitions('test_fkey'); SELECT create_hash_partitions('test_fkey', 'id', 10); +INSERT INTO test_fkey VALUES(1, 'wrong'); INSERT INTO test_fkey VALUES(1, 'test'); SELECT drop_partitions('test_fkey'); From e5a1624d9622b1e97d7744b4fa4cc2ab114e9339 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 6 Sep 2016 22:35:45 +0300 Subject: [PATCH 121/184] improve documentation (closes #27) --- README.md | 4 ++-- README.rus.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ba404b0dc9..06c47e9e4d 100644 --- a/README.md +++ b/README.md @@ -142,13 +142,13 @@ Merge two adjacent RANGE partitions. First, data from `partition2` is copied to append_range_partition(p_relation REGCLASS, partition_name TEXT DEFAULT NULL) ``` -Append new RANGE partition. +Append new RANGE partition with `pathman_config.range_interval` as interval. ```plpgsql prepend_range_partition(p_relation REGCLASS, partition_name TEXT DEFAULT NULL) ``` -Prepend new RANGE partition. +Prepend new RANGE partition with `pathman_config.range_interval` as interval. 
```plpgsql add_range_partition(relation REGCLASS, diff --git a/README.rus.md b/README.rus.md index f4b31bad7a..789f0d8a88 100644 --- a/README.rus.md +++ b/README.rus.md @@ -144,13 +144,13 @@ merge_range_partitions(partition1 REGCLASS, partition2 REGCLASS) append_range_partition(p_relation REGCLASS, partition_name TEXT DEFAULT NULL) ``` -Добавляет новую RANGE секцию в конец списка секций. +Добавляет новую RANGE секцию с диапазоном `pathman_config.range_interval` в конец списка секций. ```plpgsql prepend_range_partition(p_relation REGCLASS, partition_name TEXT DEFAULT NULL) ``` -Добавляет новую RANGE секцию в начало списка секций. +Добавляет новую RANGE секцию с диапазоном `pathman_config.range_interval` в начало списка секций. ```plpgsql add_range_partition(relation REGCLASS, From 370fca76123d1a7fddb5fd2d552941424076daee Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 6 Sep 2016 22:45:09 +0300 Subject: [PATCH 122/184] add compatibility status to README.md & README.rus.md --- README.md | 2 ++ README.rus.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 06c47e9e4d..a0c91b0c37 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ The `pg_pathman` module provides optimized partitioning mechanism and functions to manage partitions. +The extension is compatible with PostgreSQL 9.5 (9.6 support is coming soon). + ## Overview **Partitioning** means splitting one large table into smaller pieces. Each row in such table is moved to a single partition according to the partitioning key. PostgreSQL supports partitioning via table inheritance: each partition must be created as a child table with CHECK CONSTRAINT. For example: diff --git a/README.rus.md b/README.rus.md index 789f0d8a88..6acea3c5d1 100644 --- a/README.rus.md +++ b/README.rus.md @@ -5,6 +5,8 @@ Модуль `pg_pathman` предоставляет оптимизированный механизм секционирования, а также функции для создания и управления секциями. 
+Расширение совместимо с PostgreSQL 9.5 (поддержка 9.6 будет добавлена в одном из ближайших обновлений). + ## Концепция pg_pathman **Секционирование** -- это способ разбиения одной большой таблицы на множество меньших по размеру. Для каждой записи можно однозначно определить секцию, в которой она должна храниться посредством вычисления ключа. From c7c7d71eeecc9b4caf6acab0262187a30e1bc999 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 7 Sep 2016 11:23:21 +0300 Subject: [PATCH 123/184] fix null pointer access in get_tableoids_list() --- src/utils.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/utils.c b/src/utils.c index e3bc1df434..56455e473b 100644 --- a/src/utils.c +++ b/src/utils.c @@ -101,9 +101,10 @@ get_tableoids_list(List *tlist) if (!IsA(var, Var)) continue; - if (strlen(te->resname) > TABLEOID_STR_BASE_LEN && - 0 == strncmp(te->resname, TABLEOID_STR(""), TABLEOID_STR_BASE_LEN) && - var->varoattno == TableOidAttributeNumber) + /* Check that column name begins with TABLEOID_STR & it's tableoid */ + if (var->varoattno == TableOidAttributeNumber && + (te->resname && strlen(te->resname) > TABLEOID_STR_BASE_LEN) && + 0 == strncmp(te->resname, TABLEOID_STR(""), TABLEOID_STR_BASE_LEN)) { result = lappend(result, te); } From 86f6f8bdf1b4bba553c5ed6267c06a12b50a44da Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 7 Sep 2016 12:23:05 +0300 Subject: [PATCH 124/184] introduce rowmark_add_tableoids_walker(), fix rowmark_add_tableoids() --- expected/pg_pathman.out | 38 +++++++++++++++ sql/pg_pathman.sql | 7 +++ src/utils.c | 103 +++++++++++++++++++++------------------- 3 files changed, 98 insertions(+), 50 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index ade6d8b711..dbf85012b2 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1768,3 +1768,41 @@ NOTICE: 100 rows copied from test_fkey_0 10 (1 row) +/* Check rowmarks */ +SELECT create_hash_partitions('test_fkey', 'id', 5); +
create_hash_partitions +------------------------ + 5 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM test_fkey +WHERE id = (SELECT id FROM test_fkey ORDER BY id OFFSET 10 LIMIT 1 FOR UPDATE) +FOR SHARE; + QUERY PLAN +----------------------------------------------------------------------- + LockRows + InitPlan 1 (returns $1) + -> Limit + -> LockRows + -> Sort + Sort Key: test_fkey_0_1.id + -> Append + -> Seq Scan on test_fkey_0 test_fkey_0_1 + -> Seq Scan on test_fkey_1 test_fkey_1_1 + -> Seq Scan on test_fkey_2 test_fkey_2_1 + -> Seq Scan on test_fkey_3 test_fkey_3_1 + -> Seq Scan on test_fkey_4 test_fkey_4_1 + -> Append + -> Seq Scan on test_fkey_0 + Filter: (id = $1) + -> Seq Scan on test_fkey_1 + Filter: (id = $1) + -> Seq Scan on test_fkey_2 + Filter: (id = $1) + -> Seq Scan on test_fkey_3 + Filter: (id = $1) + -> Seq Scan on test_fkey_4 + Filter: (id = $1) +(23 rows) + diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 6a8a424188..0ebe0ce56b 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -665,3 +665,10 @@ SELECT create_hash_partitions('test_fkey', 'id', 10); INSERT INTO test_fkey VALUES(1, 'wrong'); INSERT INTO test_fkey VALUES(1, 'test'); SELECT drop_partitions('test_fkey'); + +/* Check rowmarks */ +SELECT create_hash_partitions('test_fkey', 'id', 5); +EXPLAIN (COSTS OFF) +SELECT * FROM test_fkey +WHERE id = (SELECT id FROM test_fkey ORDER BY id OFFSET 10 LIMIT 1 FOR UPDATE) +FOR SHARE; diff --git a/src/utils.c b/src/utils.c index 56455e473b..d72999f57f 100644 --- a/src/utils.c +++ b/src/utils.c @@ -39,6 +39,7 @@ static void change_varnos_in_restrinct_info(RestrictInfo *rinfo, static bool change_varno_walker(Node *node, change_varno_context *context); static List *get_tableoids_list(List *tlist); static void lock_rows_visitor(Plan *plan, void *context); +static bool rowmark_add_tableoids_walker(Node *node, void *context); /* @@ -492,6 +493,57 @@ plan_tree_walker(Plan *plan, visitor(plan, context); } +static bool 
+rowmark_add_tableoids_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + + if (IsA(node, Query)) + { + Query *parse = (Query *) node; + ListCell *lc; + + /* Generate 'tableoid' for partitioned table rowmark */ + foreach (lc, parse->rowMarks) + { + RowMarkClause *rc = (RowMarkClause *) lfirst(lc); + Oid parent = getrelid(rc->rti, parse->rtable); + Var *var; + TargetEntry *tle; + char resname[64]; + + /* Check that table is partitioned */ + if (!get_pathman_relation_info(parent)) + continue; + + var = makeVar(rc->rti, + TableOidAttributeNumber, + OIDOID, + -1, + InvalidOid, + 0); + + /* Use parent's Oid as TABLEOID_STR's key (%u) */ + snprintf(resname, sizeof(resname), TABLEOID_STR("%u"), parent); + + tle = makeTargetEntry((Expr *) var, + list_length(parse->targetList) + 1, + pstrdup(resname), + true); + + /* There's no problem here since new attribute is junk */ + parse->targetList = lappend(parse->targetList, tle); + } + + return query_tree_walker((Query *) node, + rowmark_add_tableoids_walker, + NULL, 0); + } + + return expression_tree_walker(node, rowmark_add_tableoids_walker, NULL); +} + /* * Add missing 'TABLEOID_STR%u' junk attributes for inherited partitions * @@ -504,56 +556,7 @@ plan_tree_walker(Plan *plan, void rowmark_add_tableoids(Query *parse) { - ListCell *lc; - - check_stack_depth(); - - foreach(lc, parse->rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); - - switch(rte->rtekind) - { - case RTE_SUBQUERY: - rowmark_add_tableoids(rte->subquery); - break; - - default: - break; - } - } - - /* Generate 'tableoid' for partitioned table rowmark */ - foreach (lc, parse->rowMarks) - { - RowMarkClause *rc = (RowMarkClause *) lfirst(lc); - Oid parent = getrelid(rc->rti, parse->rtable); - Var *var; - TargetEntry *tle; - char resname[64]; - - /* Check that table is partitioned */ - if (!get_pathman_relation_info(parent)) - continue; - - var = makeVar(rc->rti, - TableOidAttributeNumber, - OIDOID, - -1, - InvalidOid, - 0); - - 
/* Use parent's Oid as TABLEOID_STR's key (%u) */ - snprintf(resname, sizeof(resname), TABLEOID_STR("%u"), parent); - - tle = makeTargetEntry((Expr *) var, - list_length(parse->targetList) + 1, - pstrdup(resname), - true); - - /* There's no problem here since new attribute is junk */ - parse->targetList = lappend(parse->targetList, tle); - } + rowmark_add_tableoids_walker((Node *) parse, NULL); } /* From 71b348d5d96b33fb98d7073d5d977a984a1c85fd Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 7 Sep 2016 17:09:04 +0300 Subject: [PATCH 125/184] fix runtime clauses for RuntimeAppend (select clause provided that it contains only relevant cols), introduce function get_partitioned_attr_clauses() --- src/hooks.c | 22 ++++++++++----------- src/nodes_common.c | 38 +++++++++++++++++++++++++++++++++-- src/nodes_common.h | 4 ++++ src/utils.c | 49 ---------------------------------------------- src/utils.h | 6 ------ 5 files changed, 50 insertions(+), 69 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index c29820edb9..2c98f47dd0 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -101,9 +101,8 @@ pathman_join_pathlist_hook(PlannerInfo *root, /* Check that innerrel's RestrictInfo contains partitioned column */ innerrel_rinfo_contains_part_attr = - check_rinfo_for_partitioned_attr(innerrel->baserestrictinfo, - innerrel->relid, - inner_prel->attnum); + get_partitioned_attr_clauses(innerrel->baserestrictinfo, + inner_prel, innerrel->relid) != NULL; foreach (lc, innerrel->pathlist) { @@ -132,9 +131,9 @@ pathman_join_pathlist_hook(PlannerInfo *root, * ppi->ppi_clauses reference partition attribute */ if (!(innerrel_rinfo_contains_part_attr || - (ppi && check_rinfo_for_partitioned_attr(ppi->ppi_clauses, - innerrel->relid, - inner_prel->attnum)))) + (ppi && get_partitioned_attr_clauses(ppi->ppi_clauses, + inner_prel, + innerrel->relid)))) continue; inner = create_runtimeappend_path(root, cur_inner_path, @@ -310,10 +309,10 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo 
*rel, Index rti, RangeTb if (!clause_contains_params((Node *) get_actual_clauses(rel->baserestrictinfo))) return; + /* Check that rel's RestrictInfo contains partitioned column */ rel_rinfo_contains_part_attr = - check_rinfo_for_partitioned_attr(rel->baserestrictinfo, - rel->relid, - prel->attnum); + get_partitioned_attr_clauses(rel->baserestrictinfo, + prel, rel->relid) != NULL; foreach (lc, rel->pathlist) { @@ -334,9 +333,8 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb * ppi->ppi_clauses reference partition attribute */ if (!(rel_rinfo_contains_part_attr || - (ppi && check_rinfo_for_partitioned_attr(ppi->ppi_clauses, - rel->relid, - prel->attnum)))) + (ppi && get_partitioned_attr_clauses(ppi->ppi_clauses, + prel, rel->relid)))) continue; if (IsA(cur_path, AppendPath) && pg_pathman_enable_runtimeappend) diff --git a/src/nodes_common.c b/src/nodes_common.c index d092f625bf..5b8f48e84c 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -12,7 +12,9 @@ #include "runtimeappend.h" #include "utils.h" +#include "access/sysattr.h" #include "optimizer/restrictinfo.h" +#include "optimizer/var.h" #include "utils/memutils.h" @@ -248,6 +250,38 @@ unpack_runtimeappend_private(RuntimeAppendState *scan_state, CustomScan *cscan) scan_state->enable_parent = (bool) linitial_int(lthird(runtimeappend_private)); } +/* + * Filter all available clauses and extract relevant ones. 
+ */ +List * +get_partitioned_attr_clauses(List *restrictinfo_list, + const PartRelationInfo *prel, + Index partitioned_rel) +{ +#define AdjustAttno(attno) \ + ( (AttrNumber) (part_attno + FirstLowInvalidHeapAttributeNumber) ) + + List *result = NIL; + ListCell *l; + + foreach(l, restrictinfo_list) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Bitmapset *varattnos = NULL; + int part_attno; + + Assert(IsA(rinfo, RestrictInfo)); + pull_varattnos((Node *) rinfo->clause, partitioned_rel, &varattnos); + + if (bms_get_singleton_member(varattnos, &part_attno) && + AdjustAttno(part_attno) == prel->attnum) + { + result = lappend(result, rinfo->clause); + } + } + return result; +} + /* Transform partition ranges into plain array of partition Oids */ Oid * @@ -385,7 +419,7 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, Plan *child_plan = (Plan *) lfirst(lc2); RelOptInfo *child_rel = ((Path *) lfirst(lc1))->parent; - /* Replace rel's tlist with a matching one */ + /* Replace rel's tlist with a matching one */ if (!cscan->scan.plan.targetlist) tlist = replace_tlist_varnos(child_plan->targetlist, rel); @@ -407,7 +441,7 @@ create_append_plan_common(PlannerInfo *root, RelOptInfo *rel, /* Since we're not scanning any real table directly */ cscan->scan.scanrelid = 0; - cscan->custom_exprs = get_actual_clauses(clauses); + cscan->custom_exprs = get_partitioned_attr_clauses(clauses, prel, rel->relid); cscan->custom_plans = custom_plans; cscan->methods = scan_methods; diff --git a/src/nodes_common.h b/src/nodes_common.h index ef3cb3df5a..51087bfab2 100644 --- a/src/nodes_common.h +++ b/src/nodes_common.h @@ -60,6 +60,10 @@ clear_plan_states(CustomScanState *scan_state) } } +List * get_partitioned_attr_clauses(List *restrictinfo_list, + const PartRelationInfo *prel, + Index partitioned_rel); + Oid * get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel, bool include_parent); diff --git a/src/utils.c b/src/utils.c index 56455e473b..8be3c67b4b 
100644 --- a/src/utils.c +++ b/src/utils.c @@ -41,25 +41,6 @@ static List *get_tableoids_list(List *tlist); static void lock_rows_visitor(Plan *plan, void *context); -/* - * Execute 'cb_proc' on 'xact_context' reset. - */ -void -execute_on_xact_mcxt_reset(MemoryContext xact_context, - MemoryContextCallbackFunction cb_proc, - void *arg) -{ - MemoryContextCallback *mcxt_cb = MemoryContextAlloc(xact_context, - sizeof(MemoryContextCallback)); - - /* Initialize MemoryContextCallback */ - mcxt_cb->arg = arg; - mcxt_cb->func = cb_proc; - mcxt_cb->next = NULL; - - MemoryContextRegisterResetCallback(xact_context, mcxt_cb); -} - /* * Check whether clause contains PARAMs or not */ @@ -250,36 +231,6 @@ build_index_tlist(PlannerInfo *root, IndexOptInfo *index, return tlist; } -/* - * We should ensure that 'rel->baserestrictinfo' or 'ppi->ppi_clauses' contain - * Var which corresponds to partition attribute before creating RuntimeXXX - * paths since they are used by create_scan_plan() to form 'scan_clauses' - * that are passed to create_customscan_plan(). - */ -bool -check_rinfo_for_partitioned_attr(List *rinfo, Index varno, AttrNumber varattno) -{ - List *vars; - List *clauses; - ListCell *lc; - - clauses = get_actual_clauses(rinfo); - - vars = pull_var_clause((Node *) clauses, - PVC_REJECT_AGGREGATES, - PVC_REJECT_PLACEHOLDERS); - - foreach (lc, vars) - { - Var *var = (Var *) lfirst(lc); - - if (var->varno == varno && var->varoattno == varattno) - return true; - } - - return false; -} - /* * Get BTORDER_PROC for two types described by Oids */ diff --git a/src/utils.h b/src/utils.h index 94272bf024..96aeb29c9b 100644 --- a/src/utils.h +++ b/src/utils.h @@ -49,9 +49,6 @@ void postprocess_lock_rows(List *rtable, Plan *plan); bool clause_contains_params(Node *clause); bool is_date_type_internal(Oid typid); bool is_string_type_internal(Oid typid); -bool check_rinfo_for_partitioned_attr(List *rinfo, - Index varno, - AttrNumber varattno); /* * Misc. 
@@ -67,9 +64,6 @@ Oid get_binary_operator_oid(char *opname, Oid arg1, Oid arg2); void fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, Oid type2); -void execute_on_xact_mcxt_reset(MemoryContext xact_context, - MemoryContextCallbackFunction cb_proc, - void *arg); char * datum_to_cstring(Datum datum, Oid typid); From 067570e8d2c73f0cd0f1f31b84372af836a494d2 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 8 Sep 2016 23:20:05 +0300 Subject: [PATCH 126/184] improve row estimation for NestLoop involving RuntimeAppend --- src/hooks.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index 2c98f47dd0..6db5434be9 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -99,19 +99,21 @@ pathman_join_pathlist_hook(PlannerInfo *root, paramsel *= wrap->paramsel; } - /* Check that innerrel's RestrictInfo contains partitioned column */ + /* Check that innerrel's RestrictInfos contain partitioned column */ innerrel_rinfo_contains_part_attr = get_partitioned_attr_clauses(innerrel->baserestrictinfo, inner_prel, innerrel->relid) != NULL; foreach (lc, innerrel->pathlist) { + AppendPath *cur_inner_path = (AppendPath *) lfirst(lc); Path *outer, *inner; NestPath *nest_path; /* NestLoop we're creating */ ParamPathInfo *ppi; /* parameterization info */ Relids inner_required; /* required paremeterization relids */ - AppendPath *cur_inner_path = (AppendPath *) lfirst(lc); + List *filtered_joinclauses = NIL; + ListCell *rinfo_lc; if (!IsA(cur_inner_path, AppendPath)) continue; @@ -136,8 +138,7 @@ pathman_join_pathlist_hook(PlannerInfo *root, innerrel->relid)))) continue; - inner = create_runtimeappend_path(root, cur_inner_path, - ppi, paramsel); + inner = create_runtimeappend_path(root, cur_inner_path, ppi, paramsel); initial_cost_nestloop(root, &workspace, jointype, outer, inner, /* built paths */ @@ -151,7 +152,29 @@ pathman_join_pathlist_hook(PlannerInfo *root, pathkeys, calc_nestloop_required_outer(outer, 
inner)); - /* Finally we can add new NestLoop path */ + /* Discard all clauses that are to be evaluated by 'inner' */ + foreach (rinfo_lc, extra->restrictlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(rinfo_lc); + + Assert(IsA(rinfo, RestrictInfo)); + if (!join_clause_is_movable_to(rinfo, inner->parent)) + filtered_joinclauses = lappend(filtered_joinclauses, rinfo); + } + + /* + * Override 'rows' value produced by standard estimator. + * Currently we use get_parameterized_joinrel_size() since + * it works just fine, but this might change some day. + */ + nest_path->path.rows = get_parameterized_joinrel_size(root, + joinrel, + outer->rows, + inner->rows, + extra->sjinfo, + filtered_joinclauses); + + /* Finally we can add the new NestLoop path */ add_path(joinrel, (Path *) nest_path); } } From e710bbf01099268be66f74a061aa9a3498ffd183 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 9 Sep 2016 13:45:41 +0300 Subject: [PATCH 127/184] remove commented code --- src/pg_pathman.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 8f35b1528b..1c6e8bf5c8 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -1510,7 +1510,6 @@ handle_boolexpr(const BoolExpr *expr, WalkerContext *context) switch (expr->boolop) { case OR_EXPR: - // finish_least_greatest(arg, context); result->rangeset = irange_list_union(result->rangeset, arg->rangeset); break; case AND_EXPR: @@ -1671,7 +1670,6 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) path = create_seqscan_path(root, rel, required_outer); #endif add_path(rel, path); - // set_pathkeys(root, rel, path); /* Consider index scans */ create_index_paths(root, rel); From 9476927014b69c3a9683a30fd2ff5b6c206f2724 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 12 Sep 2016 14:09:30 +0300 Subject: [PATCH 128/184] miscellaneous fixes --- src/nodes_common.c | 2 +- src/pg_pathman.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git 
a/src/nodes_common.c b/src/nodes_common.c index 5b8f48e84c..7a9b35fd22 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -259,7 +259,7 @@ get_partitioned_attr_clauses(List *restrictinfo_list, Index partitioned_rel) { #define AdjustAttno(attno) \ - ( (AttrNumber) (part_attno + FirstLowInvalidHeapAttributeNumber) ) + ( (AttrNumber) (attno + FirstLowInvalidHeapAttributeNumber) ) List *result = NIL; ListCell *l; diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 1c6e8bf5c8..7359b693a3 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -544,6 +544,7 @@ append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, /* Recalc parent relation tuples count */ rel->tuples += childrel->tuples; + /* Close child relations, but keep locks */ heap_close(newrelation, NoLock); From adaa6e3ac147630386c91c81003c4ee66fa0b303 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 13 Sep 2016 11:33:37 +0300 Subject: [PATCH 129/184] add tablespace support --- hash.sql | 12 +++-- init.sql | 15 ++++++ range.sql | 125 +++++++++++++++++++++++++++++++++++++------------ src/pl_funcs.c | 28 +++++++++++ src/utils.c | 25 ++++++++++ src/utils.h | 3 ++ 6 files changed, 175 insertions(+), 33 deletions(-) diff --git a/hash.sql b/hash.sql index fd113aad3e..8c6be12987 100644 --- a/hash.sql +++ b/hash.sql @@ -24,6 +24,7 @@ DECLARE v_plain_schema TEXT; v_plain_relname TEXT; v_hashfunc TEXT; + v_tablespace TEXT; BEGIN IF partition_data = true THEN @@ -49,6 +50,9 @@ BEGIN INSERT INTO @extschema@.pathman_config (partrel, attname, parttype) VALUES (parent_relid, attribute, 1); + /* Determine tablespace of parent table */ + v_tablespace := @extschema@.get_rel_tablespace_name(parent_relid); + /* Create partitions and update pg_pathman configuration */ FOR partnum IN 0..partitions_count-1 LOOP @@ -56,9 +60,11 @@ BEGIN quote_ident(v_plain_schema), quote_ident(v_plain_relname || '_' || partnum)); - EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', - 
v_child_relname, - parent_relid::TEXT); + EXECUTE format( + 'CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s) TABLESPACE %s', + v_child_relname, + parent_relid::TEXT, + v_tablespace); EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (@extschema@.get_hash_part_idx(%s(%s), %s) = %s)', diff --git a/init.sql b/init.sql index 8d9f6e6b8b..7c47b7da49 100644 --- a/init.sql +++ b/init.sql @@ -40,6 +40,14 @@ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config_params ( CREATE UNIQUE INDEX i_pathman_config_params ON @extschema@.pathman_config_params(partrel); +CREATE TYPE partition AS ( + parent REGCLASS, + parttype INTEGER, + child REGCLASS, + start_value TEXT, + end_value TEXT +); + /* * Invalidate relcache every time someone changes parameters config. */ @@ -695,3 +703,10 @@ LANGUAGE C STRICT; CREATE OR REPLACE FUNCTION @extschema@.debug_capture() RETURNS VOID AS 'pg_pathman', 'debug_capture' LANGUAGE C STRICT; + +/* + * Return tablespace name for specified relation + */ +CREATE OR REPLACE FUNCTION @extschema@.get_rel_tablespace_name(relation REGCLASS) +RETURNS TEXT AS 'pg_pathman', 'get_rel_tablespace_name' +LANGUAGE C STRICT; diff --git a/range.sql b/range.sql index d837df0c10..15ac8a90c1 100644 --- a/range.sql +++ b/range.sql @@ -91,8 +91,8 @@ DECLARE v_rows_count INTEGER; v_max p_start_value%TYPE; v_cur_value p_start_value%TYPE := p_start_value; + v_tablespace TEXT; i INTEGER; - BEGIN IF partition_data = true THEN /* Acquire data modification lock */ @@ -149,12 +149,20 @@ BEGIN INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); + /* Determine tablespace of parent table */ + v_tablespace := @extschema@.get_rel_tablespace_name(parent_relid); + /* Create first partition */ FOR i IN 1..p_count LOOP - EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s)', - pg_typeof(p_start_value)) - USING parent_relid, p_start_value, p_start_value + 
p_interval; + EXECUTE + format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s, tablespace:=$4)', + pg_typeof(p_start_value)) + USING + parent_relid, + p_start_value, + p_start_value + p_interval, + v_tablespace; p_start_value := p_start_value + p_interval; END LOOP; @@ -190,6 +198,7 @@ DECLARE v_rows_count INTEGER; v_max p_start_value%TYPE; v_cur_value p_start_value%TYPE := p_start_value; + v_tablespace TEXT; i INTEGER; BEGIN @@ -250,12 +259,18 @@ BEGIN INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); + /* Determine tablespace of parent table */ + v_tablespace := @extschema@.get_rel_tablespace_name(parent_relid); + /* create first partition */ FOR i IN 1..p_count LOOP - PERFORM @extschema@.create_single_range_partition(parent_relid, - p_start_value, - p_start_value + p_interval); + PERFORM @extschema@.create_single_range_partition( + parent_relid, + p_start_value, + p_start_value + p_interval, + tablespace := v_tablespace); + p_start_value := p_start_value + p_interval; END LOOP; @@ -288,6 +303,7 @@ RETURNS INTEGER AS $$ DECLARE part_count INTEGER := 0; + v_tablespace TEXT; BEGIN IF partition_data = true THEN @@ -320,11 +336,17 @@ BEGIN INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); + /* Determine tablespace of parent table */ + v_tablespace := @extschema@.get_rel_tablespace_name(parent_relid); + WHILE p_start_value <= p_end_value LOOP - PERFORM @extschema@.create_single_range_partition(parent_relid, - p_start_value, - p_start_value + p_interval); + PERFORM @extschema@.create_single_range_partition( + parent_relid, + p_start_value, + p_start_value + p_interval, + tablespace := v_tablespace); + p_start_value := p_start_value + p_interval; part_count := part_count + 1; END LOOP; @@ -358,6 +380,7 @@ RETURNS INTEGER AS $$ DECLARE part_count INTEGER := 0; + v_tablespace 
TEXT; BEGIN IF partition_data = true THEN @@ -386,11 +409,19 @@ BEGIN INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); + /* Determine tablespace of parent table */ + v_tablespace := @extschema@.get_rel_tablespace_name(parent_relid); + WHILE p_start_value <= p_end_value LOOP - EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s);', - pg_typeof(p_start_value)) - USING parent_relid, p_start_value, p_start_value + p_interval; + EXECUTE + format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s, tablespace:=$4);', + pg_typeof(p_start_value)) + USING + parent_relid, + p_start_value, + p_start_value + p_interval, + v_tablespace; p_start_value := p_start_value + p_interval; part_count := part_count + 1; @@ -419,7 +450,8 @@ CREATE OR REPLACE FUNCTION @extschema@.create_single_range_partition( parent_relid REGCLASS, p_start_value ANYELEMENT, p_end_value ANYELEMENT, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL, + tablespace TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -431,7 +463,7 @@ DECLARE v_plain_relname TEXT; v_child_relname_exists BOOL; v_seq_name TEXT; - + v_create_table_query TEXT; BEGIN v_attname := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; @@ -466,7 +498,15 @@ BEGIN v_child_relname := partition_name; END IF; - EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)', + v_create_table_query := 'CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)'; + + /* If tablespace is specified then add it to a create query */ + if NOT tablespace IS NULL THEN + v_create_table_query := v_create_table_query || ' TABLESPACE ' ||tablespace; + END IF; + RAISE NOTICE 'query: %', v_create_table_query; + + EXECUTE format(v_create_table_query, v_child_relname, parent_relid::TEXT); @@ -708,7 +748,8 @@ $$ LANGUAGE plpgsql; */ CREATE OR REPLACE FUNCTION 
@extschema@.append_range_partition( parent_relid REGCLASS, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL, + tablespace TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -734,13 +775,14 @@ BEGIN EXECUTE format( - 'SELECT @extschema@.append_partition_internal($1, $2, $3, ARRAY[]::%s[], $4)', + 'SELECT @extschema@.append_partition_internal($1, $2, $3, ARRAY[]::%s[], $4, $5)', v_atttype) USING parent_relid, v_atttype, v_interval, - partition_name + partition_name, + tablespace INTO v_part_name; @@ -762,7 +804,8 @@ CREATE OR REPLACE FUNCTION @extschema@.append_partition_internal( p_atttype TEXT, p_interval TEXT, p_range ANYARRAY DEFAULT NULL, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL, + tablespace TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -773,6 +816,11 @@ BEGIN RAISE EXCEPTION 'Cannot append to empty partitions set'; END IF; + /* If tablespace isn't specified then choose parent's tablespace */ + IF tablespace IS NULL THEN + tablespace := @extschema@.get_rel_tablespace_name(parent_relid); + END IF; + p_range := @extschema@.get_range_by_idx(parent_relid, -1, 0); IF @extschema@.is_date_type(p_atttype::regtype) THEN @@ -784,13 +832,14 @@ BEGIN ELSE EXECUTE format( - 'SELECT @extschema@.create_single_range_partition($1, $2, $2 + $3::%s, $4)', + 'SELECT @extschema@.create_single_range_partition($1, $2, $2 + $3::%s, $4, $5)', p_atttype) USING parent_relid, p_range[2], p_interval, - partition_name + partition_name, + tablespace INTO v_part_name; END IF; @@ -806,7 +855,8 @@ LANGUAGE plpgsql; */ CREATE OR REPLACE FUNCTION @extschema@.prepend_range_partition( parent_relid REGCLASS, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL, + tablespace TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -829,13 +879,14 @@ BEGIN EXECUTE format( - 'SELECT @extschema@.prepend_partition_internal($1, $2, $3, ARRAY[]::%s[], $4)', + 'SELECT @extschema@.prepend_partition_internal($1, $2, $3, ARRAY[]::%s[], $4, $5)', 
v_atttype) USING parent_relid, v_atttype, v_interval, - partition_name + partition_name, + tablespace INTO v_part_name; @@ -857,7 +908,8 @@ CREATE OR REPLACE FUNCTION @extschema@.prepend_partition_internal( p_atttype TEXT, p_interval TEXT, p_range ANYARRAY DEFAULT NULL, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL, + tablespace TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -868,6 +920,11 @@ BEGIN RAISE EXCEPTION 'Cannot prepend to empty partitions set'; END IF; + /* If tablespace isn't specified then choose parent's tablespace */ + IF tablespace IS NULL THEN + tablespace := @extschema@.get_rel_tablespace_name(parent_relid); + END IF; + p_range := @extschema@.get_range_by_idx(parent_relid, 0, 0); IF @extschema@.is_date_type(p_atttype::regtype) THEN @@ -879,13 +936,14 @@ BEGIN ELSE EXECUTE format( - 'SELECT @extschema@.create_single_range_partition($1, $2 - $3::%s, $2, $4)', + 'SELECT @extschema@.create_single_range_partition($1, $2 - $3::%s, $2, $4, $5)', p_atttype) USING parent_relid, p_range[1], p_interval, - partition_name + partition_name, + tablespace INTO v_part_name; END IF; @@ -903,7 +961,8 @@ CREATE OR REPLACE FUNCTION @extschema@.add_range_partition( parent_relid REGCLASS, p_start_value ANYELEMENT, p_end_value ANYELEMENT, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL, + tablespace TEXT DEFAULT NULL) RETURNS TEXT AS $$ DECLARE @@ -923,11 +982,17 @@ BEGIN RAISE EXCEPTION 'Specified range overlaps with existing partitions'; END IF; + /* If tablespace isn't specified then choose parent's tablespace */ + IF tablespace IS NULL THEN + tablespace := @extschema@.get_rel_tablespace_name(parent_relid); + END IF; + /* Create new partition */ v_part_name := @extschema@.create_single_range_partition(parent_relid, p_start_value, p_end_value, - partition_name); + partition_name, + tablespace); PERFORM @extschema@.on_update_partitions(parent_relid); RETURN v_part_name; diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 
a647a73075..038f789291 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -19,6 +19,7 @@ #include "access/xact.h" #include "catalog/indexing.h" #include "commands/sequence.h" +#include "commands/tablespace.h" #include "miscadmin.h" #include "utils/array.h" #include "utils/builtins.h" @@ -55,6 +56,7 @@ PG_FUNCTION_INFO_V1( invalidate_relcache ); PG_FUNCTION_INFO_V1( lock_partitioned_relation ); PG_FUNCTION_INFO_V1( prevent_relation_modification ); PG_FUNCTION_INFO_V1( debug_capture ); +PG_FUNCTION_INFO_V1( get_rel_tablespace_name ); static void on_partitions_created_internal(Oid partitioned_table, bool add_callbacks); @@ -748,3 +750,29 @@ debug_capture(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +/* + * Return tablespace name for specified relation + */ +Datum +get_rel_tablespace_name(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Oid tablespace_id; + char *result; + + tablespace_id = get_rel_tablespace(relid); + + /* If tablespace id is InvalidOid then use the default tablespace */ + if (!OidIsValid(tablespace_id)) + { + tablespace_id = GetDefaultTablespace(get_rel_persistence(relid)); + + /* If tablespace is still invalid then use database's default */ + if (!OidIsValid(tablespace_id)) + tablespace_id = MyDatabaseTableSpace; + } + + result = get_tablespace_name(tablespace_id); + PG_RETURN_TEXT_P(cstring_to_text(result)); +} diff --git a/src/utils.c b/src/utils.c index 8be3c67b4b..c7651f81e7 100644 --- a/src/utils.c +++ b/src/utils.c @@ -636,3 +636,28 @@ get_rel_name_or_relid(Oid relid) return DatumGetCString(DirectFunctionCall1(regclassout, ObjectIdGetDatum(relid))); } + + +#if PG_VERSION_NUM < 90600 +/* + * Returns the relpersistence associated with a given relation. 
+ * + * NOTE: this function is implemented in 9.6 + */ +char +get_rel_persistence(Oid relid) +{ + HeapTuple tp; + Form_pg_class reltup; + char result; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for relation %u", relid); + reltup = (Form_pg_class) GETSTRUCT(tp); + result = reltup->relpersistence; + ReleaseSysCache(tp); + + return result; +} +#endif diff --git a/src/utils.h b/src/utils.h index 96aeb29c9b..d355113943 100644 --- a/src/utils.h +++ b/src/utils.h @@ -66,5 +66,8 @@ void fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type2); char * datum_to_cstring(Datum datum, Oid typid); +#if PG_VERSION_NUM < 90600 +char get_rel_persistence(Oid relid); +#endif #endif From 2f8eb9060f80bc8e44e6bc4672ed83d83369016b Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 13 Sep 2016 15:17:26 +0300 Subject: [PATCH 130/184] tablespace tests --- range.sql | 6 +- tests/partitioning_test.py | 552 +++++++++++++++++++++---------------- 2 files changed, 323 insertions(+), 235 deletions(-) diff --git a/range.sql b/range.sql index 15ac8a90c1..1bbc79c6d3 100644 --- a/range.sql +++ b/range.sql @@ -828,7 +828,8 @@ BEGIN parent_relid, p_range[2], p_range[2] + p_interval::interval, - partition_name); + partition_name, + tablespace); ELSE EXECUTE format( @@ -932,7 +933,8 @@ BEGIN parent_relid, p_range[1] - p_interval::interval, p_range[1], - partition_name); + partition_name, + tablespace); ELSE EXECUTE format( diff --git a/tests/partitioning_test.py b/tests/partitioning_test.py index 6dd589b8db..f1c03706cb 100644 --- a/tests/partitioning_test.py +++ b/tests/partitioning_test.py @@ -1,4 +1,4 @@ -#coding: utf-8 +# coding: utf-8 """ concurrent_partitioning_test.py Tests concurrent partitioning worker with simultaneous update queries @@ -7,243 +7,329 @@ """ import unittest -from testgres import get_new_node, clean_all, stop_all -from subprocess import Popen, PIPE -import subprocess +from testgres import get_new_node, 
stop_all import time +import os class PartitioningTests(unittest.TestCase): - def setUp(self): - self.setup_cmd = [ - 'create extension pg_pathman', - 'create table abc(id serial, t text)', - 'insert into abc select generate_series(1, 300000)', - 'select create_hash_partitions(\'abc\', \'id\', 3, partition_data := false)', - ] - - def tearDown(self): - stop_all() - # clean_all() - - def init_test_data(self, node): - """Initialize pg_pathman extension and test data""" - for cmd in self.setup_cmd: - node.safe_psql('postgres', cmd) - - def catchup_replica(self, master, replica): - """Wait until replica synchronizes with master""" - master.poll_query_until( - 'postgres', - 'SELECT pg_current_xlog_location() <= replay_location ' - 'FROM pg_stat_replication WHERE application_name = \'%s\'' - % replica.name) - - def printlog(self, logfile): - with open(logfile, 'r') as log: - for line in log.readlines(): - print line - - def test_concurrent(self): - """Tests concurrent partitioning""" - node = get_new_node('test') - try: - node.init() - node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') - node.start() - self.init_test_data(node) - - node.psql('postgres', 'select partition_table_concurrently(\'abc\')') - - while True: - # update some rows to check for deadlocks - # import ipdb; ipdb.set_trace() - node.safe_psql('postgres', - ''' - update abc set t = 'test' - where id in (select (random() * 300000)::int from generate_series(1, 3000)) - ''') - - count = node.execute('postgres', 'select count(*) from pathman_concurrent_part_tasks') - - # if there is no active workers then it means work is done - if count[0][0] == 0: - break - time.sleep(1) - - data = node.execute('postgres', 'select count(*) from only abc') - self.assertEqual(data[0][0], 0) - data = node.execute('postgres', 'select count(*) from abc') - self.assertEqual(data[0][0], 300000) - - node.stop() - except Exception, e: - self.printlog(node.logs_dir + '/postgresql.log') - raise e - - def 
test_replication(self): - """Tests how pg_pathman works with replication""" - node = get_new_node('master') - replica = get_new_node('repl') - - try: - # initialize master server - node.init(allows_streaming=True) - node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') - node.start() - node.backup('my_backup') - - # initialize replica from backup - replica.init_from_backup(node, 'my_backup', has_streaming=True) - replica.start() - - # initialize pg_pathman extension and some test data - self.init_test_data(node) - - # wait until replica catches up - self.catchup_replica(node, replica) - - # check that results are equal - self.assertEqual( - node.psql('postgres', 'explain (costs off) select * from abc'), - replica.psql('postgres', 'explain (costs off) select * from abc') - ) - - # enable parent and see if it is enabled in replica - node.psql('postgres', 'select enable_parent(\'abc\'') - - self.catchup_replica(node, replica) - self.assertEqual( - node.psql('postgres', 'explain (costs off) select * from abc'), - replica.psql('postgres', 'explain (costs off) select * from abc') - ) - self.assertEqual( - node.psql('postgres', 'select * from abc'), - replica.psql('postgres', 'select * from abc') - ) - self.assertEqual( - node.execute('postgres', 'select count(*) from abc')[0][0], - 300000 - ) - - # check that direct UPDATE in pathman_config_params invalidates - # cache - node.psql( - 'postgres', - 'update pathman_config_params set enable_parent = false') - self.catchup_replica(node, replica) - self.assertEqual( - node.psql('postgres', 'explain (costs off) select * from abc'), - replica.psql('postgres', 'explain (costs off) select * from abc') - ) - self.assertEqual( - node.psql('postgres', 'select * from abc'), - replica.psql('postgres', 'select * from abc') - ) - self.assertEqual( - node.execute('postgres', 'select count(*) from abc')[0][0], - 0 - ) - except Exception, e: - self.printlog(node.logs_dir + '/postgresql.log') - 
self.printlog(replica.logs_dir + '/postgresql.log') - raise e - - def test_locks(self): - """Test that a session trying to create new partitions waits for other - sessions if they doing the same""" - - import threading - import time - - class Flag: - def __init__(self, value): - self.flag = value - - def set(self, value): - self.flag = value - - def get(self): - return self.flag - - # There is one flag for each thread which shows if thread have done - # its work - flags = [Flag(False) for i in xrange(3)] - - # All threads synchronizes though this lock - lock = threading.Lock() - - # Define thread function - def add_partition(node, flag, query): - """ We expect that this query will wait until another session - commits or rolls back""" - node.safe_psql('postgres', query) - with lock: - flag.set(True) - - # Initialize master server - node = get_new_node('master') - - try: - node.init() - node.append_conf('postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') - node.start() - node.safe_psql( - 'postgres', - 'create extension pg_pathman; ' - + 'create table abc(id serial, t text); ' - + 'insert into abc select generate_series(1, 100000); ' - + 'select create_range_partitions(\'abc\', \'id\', 1, 50000);' - ) - - # Start transaction that will create partition - con = node.connect() - con.begin() - con.execute('select append_range_partition(\'abc\')') - - # Start threads that suppose to add new partitions and wait some time - query = [ - 'select prepend_range_partition(\'abc\')', - 'select append_range_partition(\'abc\')', - 'select add_range_partition(\'abc\', 500000, 550000)', - ] - threads = [] - for i in range(3): - thread = \ - threading.Thread(target=add_partition, args=(node, flags[i], query[i])) - threads.append(thread) - thread.start() - time.sleep(3) - - # This threads should wait until current transaction finished - with lock: - for i in range(3): - self.assertEqual(flags[i].get(), False) - - # Commit transaction. 
Since then other sessions can create partitions - con.commit() - - # Now wait until each thread finishes - for i in range(3): - threads[i].join() - - # Check flags, it should be true which means that threads are finished - with lock: - for i in range(3): - self.assertEqual(flags[i].get(), True) - - # Check that all partitions are created - self.assertEqual( - node.safe_psql( - 'postgres', - 'select count(*) from pg_inherits where inhparent=\'abc\'::regclass' - ), - '6\n' - ) - except Exception, e: - self.printlog(node.logs_dir + '/postgresql.log') - raise e + def setUp(self): + self.setup_cmd = [ + 'create extension pg_pathman', + 'create table abc(id serial, t text)', + 'insert into abc select generate_series(1, 300000)', + 'select create_hash_partitions(\'abc\', \'id\', 3, partition_data := false)', + ] + + def tearDown(self): + stop_all() + # clean_all() + + def init_test_data(self, node): + """Initialize pg_pathman extension and test data""" + for cmd in self.setup_cmd: + node.safe_psql('postgres', cmd) + + def catchup_replica(self, master, replica): + """Wait until replica synchronizes with master""" + master.poll_query_until( + 'postgres', + 'SELECT pg_current_xlog_location() <= replay_location ' + 'FROM pg_stat_replication WHERE application_name = \'%s\'' + % replica.name) + + def printlog(self, logfile): + with open(logfile, 'r') as log: + for line in log.readlines(): + print line + + def test_concurrent(self): + """Tests concurrent partitioning""" + node = get_new_node('test') + try: + node.init() + node.append_conf( + 'postgresql.conf', + 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + self.init_test_data(node) + + node.psql( + 'postgres', + 'select partition_table_concurrently(\'abc\')') + + while True: + # update some rows to check for deadlocks + node.safe_psql( + 'postgres', + ''' + update abc set t = 'test' + where id in (select (random() * 300000)::int + from generate_series(1, 3000)) + ''') + + count = node.execute( + 'postgres', + 
'select count(*) from pathman_concurrent_part_tasks') + + # if there is no active workers then it means work is done + if count[0][0] == 0: + break + time.sleep(1) + + data = node.execute('postgres', 'select count(*) from only abc') + self.assertEqual(data[0][0], 0) + data = node.execute('postgres', 'select count(*) from abc') + self.assertEqual(data[0][0], 300000) + + node.stop() + except Exception, e: + self.printlog(node.logs_dir + '/postgresql.log') + raise e + + def test_replication(self): + """Tests how pg_pathman works with replication""" + node = get_new_node('master') + replica = get_new_node('repl') + + try: + # initialize master server + node.init(allows_streaming=True) + node.append_conf( + 'postgresql.conf', + 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + node.backup('my_backup') + + # initialize replica from backup + replica.init_from_backup(node, 'my_backup', has_streaming=True) + replica.start() + + # initialize pg_pathman extension and some test data + self.init_test_data(node) + + # wait until replica catches up + self.catchup_replica(node, replica) + + # check that results are equal + self.assertEqual( + node.psql('postgres', 'explain (costs off) select * from abc'), + replica.psql('postgres', 'explain (costs off) select * from abc') + ) + + # enable parent and see if it is enabled in replica + node.psql('postgres', 'select enable_parent(\'abc\'') + + self.catchup_replica(node, replica) + self.assertEqual( + node.psql('postgres', 'explain (costs off) select * from abc'), + replica.psql('postgres', 'explain (costs off) select * from abc') + ) + self.assertEqual( + node.psql('postgres', 'select * from abc'), + replica.psql('postgres', 'select * from abc') + ) + self.assertEqual( + node.execute('postgres', 'select count(*) from abc')[0][0], + 300000 + ) + + # check that direct UPDATE in pathman_config_params invalidates + # cache + node.psql( + 'postgres', + 'update pathman_config_params set enable_parent = false') + 
self.catchup_replica(node, replica) + self.assertEqual( + node.psql('postgres', 'explain (costs off) select * from abc'), + replica.psql('postgres', 'explain (costs off) select * from abc') + ) + self.assertEqual( + node.psql('postgres', 'select * from abc'), + replica.psql('postgres', 'select * from abc') + ) + self.assertEqual( + node.execute('postgres', 'select count(*) from abc')[0][0], + 0 + ) + except Exception, e: + self.printlog(node.logs_dir + '/postgresql.log') + self.printlog(replica.logs_dir + '/postgresql.log') + raise e + + def test_locks(self): + """Test that a session trying to create new partitions waits for other + sessions if they doing the same""" + + import threading + import time + + class Flag: + def __init__(self, value): + self.flag = value + + def set(self, value): + self.flag = value + + def get(self): + return self.flag + + # There is one flag for each thread which shows if thread have done + # its work + flags = [Flag(False) for i in xrange(3)] + + # All threads synchronizes though this lock + lock = threading.Lock() + + # Define thread function + def add_partition(node, flag, query): + """ We expect that this query will wait until another session + commits or rolls back""" + node.safe_psql('postgres', query) + with lock: + flag.set(True) + + # Initialize master server + node = get_new_node('master') + + try: + node.init() + node.append_conf( + 'postgresql.conf', + 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + node.safe_psql( + 'postgres', + 'create extension pg_pathman; ' + + 'create table abc(id serial, t text); ' + + 'insert into abc select generate_series(1, 100000); ' + + 'select create_range_partitions(\'abc\', \'id\', 1, 50000);' + ) + + # Start transaction that will create partition + con = node.connect() + con.begin() + con.execute('select append_range_partition(\'abc\')') + + # Start threads that suppose to add new partitions and wait some + # time + query = [ + 'select prepend_range_partition(\'abc\')', + 
'select append_range_partition(\'abc\')', + 'select add_range_partition(\'abc\', 500000, 550000)', + ] + threads = [] + for i in range(3): + thread = threading.Thread( + target=add_partition, + args=(node, flags[i], query[i])) + threads.append(thread) + thread.start() + time.sleep(3) + + # This threads should wait until current transaction finished + with lock: + for i in range(3): + self.assertEqual(flags[i].get(), False) + + # Commit transaction. Since then other sessions can create + # partitions + con.commit() + + # Now wait until each thread finishes + for i in range(3): + threads[i].join() + + # Check flags, it should be true which means that threads are + # finished + with lock: + for i in range(3): + self.assertEqual(flags[i].get(), True) + + # Check that all partitions are created + self.assertEqual( + node.safe_psql( + 'postgres', + 'select count(*) from pg_inherits where inhparent=\'abc\'::regclass' + ), + '6\n' + ) + except Exception, e: + self.printlog(node.logs_dir + '/postgresql.log') + raise e + + def test_tablespace(self): + """Check tablespace support""" + + def check_tablespace(node, tablename, tablespace): + res = node.execute( + 'postgres', + 'select get_rel_tablespace_name(\'{}\')'.format(tablename)) + if len(res) == 0: + return False + + return res[0][0] == tablespace + + node = get_new_node('master') + node.init() + node.append_conf( + 'postgresql.conf', + 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + path = os.path.join(node.data_dir, 'test_space_location') + os.mkdir(path) + node.psql('postgres', 'create extension pg_pathman') + + # create tablespace + node.psql( + 'postgres', + 'create tablespace test_space location \'{}\''.format(path)) + + # create table in this tablespace + node.psql( + 'postgres', + 'create table abc(a serial, b int) tablespace test_space') + + # create three partitions. 
Excpect that they will be created in the + # same tablespace as the parent table + node.psql( + 'postgres', + 'select create_range_partitions(\'abc\', \'a\', 1, 10, 3)') + self.assertTrue(check_tablespace(node, 'abc', 'test_space')) + + # check tablespace for appended partition + node.psql( + 'postgres', + 'select append_range_partition(\'abc\', \'abc_appended\')') + self.assertTrue(check_tablespace(node, 'abc_appended', 'test_space')) + + # check tablespace for prepended partition + node.psql( + 'postgres', + 'select prepend_range_partition(\'abc\', \'abc_prepended\')') + self.assertTrue(check_tablespace(node, 'abc_prepended', 'test_space')) + + # check tablespace for prepended partition + node.psql( + 'postgres', + 'select add_range_partition(\'abc\', 41, 51, \'abc_added\')') + self.assertTrue(check_tablespace(node, 'abc_added', 'test_space')) + + # now let's specify tablespace explicitly + node.psql( + 'postgres', + 'select append_range_partition(\'abc\', \'abc_appended_2\', \'pg_default\')') + node.psql( + 'postgres', + 'select prepend_range_partition(\'abc\', \'abc_prepended_2\', \'pg_default\')') + node.psql( + 'postgres', + 'select add_range_partition(\'abc\', 61, 71, \'abc_added_2\', \'pg_default\')') + self.assertTrue(check_tablespace(node, 'abc_appended_2', 'pg_default')) + self.assertTrue(check_tablespace(node, 'abc_prepended_2', 'pg_default')) + self.assertTrue(check_tablespace(node, 'abc_added_2', 'pg_default')) + if __name__ == "__main__": unittest.main() From 6ae4959b6ae8134e4971602716713c6e77a9dbf6 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 13 Sep 2016 19:18:19 +0300 Subject: [PATCH 131/184] rename get_attribute_type_name() -> get_attribute_type(), introduce function get_base_type(), improve VAR handling in read_opexpr_const(), fix partitioning of domains --- hash.sql | 25 +++++++------ init.sql | 12 +++++-- range.sql | 52 +++++++++++++-------------- src/hooks.c | 1 - src/init.c | 81 ++++++++++++++++++++++++++++++++++++------ 
src/partition_filter.c | 1 - src/pg_pathman.c | 32 ++++++++++++----- src/pl_funcs.c | 37 +++++++++---------- src/utils.c | 5 +++ 9 files changed, 164 insertions(+), 82 deletions(-) diff --git a/hash.sql b/hash.sql index fd113aad3e..dc807fb172 100644 --- a/hash.sql +++ b/hash.sql @@ -20,10 +20,10 @@ RETURNS INTEGER AS $$ DECLARE v_child_relname TEXT; - v_type TEXT; v_plain_schema TEXT; v_plain_relname TEXT; - v_hashfunc TEXT; + v_atttype REGTYPE; + v_hashfunc REGPROC; BEGIN IF partition_data = true THEN @@ -38,13 +38,13 @@ BEGIN attribute := lower(attribute); PERFORM @extschema@.common_relation_checks(parent_relid, attribute); - v_type := @extschema@.get_attribute_type_name(parent_relid, attribute); + /* Fetch atttype and its hash function */ + v_atttype := @extschema@.get_attribute_type(parent_relid, attribute); + v_hashfunc := @extschema@.get_type_hash_func(v_atttype); SELECT * INTO v_plain_schema, v_plain_relname FROM @extschema@.get_plain_schema_and_relname(parent_relid); - v_hashfunc := @extschema@.get_type_hash_func(v_type::regtype)::regproc; - /* Insert new entry to pathman config */ INSERT INTO @extschema@.pathman_config (partrel, attname, parttype) VALUES (parent_relid, attribute, 1); @@ -65,7 +65,7 @@ BEGIN v_child_relname, @extschema@.build_check_constraint_name(v_child_relname::REGCLASS, attribute), - v_hashfunc, + v_hashfunc::TEXT, attribute, partitions_count, partnum); @@ -137,7 +137,7 @@ DECLARE child_relname_format TEXT; funcname TEXT; triggername TEXT; - atttype TEXT; + atttype REGTYPE; hashfunc TEXT; partitions_count INTEGER; @@ -180,13 +180,12 @@ BEGIN quote_ident(plain_relname || '_%s'); /* Fetch base hash function for atttype */ - atttype := @extschema@.get_attribute_type_name(parent_relid, attr); - hashfunc := @extschema@.get_type_hash_func(atttype::regtype)::regproc; + atttype := @extschema@.get_attribute_type(parent_relid, attr); /* Format function definition and execute it */ - func := format(func, funcname, attr, partitions_count, 
att_val_fmt, - old_fields, att_fmt, new_fields, child_relname_format, hashfunc); - EXECUTE func; + EXECUTE format(func, funcname, attr, partitions_count, att_val_fmt, + old_fields, att_fmt, new_fields, child_relname_format, + @extschema@.get_type_hash_func(atttype)::TEXT); /* Create trigger on every partition */ FOR num IN 0..partitions_count-1 @@ -205,7 +204,7 @@ $$ LANGUAGE plpgsql; * Returns hash function OID for specified type */ CREATE OR REPLACE FUNCTION @extschema@.get_type_hash_func(REGTYPE) -RETURNS OID AS 'pg_pathman', 'get_type_hash_func' +RETURNS REGPROC AS 'pg_pathman', 'get_type_hash_func' LANGUAGE C STRICT; /* diff --git a/init.sql b/init.sql index 8d9f6e6b8b..0d082cd322 100644 --- a/init.sql +++ b/init.sql @@ -614,6 +614,14 @@ CREATE OR REPLACE FUNCTION @extschema@.get_parent_of_partition(REGCLASS) RETURNS REGCLASS AS 'pg_pathman', 'get_parent_of_partition_pl' LANGUAGE C STRICT; +/* + * Extract basic type of a domain. + */ +CREATE OR REPLACE FUNCTION @extschema@.get_base_type(REGTYPE) +RETURNS REGTYPE AS 'pg_pathman', 'get_base_type_pl' +LANGUAGE C STRICT; + + /* * Checks if attribute is nullable */ @@ -633,9 +641,9 @@ LANGUAGE C STRICT; /* * Returns attribute type name for relation */ -CREATE OR REPLACE FUNCTION @extschema@.get_attribute_type_name( +CREATE OR REPLACE FUNCTION @extschema@.get_attribute_type( REGCLASS, TEXT) -RETURNS TEXT AS 'pg_pathman', 'get_attribute_type_name' +RETURNS REGTYPE AS 'pg_pathman', 'get_attribute_type_pl' LANGUAGE C STRICT; /* diff --git a/range.sql b/range.sql index d837df0c10..3b7eb906c5 100644 --- a/range.sql +++ b/range.sql @@ -138,7 +138,7 @@ BEGIN p_attribute, p_start_value, p_start_value + p_interval * p_count, - pg_typeof(p_start_value)); + @extschema@.get_base_type(pg_typeof(p_start_value))::TEXT); END IF; /* Create sequence for child partitions names */ @@ -153,7 +153,7 @@ BEGIN FOR i IN 1..p_count LOOP EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s)', - 
pg_typeof(p_start_value)) + @extschema@.get_base_type(pg_typeof(p_start_value))::TEXT) USING parent_relid, p_start_value, p_start_value + p_interval; p_start_value := p_start_value + p_interval; @@ -283,7 +283,7 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( p_start_value ANYELEMENT, p_end_value ANYELEMENT, p_interval ANYELEMENT, - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT true) RETURNS INTEGER AS $$ DECLARE @@ -353,7 +353,7 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( p_start_value ANYELEMENT, p_end_value ANYELEMENT, p_interval INTERVAL, - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT true) RETURNS INTEGER AS $$ DECLARE @@ -389,7 +389,7 @@ BEGIN WHILE p_start_value <= p_end_value LOOP EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s);', - pg_typeof(p_start_value)) + @extschema@.get_base_type(pg_typeof(p_start_value))::TEXT) USING parent_relid, p_start_value, p_start_value + p_interval; p_start_value := p_start_value + p_interval; @@ -588,7 +588,7 @@ DECLARE v_parent_relid2 REGCLASS; v_attname TEXT; v_part_type INTEGER; - v_atttype TEXT; + v_atttype REGTYPE; BEGIN IF partition1 = partition2 THEN @@ -623,10 +623,10 @@ BEGIN RAISE EXCEPTION 'Specified partitions aren''t RANGE partitions'; END IF; - v_atttype := @extschema@.get_attribute_type_name(partition1, v_attname); + v_atttype := @extschema@.get_attribute_type(partition1, v_attname); EXECUTE format('SELECT @extschema@.merge_range_partitions_internal($1, $2, $3, NULL::%s)', - v_atttype) + @extschema@.get_base_type(v_atttype)::TEXT) USING v_parent_relid1, partition1, partition2; /* Tell backend to reload configuration */ @@ -713,7 +713,7 @@ RETURNS TEXT AS $$ DECLARE v_attname TEXT; - v_atttype TEXT; + v_atttype REGTYPE; v_part_name TEXT; v_interval TEXT; @@ -730,12 +730,11 @@ BEGIN RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; END IF; - v_atttype := 
@extschema@.get_attribute_type_name(parent_relid, v_attname); + v_atttype := @extschema@.get_attribute_type(parent_relid, v_attname); EXECUTE - format( - 'SELECT @extschema@.append_partition_internal($1, $2, $3, ARRAY[]::%s[], $4)', - v_atttype) + format('SELECT @extschema@.append_partition_internal($1, $2, $3, ARRAY[]::%s[], $4)', + @extschema@.get_base_type(v_atttype)::TEXT) USING parent_relid, v_atttype, @@ -759,7 +758,7 @@ LANGUAGE plpgsql; */ CREATE OR REPLACE FUNCTION @extschema@.append_partition_internal( parent_relid REGCLASS, - p_atttype TEXT, + p_atttype REGTYPE, p_interval TEXT, p_range ANYARRAY DEFAULT NULL, partition_name TEXT DEFAULT NULL) @@ -775,7 +774,7 @@ BEGIN p_range := @extschema@.get_range_by_idx(parent_relid, -1, 0); - IF @extschema@.is_date_type(p_atttype::regtype) THEN + IF @extschema@.is_date_type(p_atttype) THEN v_part_name := @extschema@.create_single_range_partition( parent_relid, p_range[2], @@ -783,9 +782,8 @@ BEGIN partition_name); ELSE EXECUTE - format( - 'SELECT @extschema@.create_single_range_partition($1, $2, $2 + $3::%s, $4)', - p_atttype) + format('SELECT @extschema@.create_single_range_partition($1, $2, $2 + $3::%s, $4)', + @extschema@.get_base_type(p_atttype)::TEXT) USING parent_relid, p_range[2], @@ -811,7 +809,7 @@ RETURNS TEXT AS $$ DECLARE v_attname TEXT; - v_atttype TEXT; + v_atttype REGTYPE; v_part_name TEXT; v_interval TEXT; @@ -825,12 +823,11 @@ BEGIN RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; END IF; - v_atttype := @extschema@.get_attribute_type_name(parent_relid, v_attname); + v_atttype := @extschema@.get_attribute_type(parent_relid, v_attname); EXECUTE - format( - 'SELECT @extschema@.prepend_partition_internal($1, $2, $3, ARRAY[]::%s[], $4)', - v_atttype) + format('SELECT @extschema@.prepend_partition_internal($1, $2, $3, ARRAY[]::%s[], $4)', + @extschema@.get_base_type(v_atttype)::TEXT) USING parent_relid, v_atttype, @@ -854,7 +851,7 @@ LANGUAGE plpgsql; */ CREATE OR REPLACE FUNCTION 
@extschema@.prepend_partition_internal( parent_relid REGCLASS, - p_atttype TEXT, + p_atttype REGTYPE, p_interval TEXT, p_range ANYARRAY DEFAULT NULL, partition_name TEXT DEFAULT NULL) @@ -870,7 +867,7 @@ BEGIN p_range := @extschema@.get_range_by_idx(parent_relid, 0, 0); - IF @extschema@.is_date_type(p_atttype::regtype) THEN + IF @extschema@.is_date_type(p_atttype) THEN v_part_name := @extschema@.create_single_range_partition( parent_relid, p_range[1] - p_interval::interval, @@ -878,9 +875,8 @@ BEGIN partition_name); ELSE EXECUTE - format( - 'SELECT @extschema@.create_single_range_partition($1, $2 - $3::%s, $2, $4)', - p_atttype) + format('SELECT @extschema@.create_single_range_partition($1, $2 - $3::%s, $2, $4)', + @extschema@.get_base_type(p_atttype)::TEXT) USING parent_relid, p_range[1], diff --git a/src/hooks.c b/src/hooks.c index 6db5434be9..5f3e5b4046 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -550,7 +550,6 @@ pathman_relcache_hook(Datum arg, Oid relid) /* Both syscache and pathman's cache say it isn't a partition */ case PPS_ENTRY_NOT_FOUND: { - /* NOTE: Remove NOT_USED when it's time */ delay_invalidation_parent_rel(partitioned_table); #ifdef NOT_USED elog(DEBUG2, "Invalidation message for relation %u [%u]", diff --git a/src/init.c b/src/init.c index 60eff1adb3..f27a25e9ca 100644 --- a/src/init.c +++ b/src/init.c @@ -27,6 +27,7 @@ #include "catalog/pg_type.h" #include "miscadmin.h" #include "optimizer/clauses.h" +#include "parser/parse_coerce.h" #include "utils/datum.h" #include "utils/inval.h" #include "utils/builtins.h" @@ -854,7 +855,9 @@ validate_range_constraint(const Expr *expr, } /* - * Reads const value from expressions of kind: VAR >= CONST or VAR < CONST + * Reads const value from expressions of kind: + * 1) VAR >= CONST OR VAR < CONST + * 2) RELABELTYPE(VAR) >= CONST OR RELABELTYPE(VAR) < CONST */ static bool read_opexpr_const(const OpExpr *opexpr, @@ -863,6 +866,7 @@ read_opexpr_const(const OpExpr *opexpr, { const Node *left; const Node 
*right; + const Var *part_attr; /* partitioned column */ const Const *constant; if (list_length(opexpr->args) != 2) @@ -871,24 +875,81 @@ read_opexpr_const(const OpExpr *opexpr, left = linitial(opexpr->args); right = lsecond(opexpr->args); - if (!IsA(left, Var) || !IsA(right, Const)) - return false; - if (((Var *) left)->varoattno != prel->attnum) + /* VAR is a part of RelabelType node */ + if (IsA(left, RelabelType) && IsA(right, Const)) + { + Var *var = (Var *) ((RelabelType *) left)->arg; + + if (IsA(var, Var)) + part_attr = var; + else + return false; + } + /* left arg is of type VAR */ + else if (IsA(left, Var) && IsA(right, Const)) + { + part_attr = (Var *) left; + } + /* Something is wrong, retreat! */ + else return false; + + /* VAR.attno == partitioned attribute number */ + if (part_attr->varoattno != prel->attnum) return false; + + /* CONST is NOT NULL */ if (((Const *) right)->constisnull) return false; constant = (Const *) right; - /* Check that types match */ - if (prel->atttype != constant->consttype) + /* Check that types are binary coercible */ + if (IsBinaryCoercible(constant->consttype, prel->atttype)) { - elog(WARNING, "Constant type in some check constraint does " - "not match the partitioned column's type"); - return false; + *val = constant->constvalue; } + /* If not, try to perfrom a type cast */ + else + { + CoercionPathType ret; + Oid castfunc = InvalidOid; + + ret = find_coercion_pathway(prel->atttype, constant->consttype, + COERCION_EXPLICIT, &castfunc); - *val = constant->constvalue; + switch (ret) + { + /* There's a function */ + case COERCION_PATH_FUNC: + { + /* Perform conversion */ + Assert(castfunc != InvalidOid); + *val = OidFunctionCall1(castfunc, constant->constvalue); + } + break; + + /* Types are binary compatible (no implicit cast) */ + case COERCION_PATH_RELABELTYPE: + { + /* We don't perform any checks here */ + *val = constant->constvalue; + } + break; + + /* TODO: implement these if needed */ + case 
COERCION_PATH_ARRAYCOERCE: + case COERCION_PATH_COERCEVIAIO: + + /* There's no cast available */ + case COERCION_PATH_NONE: + default: + { + elog(WARNING, "Constant type in some check constraint " + "does not match the partitioned column's type"); + return false; + } + } + } return true; } diff --git a/src/partition_filter.c b/src/partition_filter.c index 85e0649004..897105a1b2 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -253,7 +253,6 @@ partition_filter_end(CustomScanState *node) hash_seq_init(&stat, state->result_rels_table); while ((rri_handle = (ResultRelInfoHolder *) hash_seq_search(&stat)) != NULL) { - /* FIXME: add ResultRelInfos to estate->es_result_relations to fix triggers */ ExecCloseIndices(rri_handle->resultRelInfo); heap_close(rri_handle->resultRelInfo->ri_RelationDesc, RowExclusiveLock); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 7359b693a3..681047d111 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -857,9 +857,16 @@ extract_binary_interval_from_text(Datum interval_text, /* interval as TEXT */ else elog(ERROR, "Cannot find input function for type %u", part_atttype); - /* Convert interval from CSTRING to 'prel->atttype' */ - interval_binary = OidFunctionCall1(typein_proc, - CStringGetDatum(interval_cstring)); + /* + * Convert interval from CSTRING to 'prel->atttype'. + * + * Note: We pass 3 arguments in case + * 'typein_proc' also takes Oid & typmod. 
+ */ + interval_binary = OidFunctionCall3(typein_proc, + CStringGetDatum(interval_cstring), + ObjectIdGetDatum(part_atttype), + Int32GetDatum(-1)); if (interval_type) *interval_type = part_atttype; } @@ -887,8 +894,11 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) /* Get both PartRelationInfo & PATHMAN_CONFIG contents for this relation */ if (pathman_config_contains_relation(relid, values, isnull, NULL)) { - Datum min_rvalue, - max_rvalue; + Oid base_atttype; /* base type of prel->atttype */ + Oid base_value_type; /* base type of value_type */ + + Datum min_rvalue, /* absolute MIN */ + max_rvalue; /* absolute MAX */ Oid interval_type = InvalidOid; Datum interval_binary, /* assigned 'width' of a single partition */ @@ -900,6 +910,10 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) prel = get_pathman_relation_info(relid); shout_if_prel_is_invalid(relid, prel, PT_RANGE); + /* Fetch base types of prel->atttype & value_type */ + base_atttype = getBaseType(prel->atttype); + base_value_type = getBaseType(value_type); + /* Read max & min range values from PartRelationInfo */ min_rvalue = prel->ranges[0].min; max_rvalue = prel->ranges[PrelLastChild(prel)].max; @@ -909,23 +923,23 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) /* Convert interval to binary representation */ interval_binary = extract_binary_interval_from_text(interval_text, - prel->atttype, + base_atttype, &interval_type); /* Fill the FmgrInfo struct with a cmp(value, part_attribute) function */ - fill_type_cmp_fmgr_info(&interval_type_cmp, value_type, prel->atttype); + fill_type_cmp_fmgr_info(&interval_type_cmp, base_value_type, base_atttype); if (SPI_connect() != SPI_OK_CONNECT) elog(ERROR, "Could not connect using SPI"); /* while (value >= MAX) ... 
*/ spawn_partitions(PrelParentRelid(prel), value, max_rvalue, - prel->atttype, &interval_type_cmp, interval_binary, + base_atttype, &interval_type_cmp, interval_binary, interval_type, true, &partid); /* while (value < MIN) ... */ spawn_partitions(PrelParentRelid(prel), value, min_rvalue, - prel->atttype, &interval_type_cmp, interval_binary, + base_atttype, &interval_type_cmp, interval_binary, interval_type, false, &partid); SPI_finish(); /* close SPI connection */ diff --git a/src/pl_funcs.c b/src/pl_funcs.c index a647a73075..a5522ed02e 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -34,7 +34,8 @@ PG_FUNCTION_INFO_V1( on_partitions_created ); PG_FUNCTION_INFO_V1( on_partitions_updated ); PG_FUNCTION_INFO_V1( on_partitions_removed ); PG_FUNCTION_INFO_V1( get_parent_of_partition_pl ); -PG_FUNCTION_INFO_V1( get_attribute_type_name ); +PG_FUNCTION_INFO_V1( get_base_type_pl ); +PG_FUNCTION_INFO_V1( get_attribute_type_pl ); PG_FUNCTION_INFO_V1( find_or_create_range_partition); PG_FUNCTION_INFO_V1( get_range_by_idx ); PG_FUNCTION_INFO_V1( get_range_by_part_oid ); @@ -159,14 +160,24 @@ get_parent_of_partition_pl(PG_FUNCTION_ARGS) } /* - * Get type (as text) of a given attribute. + * Extract basic type of a domain. */ Datum -get_attribute_type_name(PG_FUNCTION_ARGS) +get_base_type_pl(PG_FUNCTION_ARGS) +{ + PG_RETURN_OID(getBaseType(PG_GETARG_OID(0))); +} + + +/* + * Get type (as REGTYPE) of a given attribute. 
+ */ +Datum +get_attribute_type_pl(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); text *attname = PG_GETARG_TEXT_P(1); - char *result; + Oid result; HeapTuple tp; /* NOTE: for now it's the most efficient way */ @@ -174,10 +185,10 @@ get_attribute_type_name(PG_FUNCTION_ARGS) if (HeapTupleIsValid(tp)) { Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); - result = format_type_be(att_tup->atttypid); + result = att_tup->atttypid; ReleaseSysCache(tp); - PG_RETURN_TEXT_P(cstring_to_text(result)); + PG_RETURN_OID(result); } else elog(ERROR, "Cannot find type name for attribute \"%s\" " @@ -425,6 +436,7 @@ get_hash_part_idx(PG_FUNCTION_ARGS) PG_RETURN_UINT32(hash_to_part_index(value, part_count)); } + /* * Traits. */ @@ -475,7 +487,6 @@ build_range_condition(PG_FUNCTION_ARGS) Oid min_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 1), max_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 2); - char *subst_str; /* substitution string */ char *result; /* This is not going to trigger (not now, at least), just for the safety */ @@ -483,18 +494,8 @@ build_range_condition(PG_FUNCTION_ARGS) elog(ERROR, "Cannot build range condition: " "boundaries should be of the same type"); - /* Check if we need single quotes */ - /* TODO: check for primitive types instead, that would be better */ - if (is_date_type_internal(min_bound_type) || - is_string_type_internal(min_bound_type)) - { - subst_str = "%1$s >= '%2$s' AND %1$s < '%3$s'"; - } - else - subst_str = "%1$s >= %2$s AND %1$s < %3$s"; - /* Create range condition CSTRING */ - result = psprintf(subst_str, + result = psprintf("%1$s >= '%2$s' AND %1$s < '%3$s'", text_to_cstring(attname), datum_to_cstring(min_bound, min_bound_type), datum_to_cstring(max_bound, max_bound_type)); diff --git a/src/utils.c b/src/utils.c index 8be3c67b4b..0ba2e48a50 100644 --- a/src/utils.c +++ b/src/utils.c @@ -246,6 +246,11 @@ fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, Oid type2) type1, type2, BTORDER_PROC); + + if (cmp_proc_oid == 
InvalidOid) + elog(ERROR, "Missing comparison function for types %u & %u", + type1, type2); + fmgr_info(cmp_proc_oid, finfo); return; From 2f5804f2732efbfd3f630f5aec52f1511160eb3c Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 14 Sep 2016 18:43:28 +0300 Subject: [PATCH 132/184] refactoring for PartitionFilter custom node (extract ResultRelInfo cache), introduce copy_stmt_hooking subsystem --- Makefile | 3 +- src/copy_stmt_hooking.c | 33 +++++ src/copy_stmt_hooking.h | 12 ++ src/hooks.c | 31 +++++ src/hooks.h | 9 ++ src/partition_filter.c | 291 +++++++++++++++++++++++----------------- src/partition_filter.h | 40 ++++-- src/pg_pathman.c | 2 + 8 files changed, 284 insertions(+), 137 deletions(-) create mode 100644 src/copy_stmt_hooking.c create mode 100644 src/copy_stmt_hooking.h diff --git a/Makefile b/Makefile index 4224a99b2c..9396f7dbc5 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,8 @@ MODULE_big = pg_pathman OBJS = src/init.o src/relation_info.o src/utils.o src/partition_filter.o src/runtimeappend.o \ src/runtime_merge_append.o src/pg_pathman.o src/dsm_array.o src/rangeset.o src/pl_funcs.o \ - src/pathman_workers.o src/hooks.o src/nodes_common.o src/xact_handling.o $(WIN32RES) + src/pathman_workers.o src/hooks.o src/nodes_common.o src/xact_handling.o src/copy_stmt_hooking.o \ + $(WIN32RES) EXTENSION = pg_pathman EXTVERSION = 1.0 diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c new file mode 100644 index 0000000000..66eda8254b --- /dev/null +++ b/src/copy_stmt_hooking.c @@ -0,0 +1,33 @@ +#include "copy_stmt_hooking.h" +#include "relation_info.h" + +#include "catalog/namespace.h" +#include "commands/copy.h" + + +/* + * Is pg_pathman supposed to handle this COPY stmt? 
+ */ +bool +is_pathman_related_copy(Node *parsetree) +{ + CopyStmt *copy_stmt = (CopyStmt *) parsetree; + Oid partitioned_table; + + /* Check that it's a CopyStmt */ + if (!IsA(parsetree, CopyStmt)) + return false; + + /* Also check that stmt->relation exists */ + if (!copy_stmt->relation) + return false; + + /* TODO: select appropriate lock for COPY */ + partitioned_table = RangeVarGetRelid(copy_stmt->relation, NoLock, false); + + /* Check that relation is partitioned */ + if (get_pathman_relation_info(partitioned_table)) + return true; + + return false; +} diff --git a/src/copy_stmt_hooking.h b/src/copy_stmt_hooking.h new file mode 100644 index 0000000000..84f69bc6e9 --- /dev/null +++ b/src/copy_stmt_hooking.h @@ -0,0 +1,12 @@ +#ifndef COPY_STMT_HOOKING_H +#define COPY_STMT_HOOKING_H + + +#include "postgres.h" +#include "commands/copy.h" +#include "nodes/nodes.h" + + +bool is_pathman_related_copy(Node *parsetree); + +#endif diff --git a/src/hooks.c b/src/hooks.c index 6db5434be9..48f62b186e 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -8,6 +8,7 @@ * ------------------------------------------------------------------------ */ +#include "copy_stmt_hooking.h" #include "hooks.h" #include "init.h" #include "partition_filter.h" @@ -27,6 +28,7 @@ set_rel_pathlist_hook_type set_rel_pathlist_hook_next = NULL; planner_hook_type planner_hook_next = NULL; post_parse_analyze_hook_type post_parse_analyze_hook_next = NULL; shmem_startup_hook_type shmem_startup_hook_next = NULL; +ProcessUtility_hook_type process_utility_hook_next = NULL; /* Take care of joins */ @@ -574,3 +576,32 @@ pathman_relcache_hook(Datum arg, Oid relid) break; } } + +/* + * Utility function invoker hook. 
+ */ +void +pathman_process_utility_hook(Node *parsetree, + const char *queryString, + ProcessUtilityContext context, + ParamListInfo params, + DestReceiver *dest, + char *completionTag) +{ + /* Call hooks set by other extensions */ + if (process_utility_hook_next) + process_utility_hook_next(parsetree, queryString, + context, params, + dest, completionTag); + + /* Override standard COPY statements if needed */ + if (is_pathman_related_copy(parsetree)) + { + elog(INFO, "copy!"); + } + + /* Call internal implementation */ + standard_ProcessUtility(parsetree, queryString, + context, params, + dest, completionTag); +} diff --git a/src/hooks.h b/src/hooks.h index 022387b130..5b349a3440 100644 --- a/src/hooks.h +++ b/src/hooks.h @@ -16,6 +16,7 @@ #include "optimizer/paths.h" #include "parser/analyze.h" #include "storage/ipc.h" +#include "tcop/utility.h" extern set_join_pathlist_hook_type set_join_pathlist_next; @@ -23,6 +24,7 @@ extern set_rel_pathlist_hook_type set_rel_pathlist_hook_next; extern planner_hook_type planner_hook_next; extern post_parse_analyze_hook_type post_parse_analyze_hook_next; extern shmem_startup_hook_type shmem_startup_hook_next; +extern ProcessUtility_hook_type process_utility_hook_next; void pathman_join_pathlist_hook(PlannerInfo *root, @@ -50,4 +52,11 @@ void pathman_shmem_startup_hook(void); void pathman_relcache_hook(Datum arg, Oid relid); +void pathman_process_utility_hook(Node *parsetree, + const char *queryString, + ProcessUtilityContext context, + ParamListInfo params, + DestReceiver *dest, + char *completionTag); + #endif diff --git a/src/partition_filter.c b/src/partition_filter.c index 85e0649004..fb13963ecd 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -25,8 +25,9 @@ CustomScanMethods partition_filter_plan_methods; CustomExecMethods partition_filter_exec_methods; +static void partition_filter_visitor(Plan *plan, void *context); static List * pfilter_build_tlist(List *tlist); -static ResultRelInfo * 
getResultRelInfo(Oid partid, PartitionFilterState *state); + void init_partition_filter_static_data(void) @@ -55,6 +56,145 @@ init_partition_filter_static_data(void) NULL); } + +/* + * Add PartitionFilter nodes to the plan tree + */ +void +add_partition_filters(List *rtable, Plan *plan) +{ + if (pg_pathman_enable_partition_filter) + plan_tree_walker(plan, partition_filter_visitor, rtable); +} + + +/* + * Initialize ResultPartsStorage (hash table etc). + */ +void +init_result_parts_storage(ResultPartsStorage *parts_storage) +{ + HASHCTL *result_rels_table_config = &parts_storage->result_rels_table_config; + + memset(result_rels_table_config, 0, sizeof(HASHCTL)); + result_rels_table_config->keysize = sizeof(Oid); + result_rels_table_config->entrysize = sizeof(ResultRelInfoHolder); + + parts_storage->result_rels_table = hash_create("ResultRelInfo storage", 10, + result_rels_table_config, + HASH_ELEM | HASH_BLOBS); + + parts_storage->saved_rel_info = NULL; +} + +/* + * Free ResultPartsStorage (close relations etc). + */ +void +fini_result_parts_storage(ResultPartsStorage *parts_storage) +{ + HASH_SEQ_STATUS stat; + ResultRelInfoHolder *rri_handle; /* ResultRelInfo holder */ + + hash_seq_init(&stat, parts_storage->result_rels_table); + while ((rri_handle = (ResultRelInfoHolder *) hash_seq_search(&stat)) != NULL) + { + ExecCloseIndices(rri_handle->result_rel_info); + heap_close(rri_handle->result_rel_info->ri_RelationDesc, + RowExclusiveLock); + } + hash_destroy(parts_storage->result_rels_table); +} + +/* + * Find a ResultRelInfo for the partition using ResultPartsStorage. 
+ */ +ResultRelInfo * +scan_result_parts_storage(Oid partid, + ResultPartsStorage *storage, + bool speculative_insertion) +{ +#define CopyToResultRelInfo(field_name) \ + ( resultRelInfo->field_name = storage->saved_rel_info->field_name ) + + ResultRelInfoHolder *resultRelInfoHolder; + bool found; + + resultRelInfoHolder = hash_search(storage->result_rels_table, + (const void *) &partid, + HASH_ENTER, &found); + + /* If not found, create & cache new ResultRelInfo */ + if (!found) + { + ResultRelInfo *resultRelInfo = (ResultRelInfo *) palloc(sizeof(ResultRelInfo)); + + InitResultRelInfo(resultRelInfo, + heap_open(partid, RowExclusiveLock), + 0, + 0); /* TODO: select suitable options */ + + ExecOpenIndices(resultRelInfo, speculative_insertion); + + /* Copy necessary fields from saved ResultRelInfo */ + CopyToResultRelInfo(ri_WithCheckOptions); + CopyToResultRelInfo(ri_WithCheckOptionExprs); + CopyToResultRelInfo(ri_junkFilter); + CopyToResultRelInfo(ri_projectReturning); + CopyToResultRelInfo(ri_onConflictSetProj); + CopyToResultRelInfo(ri_onConflictSetWhere); + + /* ri_ConstraintExprs will be initialized by ExecRelCheck() */ + resultRelInfo->ri_ConstraintExprs = NULL; + + /* Make 'range table index' point to the parent relation */ + resultRelInfo->ri_RangeTableIndex = + storage->saved_rel_info->ri_RangeTableIndex; + + /* Now fill the ResultRelInfo holder */ + resultRelInfoHolder->partid = partid; + resultRelInfoHolder->result_rel_info = resultRelInfo; + } + + return resultRelInfoHolder->result_rel_info; +} + + +/* + * Find matching partitions for 'value' using PartRelationInfo. 
+ */ +Oid * +find_partitions_for_value(Datum value, const PartRelationInfo *prel, + ExprContext *econtext, int *nparts) +{ +#define CopyToTempConst(const_field, attr_field) \ + ( temp_const.const_field = prel->attr_field ) + + Const temp_const; /* temporary const for expr walker */ + WalkerContext wcxt; + List *ranges = NIL; + + /* Prepare dummy Const node */ + NodeSetTag(&temp_const, T_Const); + temp_const.location = -1; + + /* Fill const with value ... */ + temp_const.constvalue = value; + temp_const.constisnull = false; + + /* ... and some other important data */ + CopyToTempConst(consttype, atttype); + CopyToTempConst(consttypmod, atttypmod); + CopyToTempConst(constcollid, attcollid); + CopyToTempConst(constlen, attlen); + CopyToTempConst(constbyval, attbyval); + + InitWalkerContext(&wcxt, prel, econtext, true); + ranges = walk_expr_tree((Expr *) &temp_const, &wcxt)->rangeset; + return get_partition_oids(ranges, nparts, prel, false); +} + + Plan * make_partition_filter(Plan *subplan, Oid partitioned_table, OnConflictAction conflict_action) @@ -76,8 +216,7 @@ make_partition_filter(Plan *subplan, Oid partitioned_table, cscan->custom_scan_tlist = subplan->targetlist; /* Pack partitioned table's Oid and conflict_action */ - cscan->custom_private = list_make2_int(partitioned_table, - conflict_action); + cscan->custom_private = list_make2_int(partitioned_table, conflict_action); return &cscan->scan.plan; } @@ -96,15 +235,14 @@ partition_filter_create_scan_state(CustomScan *node) /* Extract necessary variables */ state->subplan = (Plan *) linitial(node->custom_plans); state->partitioned_table = linitial_int(node->custom_private); - state->onConflictAction = lsecond_int(node->custom_private); + state->on_conflict_action = lsecond_int(node->custom_private); /* Check boundaries */ - Assert(state->onConflictAction >= ONCONFLICT_NONE || - state->onConflictAction <= ONCONFLICT_UPDATE); + Assert(state->on_conflict_action >= ONCONFLICT_NONE && + state->on_conflict_action <= 
ONCONFLICT_UPDATE); - /* Prepare dummy Const node */ - NodeSetTag(&state->temp_const, T_Const); - state->temp_const.location = -1; + /* There should be exactly one subplan */ + Assert(list_length(node->custom_plans) == 1); return (Node *) state; } @@ -114,31 +252,16 @@ partition_filter_begin(CustomScanState *node, EState *estate, int eflags) { PartitionFilterState *state = (PartitionFilterState *) node; - HTAB *result_rels_table; - HASHCTL *result_rels_table_config = &state->result_rels_table_config; - + /* It's convenient to store PlanState in 'custom_ps' */ node->custom_ps = list_make1(ExecInitNode(state->subplan, estate, eflags)); - state->savedRelInfo = NULL; - - memset(result_rels_table_config, 0, sizeof(HASHCTL)); - result_rels_table_config->keysize = sizeof(Oid); - result_rels_table_config->entrysize = sizeof(ResultRelInfoHolder); - result_rels_table = hash_create("ResultRelInfo storage", 10, - result_rels_table_config, - HASH_ELEM | HASH_BLOBS); - - state->result_rels_table = result_rels_table; + init_result_parts_storage(&state->result_parts); state->warning_triggered = false; } TupleTableSlot * partition_filter_exec(CustomScanState *node) { -#define CopyToTempConst(const_field, attr_field) \ - ( state->temp_const.const_field = \ - slot->tts_tupleDescriptor->attrs[prel->attnum - 1]->attr_field ) - PartitionFilterState *state = (PartitionFilterState *) node; ExprContext *econtext = node->ss.ps.ps_ExprContext; @@ -149,8 +272,8 @@ partition_filter_exec(CustomScanState *node) slot = ExecProcNode(child_ps); /* Save original ResultRelInfo */ - if (!state->savedRelInfo) - state->savedRelInfo = estate->es_result_relation_info; + if (!state->result_parts.saved_rel_info) + state->result_parts.saved_rel_info = estate->es_result_relation_info; if (!TupIsNull(slot)) { @@ -158,12 +281,11 @@ partition_filter_exec(CustomScanState *node) MemoryContext old_cxt; - List *ranges; int nparts; Oid *parts; Oid selected_partid; + bool speculative_insert; - WalkerContext wcxt; bool 
isnull; Datum value; @@ -179,27 +301,18 @@ partition_filter_exec(CustomScanState *node) return slot; } - /* Extract partitioned column value */ + /* Extract partitioned column's value (also check types) */ + Assert(slot->tts_tupleDescriptor-> + attrs[prel->attnum - 1]->atttypid == prel->atttype); value = slot_getattr(slot, prel->attnum, &isnull); - - /* Fill const with value ... */ - state->temp_const.constvalue = value; - state->temp_const.constisnull = isnull; - - /* ... and some other important data */ - CopyToTempConst(consttype, atttypid); - CopyToTempConst(consttypmod, atttypmod); - CopyToTempConst(constcollid, attcollation); - CopyToTempConst(constlen, attlen); - CopyToTempConst(constbyval, attbyval); - - InitWalkerContext(&wcxt, prel, econtext, true); + if (isnull) + elog(ERROR, "partitioned column's value should not be NULL"); /* Switch to per-tuple context */ old_cxt = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); - ranges = walk_expr_tree((Expr *) &state->temp_const, &wcxt)->rangeset; - parts = get_partition_oids(ranges, &nparts, prel, false); + /* Search for matching partitions */ + parts = find_partitions_for_value(value, prel, econtext, &nparts); if (nparts > 1) elog(ERROR, "PartitionFilter selected more than one partition"); @@ -212,8 +325,7 @@ partition_filter_exec(CustomScanState *node) if (prel->auto_partition && IsAutoPartitionEnabled()) { selected_partid = create_partitions(state->partitioned_table, - state->temp_const.constvalue, - state->temp_const.consttype); + value, prel->atttype); /* get_pathman_relation_info() will refresh this entry */ invalidate_pathman_relation_info(state->partitioned_table, NULL); @@ -221,8 +333,7 @@ partition_filter_exec(CustomScanState *node) else elog(ERROR, "There is no suitable partition for key '%s'", - datum_to_cstring(state->temp_const.constvalue, - state->temp_const.consttype)); + datum_to_cstring(value, prel->atttype)); } else selected_partid = parts[0]; @@ -233,7 +344,10 @@ 
partition_filter_exec(CustomScanState *node) /* Replace parent table with a suitable partition */ old_cxt = MemoryContextSwitchTo(estate->es_query_cxt); - estate->es_result_relation_info = getResultRelInfo(selected_partid, state); + speculative_insert = state->on_conflict_action != ONCONFLICT_NONE; + estate->es_result_relation_info = scan_result_parts_storage(selected_partid, + &state->result_parts, + speculative_insert); MemoryContextSwitchTo(old_cxt); return slot; @@ -247,18 +361,8 @@ partition_filter_end(CustomScanState *node) { PartitionFilterState *state = (PartitionFilterState *) node; - HASH_SEQ_STATUS stat; - ResultRelInfoHolder *rri_handle; /* ResultRelInfo holder */ - - hash_seq_init(&stat, state->result_rels_table); - while ((rri_handle = (ResultRelInfoHolder *) hash_seq_search(&stat)) != NULL) - { - /* FIXME: add ResultRelInfos to estate->es_result_relations to fix triggers */ - ExecCloseIndices(rri_handle->resultRelInfo); - heap_close(rri_handle->resultRelInfo->ri_RelationDesc, - RowExclusiveLock); - } - hash_destroy(state->result_rels_table); + /* Close cached relations */ + fini_result_parts_storage(&state->result_parts); Assert(list_length(node->custom_ps) == 1); ExecEndNode((PlanState *) linitial(node->custom_ps)); @@ -277,57 +381,6 @@ partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *e /* Nothing to do here now */ } - -/* - * Construct ResultRelInfo for a partition. 
- */ -static ResultRelInfo * -getResultRelInfo(Oid partid, PartitionFilterState *state) -{ -#define CopyToResultRelInfo(field_name) \ - ( resultRelInfo->field_name = state->savedRelInfo->field_name ) - - ResultRelInfoHolder *resultRelInfoHolder; - bool found; - - resultRelInfoHolder = hash_search(state->result_rels_table, - (const void *) &partid, - HASH_ENTER, &found); - - /* If not found, create & cache new ResultRelInfo */ - if (!found) - { - ResultRelInfo *resultRelInfo = (ResultRelInfo *) palloc(sizeof(ResultRelInfo)); - - InitResultRelInfo(resultRelInfo, - heap_open(partid, RowExclusiveLock), - 0, - state->css.ss.ps.state->es_instrument); - - ExecOpenIndices(resultRelInfo, state->onConflictAction != ONCONFLICT_NONE); - - /* Copy necessary fields from saved ResultRelInfo */ - CopyToResultRelInfo(ri_WithCheckOptions); - CopyToResultRelInfo(ri_WithCheckOptionExprs); - CopyToResultRelInfo(ri_junkFilter); - CopyToResultRelInfo(ri_projectReturning); - CopyToResultRelInfo(ri_onConflictSetProj); - CopyToResultRelInfo(ri_onConflictSetWhere); - - /* ri_ConstraintExprs will be initialized by ExecRelCheck() */ - resultRelInfo->ri_ConstraintExprs = NULL; - - /* Make 'range table index' point to the parent relation */ - resultRelInfo->ri_RangeTableIndex = state->savedRelInfo->ri_RangeTableIndex; - - /* Now fill the ResultRelInfo holder */ - resultRelInfoHolder->partid = partid; - resultRelInfoHolder->resultRelInfo = resultRelInfo; - } - - return resultRelInfoHolder->resultRelInfo; -} - /* * Build partition filter's target list pointing to subplan tuple's elements */ @@ -392,13 +445,3 @@ partition_filter_visitor(Plan *plan, void *context) modify_table->onConflictAction); } } - -/* - * Add PartitionFilter nodes to the plan tree - */ -void -add_partition_filters(List *rtable, Plan *plan) -{ - if (pg_pathman_enable_partition_filter) - plan_tree_walker(plan, partition_filter_visitor, rtable); -} diff --git a/src/partition_filter.h b/src/partition_filter.h index 
d16cb0c0d4..afe238041c 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -19,25 +19,34 @@ #include "optimizer/planner.h" +/* + * Single element of 'result_rels_table'. + */ typedef struct { Oid partid; - ResultRelInfo *resultRelInfo; + ResultRelInfo *result_rel_info; } ResultRelInfoHolder; +/* + * Cached ResultRelInfos of partitions. + */ +typedef struct +{ + ResultRelInfo *saved_rel_info; + HTAB *result_rels_table; + HASHCTL result_rels_table_config; +} ResultPartsStorage; + typedef struct { CustomScanState css; Oid partitioned_table; - OnConflictAction onConflictAction; - ResultRelInfo *savedRelInfo; + OnConflictAction on_conflict_action; - Plan *subplan; - Const temp_const; /* temporary const for expr walker */ - - HTAB *result_rels_table; - HASHCTL result_rels_table_config; + Plan *subplan; /* proxy variable to store subplan */ + ResultPartsStorage result_parts; bool warning_triggered; } PartitionFilterState; @@ -49,13 +58,20 @@ extern CustomScanMethods partition_filter_plan_methods; extern CustomExecMethods partition_filter_exec_methods; -void rowmark_add_tableoids(Query *parse); - -void postprocess_lock_rows(List *rtable, Plan *plan); +void init_partition_filter_static_data(void); void add_partition_filters(List *rtable, Plan *plan); -void init_partition_filter_static_data(void); +/* ResultPartsStorage init/fini/scan functions */ +void init_result_parts_storage(ResultPartsStorage *parts_storage); +void fini_result_parts_storage(ResultPartsStorage *parts_storage); +ResultRelInfo * scan_result_parts_storage(Oid partid, + ResultPartsStorage *storage, + bool speculative_insertion); + +/* Find matching partitions for 'value' using PartRelationInfo */ +Oid *find_partitions_for_value(Datum value, const PartRelationInfo *prel, + ExprContext *econtext, int *nparts); Plan * make_partition_filter(Plan *subplan, Oid partitioned_table, diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 7359b693a3..551c5abedc 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -161,6 +161,8 @@ _PG_init(void) 
post_parse_analyze_hook = pathman_post_parse_analysis_hook; planner_hook_next = planner_hook; planner_hook = pathman_planner_hook; + process_utility_hook_next = ProcessUtility_hook; + ProcessUtility_hook = pathman_process_utility_hook; /* Initialize static data for all subsystems */ init_main_pathman_toggle(); From 381834c3ad007d970b50b58aeb18bf01926b1a90 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 15 Sep 2016 18:45:13 +0300 Subject: [PATCH 133/184] light refactoring, introduce GUC variable 'pg_pathman.override_copy', implement COPY TO for partitioned tables --- src/copy_stmt_hooking.c | 290 ++++++++++++++++++++++++++++++++++++- src/copy_stmt_hooking.h | 11 ++ src/hooks.c | 18 ++- src/init.c | 19 ++- src/init.h | 25 +++- src/partition_filter.c | 2 +- src/pg_pathman.c | 2 +- src/runtime_merge_append.c | 2 + 8 files changed, 352 insertions(+), 17 deletions(-) diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c index 66eda8254b..c8b5af097e 100644 --- a/src/copy_stmt_hooking.c +++ b/src/copy_stmt_hooking.c @@ -1,8 +1,35 @@ +/* ------------------------------------------------------------------------ + * + * copy_stmt_hooking.c + * Override COPY TO/FROM statement for partitioned tables + * + * Copyright (c) 2016, Postgres Professional + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * ------------------------------------------------------------------------ + */ + #include "copy_stmt_hooking.h" +#include "init.h" #include "relation_info.h" +#include "access/htup_details.h" +#include "access/sysattr.h" +#include "access/xact.h" #include "catalog/namespace.h" +#include "catalog/pg_attribute.h" #include "commands/copy.h" +#include "commands/trigger.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/rls.h" + 
+#include "libpq/libpq.h" /* @@ -14,6 +41,12 @@ is_pathman_related_copy(Node *parsetree) CopyStmt *copy_stmt = (CopyStmt *) parsetree; Oid partitioned_table; + if (!IsOverrideCopyEnabled()) + { + elog(DEBUG1, "COPY statement hooking is disabled"); + return false; + } + /* Check that it's a CopyStmt */ if (!IsA(parsetree, CopyStmt)) return false; @@ -23,11 +56,266 @@ is_pathman_related_copy(Node *parsetree) return false; /* TODO: select appropriate lock for COPY */ - partitioned_table = RangeVarGetRelid(copy_stmt->relation, NoLock, false); + partitioned_table = RangeVarGetRelid(copy_stmt->relation, + (copy_stmt->is_from ? + RowExclusiveLock : + AccessShareLock), + false); /* Check that relation is partitioned */ if (get_pathman_relation_info(partitioned_table)) + { + elog(DEBUG1, "Overriding default behavior for COPY (%u)", partitioned_table); return true; + } return false; } + +/* + * CopyGetAttnums - build an integer list of attnums to be copied + * + * The input attnamelist is either the user-specified column list, + * or NIL if there was none (in which case we want all the non-dropped + * columns). + * + * rel can be NULL ... it's only used for error reports. 
+ */ +static List * +CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist) +{ + List *attnums = NIL; + + if (attnamelist == NIL) + { + /* Generate default column list */ + Form_pg_attribute *attr = tupDesc->attrs; + int attr_count = tupDesc->natts; + int i; + + for (i = 0; i < attr_count; i++) + { + if (attr[i]->attisdropped) + continue; + attnums = lappend_int(attnums, i + 1); + } + } + else + { + /* Validate the user-supplied list and extract attnums */ + ListCell *l; + + foreach(l, attnamelist) + { + char *name = strVal(lfirst(l)); + int attnum; + int i; + + /* Lookup column name */ + attnum = InvalidAttrNumber; + for (i = 0; i < tupDesc->natts; i++) + { + if (tupDesc->attrs[i]->attisdropped) + continue; + if (namestrcmp(&(tupDesc->attrs[i]->attname), name) == 0) + { + attnum = tupDesc->attrs[i]->attnum; + break; + } + } + if (attnum == InvalidAttrNumber) + { + if (rel != NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + name, RelationGetRelationName(rel)))); + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + name))); + } + /* Check for duplicates */ + if (list_member_int(attnums, attnum)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" specified more than once", + name))); + attnums = lappend_int(attnums, attnum); + } + } + + return attnums; +} + +/* + * Execute COPY TO/FROM statement for a partitioned table. + * NOTE: based on DoCopy() (see copy.c). + */ +void +PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) +{ + CopyState cstate; + bool is_from = stmt->is_from; + bool pipe = (stmt->filename == NULL); + Relation rel; + Oid relid; + Node *query = NULL; + List *range_table = NIL; + + /* Disallow COPY TO/FROM file or program except to superusers. 
*/ + if (!pipe && !superuser()) + { + if (stmt->is_program) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to COPY to or from an external program"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + else + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to COPY to or from a file"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + + if (stmt->relation) + { + TupleDesc tupDesc; + AclMode required_access = (is_from ? ACL_INSERT : ACL_SELECT); + List *attnums; + ListCell *cur; + RangeTblEntry *rte; + + Assert(!stmt->query); + + /* Open the relation (we've locked it in is_pathman_related_copy()) */ + rel = heap_openrv(stmt->relation, NoLock); + + relid = RelationGetRelid(rel); + + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = RelationGetRelid(rel); + rte->relkind = rel->rd_rel->relkind; + rte->requiredPerms = required_access; + range_table = list_make1(rte); + + tupDesc = RelationGetDescr(rel); + attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist); + foreach(cur, attnums) + { + int attno = lfirst_int(cur) - FirstLowInvalidHeapAttributeNumber; + + if (is_from) + rte->insertedCols = bms_add_member(rte->insertedCols, attno); + else + rte->selectedCols = bms_add_member(rte->selectedCols, attno); + } + ExecCheckRTPerms(range_table, true); + + /* + * We should perform a query instead of low-level heap scan whenever: + * a) table has a RLS policy; + * b) table is partitioned & it's COPY TO. 
+ */ + if (check_enable_rls(rte->relid, InvalidOid, false) == RLS_ENABLED || + is_from == false) /* rewrite COPY table TO statements */ + { + SelectStmt *select; + ColumnRef *cr; + ResTarget *target; + RangeVar *from; + + if (is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY FROM not supported with row-level security"), + errhint("Use INSERT statements instead."))); + + /* Build target list */ + cr = makeNode(ColumnRef); + + if (!stmt->attlist) + cr->fields = list_make1(makeNode(A_Star)); + else + cr->fields = stmt->attlist; + + cr->location = 1; + + target = makeNode(ResTarget); + target->name = NULL; + target->indirection = NIL; + target->val = (Node *) cr; + target->location = 1; + + /* + * Build RangeVar for from clause, fully qualified based on the + * relation which we have opened and locked. + */ + from = makeRangeVar(get_namespace_name(RelationGetNamespace(rel)), + RelationGetRelationName(rel), -1); + + /* Build query */ + select = makeNode(SelectStmt); + select->targetList = list_make1(target); + select->fromClause = list_make1(from); + + query = (Node *) select; + + /* + * Close the relation for now, but keep the lock on it to prevent + * changes between now and when we start the query-based COPY. + * + * We'll reopen it later as part of the query-based COPY. + */ + heap_close(rel, NoLock); + rel = NULL; + } + } + else + { + Assert(stmt->query); + + query = stmt->query; + relid = InvalidOid; + rel = NULL; + } + + /* COPY ... FROM ... */ + if (is_from) + { + /* There should be relation */ + Assert(rel); + + /* check read-only transaction and parallel mode */ + if (XactReadOnly && !rel->rd_islocaltemp) + PreventCommandIfReadOnly("PATHMAN COPY FROM"); + PreventCommandIfParallelMode("PATHMAN COPY FROM"); + + cstate = BeginCopyFrom(rel, stmt->filename, stmt->is_program, + stmt->attlist, stmt->options); + /* TODO: copy files to DB */ + heap_close(rel, NoLock); + *processed = 0; + EndCopyFrom(cstate); + } + /* COPY ... TO ... 
*/ + else + { + CopyStmt modified_copy_stmt; + + /* We should've created a query */ + Assert(query); + + /* Copy 'stmt' and override some of the fields */ + modified_copy_stmt = *stmt; + modified_copy_stmt.relation = NULL; + modified_copy_stmt.query = query; + + /* Call standard DoCopy using a new CopyStmt */ + DoCopy(&modified_copy_stmt, queryString, processed); + } +} diff --git a/src/copy_stmt_hooking.h b/src/copy_stmt_hooking.h index 84f69bc6e9..389a411c43 100644 --- a/src/copy_stmt_hooking.h +++ b/src/copy_stmt_hooking.h @@ -1,3 +1,13 @@ +/* ------------------------------------------------------------------------ + * + * copy_stmt_hooking.h + * Override COPY TO/FROM statement for partitioned tables + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + #ifndef COPY_STMT_HOOKING_H #define COPY_STMT_HOOKING_H @@ -8,5 +18,6 @@ bool is_pathman_related_copy(Node *parsetree); +void PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed); #endif diff --git a/src/hooks.c b/src/hooks.c index 48f62b186e..7c6c6848de 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -387,17 +387,22 @@ pg_pathman_enable_assign_hook(bool newval, void *extra) /* Return quickly if nothing has changed */ if (newval == (pg_pathman_init_state.pg_pathman_enable && + pg_pathman_init_state.auto_partition && + pg_pathman_init_state.override_copy && pg_pathman_enable_runtimeappend && pg_pathman_enable_runtime_merge_append && pg_pathman_enable_partition_filter)) return; + pg_pathman_init_state.auto_partition = newval; + pg_pathman_init_state.override_copy = newval; pg_pathman_enable_runtime_merge_append = newval; pg_pathman_enable_runtimeappend = newval; pg_pathman_enable_partition_filter = newval; elog(NOTICE, - "RuntimeAppend, RuntimeMergeAppend and PartitionFilter nodes have been %s", + "RuntimeAppend, RuntimeMergeAppend and PartitionFilter nodes " + "and some other options have been %s", newval ? 
"enabled" : "disabled"); } @@ -594,10 +599,17 @@ pathman_process_utility_hook(Node *parsetree, context, params, dest, completionTag); - /* Override standard COPY statements if needed */ + /* Override standard COPY statement if needed */ if (is_pathman_related_copy(parsetree)) { - elog(INFO, "copy!"); + uint64 processed; + + PathmanDoCopy((CopyStmt *) parsetree, queryString, &processed); + if (completionTag) + snprintf(completionTag, COMPLETION_TAG_BUFSIZE, + "PATHMAN COPY " UINT64_FORMAT, processed); + + return; /* don't call standard_ProcessUtility() */ } /* Call internal implementation */ diff --git a/src/init.c b/src/init.c index 60eff1adb3..fe7ee3a685 100644 --- a/src/init.c +++ b/src/init.c @@ -102,10 +102,10 @@ restore_pathman_init_state(const PathmanInitState *temp_init_state) } /* - * Create main GUC. + * Create main GUCs. */ void -init_main_pathman_toggle(void) +init_main_pathman_toggles(void) { /* Main toggle, load_config() will enable it */ DefineCustomBoolVariable("pg_pathman.enable", @@ -119,8 +119,9 @@ init_main_pathman_toggle(void) pg_pathman_enable_assign_hook, NULL); + /* Global toggle for automatic partition creation */ DefineCustomBoolVariable("pg_pathman.enable_auto_partition", - "Enables auto partition propagation", + "Enables automatic partition creation", NULL, &pg_pathman_init_state.auto_partition, true, @@ -129,6 +130,18 @@ init_main_pathman_toggle(void) NULL, NULL, NULL); + + /* Global toggle for COPY stmt handling */ + DefineCustomBoolVariable("pg_pathman.override_copy", + "Override COPY statement handling", + NULL, + &pg_pathman_init_state.override_copy, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); } /* diff --git a/src/init.h b/src/init.h index 9375976db1..effb2675c7 100644 --- a/src/init.h +++ b/src/init.h @@ -27,6 +27,7 @@ typedef struct { bool pg_pathman_enable; /* GUC variable implementation */ bool auto_partition; /* GUC variable for auto partition propagation */ + bool override_copy; /* override COPY TO/FROM */ bool 
initialization_needed; /* do we need to perform init? */ } PathmanInitState; @@ -41,22 +42,27 @@ extern PathmanInitState pg_pathman_init_state; /* * Check if pg_pathman is initialized. */ -#define IsPathmanInitialized() ( !pg_pathman_init_state.initialization_needed ) +#define IsPathmanInitialized() ( !pg_pathman_init_state.initialization_needed ) /* * Check if pg_pathman is enabled. */ -#define IsPathmanEnabled() ( pg_pathman_init_state.pg_pathman_enable ) +#define IsPathmanEnabled() ( pg_pathman_init_state.pg_pathman_enable ) /* * Check if pg_pathman is initialized & enabled. */ -#define IsPathmanReady() ( IsPathmanInitialized() && IsPathmanEnabled() ) +#define IsPathmanReady() ( IsPathmanInitialized() && IsPathmanEnabled() ) /* - * Check if auto partition propagation enabled + * Should we override COPY stmt handling? */ -#define IsAutoPartitionEnabled() ( pg_pathman_init_state.auto_partition ) +#define IsOverrideCopyEnabled() ( pg_pathman_init_state.override_copy ) + +/* + * Check if auto partition creation is enabled. + */ +#define IsAutoPartitionEnabled() ( pg_pathman_init_state.auto_partition ) /* * Enable/disable auto partition propagation. Note that this only works if @@ -65,7 +71,8 @@ extern PathmanInitState pg_pathman_init_state; */ #define SetAutoPartitionEnabled(value) \ do { \ - pg_pathman_init_state.auto_partition = value; \ + Assert((value) == true || (value) == false); \ + pg_pathman_init_state.auto_partition = (value); \ } while (0) /* @@ -74,6 +81,8 @@ extern PathmanInitState pg_pathman_init_state; #define DisablePathman() \ do { \ pg_pathman_init_state.pg_pathman_enable = false; \ + pg_pathman_init_state.auto_partition = false; \ + pg_pathman_init_state.override_copy = false; \ pg_pathman_init_state.initialization_needed = true; \ } while (0) @@ -85,9 +94,9 @@ void save_pathman_init_state(PathmanInitState *temp_init_state); void restore_pathman_init_state(const PathmanInitState *temp_init_state); /* - * Create main GUC variable. 
+ * Create main GUC variables. */ -void init_main_pathman_toggle(void); +void init_main_pathman_toggles(void); Size estimate_pathman_shmem_size(void); void init_shmem_config(void); diff --git a/src/partition_filter.c b/src/partition_filter.c index fb13963ecd..a7dbcc9389 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -309,7 +309,7 @@ partition_filter_exec(CustomScanState *node) elog(ERROR, "partitioned column's value should not be NULL"); /* Switch to per-tuple context */ - old_cxt = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + old_cxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); /* Search for matching partitions */ parts = find_partitions_for_value(value, prel, econtext, &nparts); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 551c5abedc..570e98bbbc 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -165,7 +165,7 @@ _PG_init(void) ProcessUtility_hook = pathman_process_utility_hook; /* Initialize static data for all subsystems */ - init_main_pathman_toggle(); + init_main_pathman_toggles(); init_runtimeappend_static_data(); init_runtime_merge_append_static_data(); init_partition_filter_static_data(); diff --git a/src/runtime_merge_append.c b/src/runtime_merge_append.c index ad6389336f..3787d5f768 100644 --- a/src/runtime_merge_append.c +++ b/src/runtime_merge_append.c @@ -4,6 +4,8 @@ * RuntimeMergeAppend node's function definitions and global variables * * Copyright (c) 2016, Postgres Professional + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California * * ------------------------------------------------------------------------ */ From fbbbf4b788de39125cdae8d8325df36a94389e17 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 15 Sep 2016 20:12:11 +0300 Subject: [PATCH 134/184] check layout compatibility between AppendPath and MergeAppendPath --- src/hooks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/src/hooks.c b/src/hooks.c index 6db5434be9..1468c33a5c 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -365,8 +365,16 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb ppi, paramsel); else if (IsA(cur_path, MergeAppendPath) && pg_pathman_enable_runtime_merge_append) + { + /* Check struct layout compatibility */ + if (offsetof(AppendPath, subpaths) != + offsetof(MergeAppendPath, subpaths)) + elog(FATAL, "Struct layouts of AppendPath and " + "MergeAppendPath differ"); + inner_path = create_runtimemergeappend_path(root, cur_path, ppi, paramsel); + } if (inner_path) add_path(rel, inner_path); From 3d0aa71b25043a2b6845516dbfef7ec7bb127426 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 15 Sep 2016 20:15:38 +0300 Subject: [PATCH 135/184] fix regression tests --- expected/pg_pathman.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index ade6d8b711..c1e79e4a96 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -852,7 +852,7 @@ $$ language plpgsql set pg_pathman.enable = true set enable_hashjoin = off set enable_mergejoin = off; -NOTICE: RuntimeAppend, RuntimeMergeAppend and PartitionFilter nodes have been enabled +NOTICE: RuntimeAppend, RuntimeMergeAppend and PartitionFilter nodes and some other options have been enabled create table test.run_values as select generate_series(1, 10000) val; create table test.runtime_test_1(id serial primary key, val real); insert into test.runtime_test_1 select generate_series(1, 10000), random(); From 7c15a2a400c0a8af2033907c1042b332d4b0e1c1 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 15 Sep 2016 20:24:45 +0300 Subject: [PATCH 136/184] fix warnings reported by clang-analyzer --- src/copy_stmt_hooking.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c index c8b5af097e..96ed1f57f7 100644 --- a/src/copy_stmt_hooking.c +++ 
b/src/copy_stmt_hooking.c @@ -160,7 +160,6 @@ PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) bool is_from = stmt->is_from; bool pipe = (stmt->filename == NULL); Relation rel; - Oid relid; Node *query = NULL; List *range_table = NIL; @@ -194,8 +193,6 @@ PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) /* Open the relation (we've locked it in is_pathman_related_copy()) */ rel = heap_openrv(stmt->relation, NoLock); - relid = RelationGetRelid(rel); - rte = makeNode(RangeTblEntry); rte->rtekind = RTE_RELATION; rte->relid = RelationGetRelid(rel); @@ -280,7 +277,6 @@ PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) Assert(stmt->query); query = stmt->query; - relid = InvalidOid; rel = NULL; } @@ -291,7 +287,7 @@ PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) Assert(rel); /* check read-only transaction and parallel mode */ - if (XactReadOnly && !rel->rd_islocaltemp) + if (XactReadOnly && rel && !rel->rd_islocaltemp) PreventCommandIfReadOnly("PATHMAN COPY FROM"); PreventCommandIfParallelMode("PATHMAN COPY FROM"); From 0a3b7e597fd045ff6aefdbee657623cd6288abbb Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 16 Sep 2016 17:37:27 +0300 Subject: [PATCH 137/184] on partition created callback --- expected/pg_pathman.out | 56 +++++++++++++++++++++++++ init.sql | 47 ++++++++++++++++----- range.sql | 6 ++- sql/pg_pathman.sql | 24 +++++++++++ src/pathman.h | 3 +- src/pl_funcs.c | 92 ++++++++++++++++++++++++++++++++++++++++- src/relation_info.c | 2 + src/relation_info.h | 1 + src/utils.c | 80 +++++++++++++++++++++++++++++++++++ src/utils.h | 4 ++ 10 files changed, 301 insertions(+), 14 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index ade6d8b711..6616f3af50 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1768,3 +1768,59 @@ NOTICE: 100 rows copied from test_fkey_0 10 (1 row) +/* Check callbacks */ 
+CREATE TABLE log(id serial, message text); +CREATE FUNCTION abc_on_partition_created_callback(args jsonb) +RETURNS VOID AS $$ +DECLARE + start_value TEXT := args->>'start'; + end_value TEXT := args::jsonb->'end'; +BEGIN + -- raise notice 'callback: %', args->start; + INSERT INTO log(message) + VALUES (start_value || '-' || end_value); +END +$$ language plpgsql; +CREATE TABLE abc(a serial, b int); +SELECT create_range_partitions('abc', 'a', 1, 100, 2); +NOTICE: sequence "abc_seq" does not exist, skipping + create_range_partitions +------------------------- + 2 +(1 row) + +SELECT set_callback('abc', 'abc_on_partition_created_callback'); + set_callback +-------------- + +(1 row) + +INSERT INTO abc VALUES (123, 1); +INSERT INTO abc VALUES (223, 1); +SELECT append_range_partition('abc'); + append_range_partition +------------------------ + public.abc_4 +(1 row) + +SELECT prepend_range_partition('abc'); + prepend_range_partition +------------------------- + public.abc_5 +(1 row) + +SELECT add_range_partition('abc', 401, 501); + add_range_partition +--------------------- + public.abc_6 +(1 row) + +SELECT message FROM log; + message +--------- + 201-301 + 301-401 + -99-1 + 401-501 +(4 rows) + diff --git a/init.sql b/init.sql index 7c47b7da49..dddf610455 100644 --- a/init.sql +++ b/init.sql @@ -31,23 +31,17 @@ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config ( * partrel - regclass (relation type, stored as Oid) * enable_parent - add parent table to plan * auto - enable automatic partition creation + * callback - */ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config_params ( partrel REGCLASS NOT NULL PRIMARY KEY, enable_parent BOOLEAN NOT NULL DEFAULT TRUE, - auto BOOLEAN NOT NULL DEFAULT TRUE + auto BOOLEAN NOT NULL DEFAULT TRUE, + callback REGPROCEDURE ); CREATE UNIQUE INDEX i_pathman_config_params ON @extschema@.pathman_config_params(partrel); -CREATE TYPE partition AS ( - parent REGCLASS, - parttype INTEGER, - child REGCLASS, - start_value TEXT, - end_value TEXT 
-); - /* * Invalidate relcache every time someone changes parameters config. */ @@ -101,7 +95,7 @@ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION @extschema@.pathman_set_param( relation REGCLASS, param TEXT, - value BOOLEAN) + value ANYELEMENT) RETURNS VOID AS $$ BEGIN @@ -161,6 +155,19 @@ END $$ LANGUAGE plpgsql; +/* + * Set partition creation callback + */ +CREATE OR REPLACE FUNCTION @extschema@.set_callback(relation REGCLASS, callback REGPROC) +RETURNS VOID AS +$$ +BEGIN + PERFORM @extschema@.validate_on_partition_created_callback(callback); + PERFORM @extschema@.pathman_set_param(relation, 'callback', callback); +END +$$ +LANGUAGE plpgsql; + /* * Show all existing concurrent partitioning tasks. */ @@ -710,3 +717,23 @@ LANGUAGE C STRICT; CREATE OR REPLACE FUNCTION @extschema@.get_rel_tablespace_name(relation REGCLASS) RETURNS TEXT AS 'pg_pathman', 'get_rel_tablespace_name' LANGUAGE C STRICT; + +/* + * Checks that callback function meets specific requirements. Particularly it + * must have the only JSONB argument and VOID return type + */ +CREATE OR REPLACE FUNCTION @extschema@.validate_on_partition_created_callback(callback REGPROC) +RETURNS VOID AS 'pg_pathman', 'validate_on_partition_created_callback' +LANGUAGE C STRICT; + +/* + * Builds JSONB object containing new partition parameters and invoke the + * callback + */ +CREATE OR REPLACE FUNCTION @extschema@.invoke_on_partition_created_callback( + parent REGCLASS, + partition REGCLASS, + start_value ANYELEMENT, + end_value ANYELEMENT) +RETURNS VOID AS 'pg_pathman', 'invoke_on_partition_created_callback' +LANGUAGE C STRICT; diff --git a/range.sql b/range.sql index 1bbc79c6d3..bde6969fa8 100644 --- a/range.sql +++ b/range.sql @@ -504,7 +504,6 @@ BEGIN if NOT tablespace IS NULL THEN v_create_table_query := v_create_table_query || ' TABLESPACE ' ||tablespace; END IF; - RAISE NOTICE 'query: %', v_create_table_query; EXECUTE format(v_create_table_query, v_child_relname, @@ -519,6 +518,10 @@ BEGIN p_end_value)); PERFORM 
@extschema@.copy_foreign_keys(parent_relid, v_child_relname::REGCLASS); + PERFORM @extschema@.invoke_on_partition_created_callback(parent_relid, + v_child_relname, + p_start_value, + p_end_value); RETURN v_child_relname; END @@ -1234,7 +1237,6 @@ BEGIN END $$ LANGUAGE plpgsql; - /* * Construct CHECK constraint condition for a range partition. */ diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 6a8a424188..2bd11696be 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -665,3 +665,27 @@ SELECT create_hash_partitions('test_fkey', 'id', 10); INSERT INTO test_fkey VALUES(1, 'wrong'); INSERT INTO test_fkey VALUES(1, 'test'); SELECT drop_partitions('test_fkey'); + +/* Check callbacks */ +CREATE TABLE log(id serial, message text); + +CREATE OR REPLACE FUNCTION abc_on_partition_created_callback(args jsonb) +RETURNS VOID AS $$ +DECLARE + start_value TEXT := args->>'start'; + end_value TEXT := args::jsonb->'end'; +BEGIN + INSERT INTO log(message) + VALUES (start_value || '-' || end_value); +END +$$ language plpgsql; + +CREATE TABLE abc(a serial, b int); +SELECT create_range_partitions('abc', 'a', 1, 100, 2); +SELECT set_callback('abc', 'abc_on_partition_created_callback'); +INSERT INTO abc VALUES (123, 1); +INSERT INTO abc VALUES (223, 1); +SELECT append_range_partition('abc'); +SELECT prepend_range_partition('abc'); +SELECT add_range_partition('abc', 401, 501); +SELECT message FROM log; diff --git a/src/pathman.h b/src/pathman.h index 2c66589560..631454cd05 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -55,10 +55,11 @@ * Definitions for the "pathman_config_params" table */ #define PATHMAN_CONFIG_PARAMS "pathman_config_params" -#define Natts_pathman_config_params 3 +#define Natts_pathman_config_params 4 #define Anum_pathman_config_params_partrel 1 /* primary key */ #define Anum_pathman_config_params_enable_parent 2 /* include parent into plan */ #define Anum_pathman_config_params_auto 3 /* auto partitions creation */ +#define 
Anum_pathman_config_params_callback 4 /* partition creation callback */ /* * Cache current PATHMAN_CONFIG relid (set during load_config()). diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 038f789291..3589a221cb 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -13,11 +13,14 @@ #include "relation_info.h" #include "utils.h" #include "xact_handling.h" +#include "fmgr.h" #include "access/htup_details.h" #include "access/nbtree.h" #include "access/xact.h" #include "catalog/indexing.h" +#include "catalog/pg_type.h" +#include "catalog/pg_proc.h" #include "commands/sequence.h" #include "commands/tablespace.h" #include "miscadmin.h" @@ -28,6 +31,8 @@ #include "utils/lsyscache.h" #include "utils/syscache.h" #include "utils/typcache.h" +#include "utils/jsonb.h" +#include "utils/fmgroids.h" /* declarations */ @@ -57,7 +62,8 @@ PG_FUNCTION_INFO_V1( lock_partitioned_relation ); PG_FUNCTION_INFO_V1( prevent_relation_modification ); PG_FUNCTION_INFO_V1( debug_capture ); PG_FUNCTION_INFO_V1( get_rel_tablespace_name ); - +PG_FUNCTION_INFO_V1( validate_on_partition_created_callback ); +PG_FUNCTION_INFO_V1( invoke_on_partition_created_callback ); static void on_partitions_created_internal(Oid partitioned_table, bool add_callbacks); static void on_partitions_updated_internal(Oid partitioned_table, bool add_callbacks); @@ -776,3 +782,87 @@ get_rel_tablespace_name(PG_FUNCTION_ARGS) result = get_tablespace_name(tablespace_id); PG_RETURN_TEXT_P(cstring_to_text(result)); } + +/* + * Checks that callback function meets specific requirements. 
Particularly it + * must take exactly one JSONB argument and return VOID + */ +Datum +validate_on_partition_created_callback(PG_FUNCTION_ARGS) +{ + HeapTuple tp; + Oid callback = PG_GETARG_OID(0); + Form_pg_proc functup; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(callback)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", callback); + functup = (Form_pg_proc) GETSTRUCT(tp); + + if (functup->pronargs != 1 || functup->proargtypes.values[0] != JSONBOID || + functup->prorettype != VOIDOID) + elog(ERROR, + "Callback function must have only one JSNOB argument " + "and return VOID"); + + ReleaseSysCache(tp); + PG_RETURN_VOID(); +} + +/* + * Builds JSONB object containing new partition parameters and invokes the + * callback + */ +Datum +invoke_on_partition_created_callback(PG_FUNCTION_ARGS) +{ + char *json; + Datum jsonb; + Oid parent_oid = PG_GETARG_OID(0); + Oid partition_oid = PG_GETARG_OID(1); + Oid type = get_fn_expr_argtype(fcinfo->flinfo, 2); + Datum start_value = PG_GETARG_DATUM(2); + Datum end_value = PG_GETARG_DATUM(3); + const PartRelationInfo *prel; + + if ((prel = get_pathman_relation_info(parent_oid)) == NULL) + elog(ERROR, + "Relation %s isn't partitioned by pg_pathman", + get_rel_name(parent_oid)); + + /* If there is no callback function specified then we're done */ + if (!prel->callback) + PG_RETURN_VOID(); + + /* Convert ANYELEMENT arguments to jsonb */ + start_value = convert_to_jsonb(start_value, type); + end_value = convert_to_jsonb(end_value, type); + + /* + * Build jsonb object to pass into callback + * + * XXX it would be nice to have this rewritten with pushJsonbValue() to get + * rid of string formatting and parsing. 
See jsonb_build_object() for + * example + */ + json = psprintf("{" + "\"parent\": %u," + "\"partition\": %u," + "\"part_type\": %u," + "\"start\": %s," + "\"end\": %s," + "\"value_type\": %u}", + parent_oid, + partition_oid, + prel->parttype, + datum_to_cstring(start_value, JSONBOID), + datum_to_cstring(end_value, JSONBOID), + type + ); + jsonb = OidFunctionCall1(F_JSONB_IN, CStringGetDatum(json)); + + /* Invoke callback */ + OidFunctionCall1(prel->callback, JsonbGetDatum(jsonb)); + + PG_RETURN_JSONB(jsonb); +} diff --git a/src/relation_info.c b/src/relation_info.c index aaa3fd6036..8cc25b9f30 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -165,12 +165,14 @@ refresh_pathman_relation_info(Oid relid, { prel->enable_parent = param_values[Anum_pathman_config_params_enable_parent - 1]; prel->auto_partition = param_values[Anum_pathman_config_params_auto - 1]; + prel->callback = param_values[Anum_pathman_config_params_callback - 1]; } /* Else set default values if they cannot be found */ else { prel->enable_parent = false; prel->auto_partition = true; + prel->callback = InvalidOid; } /* We've successfully built a cache entry */ diff --git a/src/relation_info.h b/src/relation_info.h index 1ed9993338..0c37dfb548 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -49,6 +49,7 @@ typedef struct bool valid; /* is this entry valid? 
*/ bool enable_parent; /* include parent to the plan */ bool auto_partition; /* auto partition creation */ + Oid callback; /* callback for partition creation */ uint32 children_count; Oid *children; /* Oids of child partitions */ diff --git a/src/utils.c b/src/utils.c index c7651f81e7..3d3d70f5cd 100644 --- a/src/utils.c +++ b/src/utils.c @@ -17,6 +17,7 @@ #include "catalog/heap.h" #include "catalog/pg_type.h" #include "catalog/pg_extension.h" +#include "catalog/pg_proc.h" #include "commands/extension.h" #include "miscadmin.h" #include "optimizer/var.h" @@ -627,6 +628,85 @@ datum_to_cstring(Datum datum, Oid typid) return result; } +/* + * Converts datum to jsonb type + * This function is a wrapper to to_jsonb() + */ +Datum +convert_to_jsonb(Datum datum, Oid typid) +{ + List *args; + FuncExpr *fexpr; + FmgrInfo flinfo; + Const *constval; + + /* Build const value to use in the FuncExpr node. */ + constval = makeConstFromDatum(datum, typid); + + /* Function takes single argument */ + args = list_make1(constval); + + /* Build function expression */ + fexpr = makeFuncNode(F_TO_JSONB, args); + fmgr_info(F_TO_JSONB, &flinfo); + flinfo.fn_expr = (Node *) fexpr; + + return FunctionCall1(&flinfo, datum); +} + +/* + * Builds Const from specified datum and type oid + */ +Const * +makeConstFromDatum(Datum datum, Oid typid) +{ + HeapTuple tp; + Const *constval; + Form_pg_type typtup; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for type %u", typid); + typtup = (Form_pg_type) GETSTRUCT(tp); + constval = makeConst( + typid, + typtup->typtypmod, + typtup->typcollation, + typtup->typlen, + datum, + false, + typtup->typbyval); + ReleaseSysCache(tp); + + return constval; +} + +/* + * Builds function expression + */ +FuncExpr * +makeFuncNode(Oid funcid, List *args) +{ + HeapTuple tp; + FuncExpr *fexpr; + Form_pg_proc functup; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if 
(!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + functup = (Form_pg_proc) GETSTRUCT(tp); + fexpr = makeFuncExpr(funcid, + functup->prorettype, + args, + InvalidOid, + InvalidOid, + COERCE_EXPLICIT_CALL); + ReleaseSysCache(tp); + fexpr->funcvariadic = false; + + return fexpr; +} + /* * Try to get relname or at least relid as cstring. */ diff --git a/src/utils.h b/src/utils.h index d355113943..db074e103c 100644 --- a/src/utils.h +++ b/src/utils.h @@ -65,6 +65,10 @@ void fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, Oid type2); char * datum_to_cstring(Datum datum, Oid typid); +Datum datum_in(char *str, Oid typid); +Datum convert_to_jsonb(Datum datum, Oid typid); +Const *makeConstFromDatum(Datum datum, Oid typid); +FuncExpr *makeFuncNode(Oid funcid, List *args); #if PG_VERSION_NUM < 90600 char get_rel_persistence(Oid relid); From ef2be57c85a90651ba3e8766bd0cef784c1cb2a7 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Fri, 16 Sep 2016 18:56:32 +0300 Subject: [PATCH 138/184] fixed regression tests --- expected/pg_pathman.out | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 6616f3af50..8cd92914d0 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1770,15 +1770,14 @@ NOTICE: 100 rows copied from test_fkey_0 /* Check callbacks */ CREATE TABLE log(id serial, message text); -CREATE FUNCTION abc_on_partition_created_callback(args jsonb) +CREATE OR REPLACE FUNCTION abc_on_partition_created_callback(args jsonb) RETURNS VOID AS $$ DECLARE - start_value TEXT := args->>'start'; - end_value TEXT := args::jsonb->'end'; + start_value TEXT := args->>'start'; + end_value TEXT := args::jsonb->'end'; BEGIN - -- raise notice 'callback: %', args->start; - INSERT INTO log(message) - VALUES (start_value || '-' || end_value); + INSERT INTO log(message) + VALUES (start_value || '-' || end_value); END $$ language plpgsql; CREATE TABLE abc(a 
serial, b int); From 4d154b44b2be0f8992303c4258f6170ae77749a7 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 16 Sep 2016 20:26:29 +0300 Subject: [PATCH 139/184] WIP working prototype of COPY FROM for partitioned tables (PathmanCopyFrom), refactoring --- src/copy_stmt_hooking.c | 249 +++++++++++++++++++++++++++++++++++++++- src/partition_filter.c | 113 +++++++++++++----- src/partition_filter.h | 49 ++++++-- 3 files changed, 366 insertions(+), 45 deletions(-) diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c index 96ed1f57f7..7a66d8d76a 100644 --- a/src/copy_stmt_hooking.c +++ b/src/copy_stmt_hooking.c @@ -12,6 +12,7 @@ #include "copy_stmt_hooking.h" #include "init.h" +#include "partition_filter.h" #include "relation_info.h" #include "access/htup_details.h" @@ -26,12 +27,22 @@ #include "nodes/makefuncs.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/memutils.h" #include "utils/rel.h" #include "utils/rls.h" #include "libpq/libpq.h" +static uint64 PathmanCopyFrom(CopyState cstate, + Relation parent_rel, + List *range_table, + bool old_protocol); +static ResultRelInfoHolder *select_partition_for_copy(const PartRelationInfo *prel, + ResultPartsStorage *parts_storage, + Datum value, EState *estate); + + /* * Is pg_pathman supposed to handle this COPY stmt? */ @@ -283,6 +294,11 @@ PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) /* COPY ... FROM ... 
*/ if (is_from) { + bool is_old_protocol; + + is_old_protocol = PG_PROTOCOL_MAJOR(FrontendProtocol) < 3 && + stmt->filename == NULL; + /* There should be relation */ Assert(rel); @@ -293,9 +309,7 @@ PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) cstate = BeginCopyFrom(rel, stmt->filename, stmt->is_program, stmt->attlist, stmt->options); - /* TODO: copy files to DB */ - heap_close(rel, NoLock); - *processed = 0; + *processed = PathmanCopyFrom(cstate, rel, range_table, is_old_protocol); EndCopyFrom(cstate); } /* COPY ... TO ... */ @@ -314,4 +328,233 @@ PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) /* Call standard DoCopy using a new CopyStmt */ DoCopy(&modified_copy_stmt, queryString, processed); } + + /* + * Close the relation. If reading, we can release the AccessShareLock we + * got; if writing, we should hold the lock until end of transaction to + * ensure that updates will be committed before lock is released. + */ + if (rel != NULL) + heap_close(rel, (is_from ? NoLock : AccessShareLock)); +} + +/* + * Copy FROM file to relation. 
+ */ +static uint64 +PathmanCopyFrom(CopyState cstate, Relation parent_rel, + List *range_table, bool old_protocol) +{ + HeapTuple tuple; + TupleDesc tupDesc; + Datum *values; + bool *nulls; + + ResultPartsStorage parts_storage; + ResultRelInfo *parent_result_rel; + + EState *estate = CreateExecutorState(); /* for ExecConstraints() */ + ExprContext *econtext; + TupleTableSlot *myslot; + MemoryContext oldcontext = CurrentMemoryContext; + + uint64 processed = 0; + + + tupDesc = RelationGetDescr(parent_rel); + + parent_result_rel = makeNode(ResultRelInfo); + InitResultRelInfo(parent_result_rel, + parent_rel, + 1, /* dummy rangetable index */ + 0); + ExecOpenIndices(parent_result_rel, false); + + estate->es_result_relations = parent_result_rel; + estate->es_num_result_relations = 1; + estate->es_result_relation_info = parent_result_rel; + estate->es_range_table = range_table; + + /* Initialize ResultPartsStorage */ + init_result_parts_storage(&parts_storage, estate, false, + ResultPartsStorageStandard, + check_acl_for_partition, NULL); + parts_storage.saved_rel_info = parent_result_rel; + + /* Set up a tuple slot too */ + myslot = ExecInitExtraTupleSlot(estate); + ExecSetSlotDescriptor(myslot, tupDesc); + /* Triggers might need a slot as well */ + estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate); + + /* Prepare to catch AFTER triggers. */ + AfterTriggerBeginQuery(); + + /* + * Check BEFORE STATEMENT insertion triggers. It's debatable whether we + * should do this for COPY, since it's not really an "INSERT" statement as + * such. However, executing these triggers maintains consistency with the + * EACH ROW triggers that we already fire on COPY. 
+ */ + ExecBSInsertTriggers(estate, parent_result_rel); + + values = (Datum *) palloc(tupDesc->natts * sizeof(Datum)); + nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); + + econtext = GetPerTupleExprContext(estate); + + for (;;) + { + TupleTableSlot *slot; + bool skip_tuple; + Oid tuple_oid = InvalidOid; + + const PartRelationInfo *prel; + ResultRelInfoHolder *rri_holder_child; + ResultRelInfo *child_result_rel; + + CHECK_FOR_INTERRUPTS(); + + ResetPerTupleExprContext(estate); + + /* Fetch PartRelationInfo for parent relation */ + prel = get_pathman_relation_info(RelationGetRelid(parent_rel)); + + /* Switch into its memory context */ + MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + + if (!NextCopyFrom(cstate, econtext, values, nulls, &tuple_oid)) + break; + + /* Search for a matching partition */ + rri_holder_child = select_partition_for_copy(prel, &parts_storage, + values[prel->attnum - 1], + estate); + child_result_rel = rri_holder_child->result_rel_info; + estate->es_result_relation_info = child_result_rel; + + /* And now we can form the input tuple. */ + tuple = heap_form_tuple(tupDesc, values, nulls); + if (tuple_oid != InvalidOid) + HeapTupleSetOid(tuple, tuple_oid); + + /* + * Constraints might reference the tableoid column, so initialize + * t_tableOid before evaluating them. + */ + tuple->t_tableOid = RelationGetRelid(child_result_rel->ri_RelationDesc); + + /* Triggers and stuff need to be invoked in query context. 
*/ + MemoryContextSwitchTo(oldcontext); + + /* Place tuple in tuple slot --- but slot shouldn't free it */ + slot = myslot; + ExecStoreTuple(tuple, slot, InvalidBuffer, false); + + skip_tuple = false; + + /* BEFORE ROW INSERT Triggers */ + if (child_result_rel->ri_TrigDesc && + child_result_rel->ri_TrigDesc->trig_insert_before_row) + { + slot = ExecBRInsertTriggers(estate, child_result_rel, slot); + + if (slot == NULL) /* "do nothing" */ + skip_tuple = true; + else /* trigger might have changed tuple */ + tuple = ExecMaterializeSlot(slot); + } + + /* Proceed if we still have a tuple */ + if (!skip_tuple) + { + List *recheckIndexes = NIL; + + /* Check the constraints of the tuple */ + if (child_result_rel->ri_RelationDesc->rd_att->constr) + ExecConstraints(child_result_rel, slot, estate); + + /* OK, store the tuple and create index entries for it */ + simple_heap_insert(child_result_rel->ri_RelationDesc, tuple); + + if (child_result_rel->ri_NumIndices > 0) + recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), + estate, false, NULL, + NIL); + + /* AFTER ROW INSERT Triggers */ + ExecARInsertTriggers(estate, child_result_rel, tuple, + recheckIndexes); + + list_free(recheckIndexes); + + /* + * We count only tuples not suppressed by a BEFORE INSERT trigger; + * this is the same definition used by execMain.c for counting + * tuples inserted by an INSERT command. + */ + processed++; + } + } + + MemoryContextSwitchTo(oldcontext); + + /* + * In the old protocol, tell pqcomm that we can process normal protocol + * messages again. 
+ */ + if (old_protocol) + pq_endmsgread(); + + /* Execute AFTER STATEMENT insertion triggers */ + ExecASInsertTriggers(estate, parent_result_rel); + + /* Handle queued AFTER triggers */ + AfterTriggerEndQuery(estate); + + pfree(values); + pfree(nulls); + + ExecResetTupleTable(estate->es_tupleTable, false); + fini_result_parts_storage(&parts_storage); + + FreeExecutorState(estate); + + return processed; +} + +/* + * Smart wrapper for scan_result_parts_storage(). + */ +static ResultRelInfoHolder * +select_partition_for_copy(const PartRelationInfo *prel, + ResultPartsStorage *parts_storage, + Datum value, EState *estate) +{ + ExprContext *econtext; + ResultRelInfoHolder *rri_holder; + Oid selected_partid = InvalidOid; + Oid *parts; + int nparts; + + econtext = GetPerTupleExprContext(estate); + + /* Search for matching partitions using partitioned column */ + parts = find_partitions_for_value(value, prel, econtext, &nparts); + + if (nparts > 1) + elog(ERROR, "PATHMAN COPY selected more than one partition"); + else if (nparts == 0) + elog(ERROR, + "There is no suitable partition for key '%s'", + datum_to_cstring(value, prel->atttype)); + else + selected_partid = parts[0]; + + /* Replace parent table with a suitable partition */ + MemoryContextSwitchTo(estate->es_query_cxt); + rri_holder = scan_result_parts_storage(selected_partid, parts_storage); + MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + + return rri_holder; } diff --git a/src/partition_filter.c b/src/partition_filter.c index a7dbcc9389..424ef208c2 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -67,24 +67,64 @@ add_partition_filters(List *rtable, Plan *plan) plan_tree_walker(plan, partition_filter_visitor, rtable); } +/* + * This callback adds a new RangeTblEntry once + * partition is opened for an INSERT. 
+ */ +void +check_acl_for_partition(EState *estate, + ResultRelInfoHolder *rri_holder, + void *arg) +{ + RangeTblEntry *rte; + Relation part_rel = rri_holder->result_rel_info->ri_RelationDesc; + + rte = makeNode(RangeTblEntry); + + rte->rtekind = RTE_RELATION; + rte->relid = rri_holder->partid; + rte->relkind = part_rel->rd_rel->relkind; + rte->requiredPerms = ACL_INSERT; + + /* Check permissions for current partition */ + ExecCheckRTPerms(list_make1(rte), true); + + /* TODO: append RTE to estate->es_range_table */ +} + /* * Initialize ResultPartsStorage (hash table etc). */ void -init_result_parts_storage(ResultPartsStorage *parts_storage) +init_result_parts_storage(ResultPartsStorage *parts_storage, + EState *estate, + bool speculative_inserts, + Size table_entry_size, + on_new_rri_holder on_new_rri_holder_cb, + void *on_new_rri_holder_cb_arg) { HASHCTL *result_rels_table_config = &parts_storage->result_rels_table_config; memset(result_rels_table_config, 0, sizeof(HASHCTL)); result_rels_table_config->keysize = sizeof(Oid); - result_rels_table_config->entrysize = sizeof(ResultRelInfoHolder); + + /* Use sizeof(ResultRelInfoHolder) if table_entry_size is 0 */ + if (table_entry_size == ResultPartsStorageStandard) + result_rels_table_config->entrysize = sizeof(ResultRelInfoHolder); + else + result_rels_table_config->entrysize = table_entry_size; parts_storage->result_rels_table = hash_create("ResultRelInfo storage", 10, result_rels_table_config, HASH_ELEM | HASH_BLOBS); - + parts_storage->estate = estate; + parts_storage->es_alloc_result_rels = estate->es_num_result_relations; + parts_storage->speculative_inserts = speculative_inserts; parts_storage->saved_rel_info = NULL; + + parts_storage->on_new_rri_holder_callback = on_new_rri_holder_cb; + parts_storage->callback_arg = on_new_rri_holder_cb_arg; } /* @@ -94,13 +134,13 @@ void fini_result_parts_storage(ResultPartsStorage *parts_storage) { HASH_SEQ_STATUS stat; - ResultRelInfoHolder *rri_handle; /* ResultRelInfo 
holder */ + ResultRelInfoHolder *rri_holder; /* ResultRelInfo holder */ hash_seq_init(&stat, parts_storage->result_rels_table); - while ((rri_handle = (ResultRelInfoHolder *) hash_seq_search(&stat)) != NULL) + while ((rri_holder = (ResultRelInfoHolder *) hash_seq_search(&stat)) != NULL) { - ExecCloseIndices(rri_handle->result_rel_info); - heap_close(rri_handle->result_rel_info->ri_RelationDesc, + ExecCloseIndices(rri_holder->result_rel_info); + heap_close(rri_holder->result_rel_info->ri_RelationDesc, RowExclusiveLock); } hash_destroy(parts_storage->result_rels_table); @@ -109,32 +149,34 @@ fini_result_parts_storage(ResultPartsStorage *parts_storage) /* * Find a ResultRelInfo for the partition using ResultPartsStorage. */ -ResultRelInfo * -scan_result_parts_storage(Oid partid, - ResultPartsStorage *storage, - bool speculative_insertion) +ResultRelInfoHolder * +scan_result_parts_storage(Oid partid, ResultPartsStorage *storage) { #define CopyToResultRelInfo(field_name) \ - ( resultRelInfo->field_name = storage->saved_rel_info->field_name ) + ( part_result_rel_info->field_name = storage->saved_rel_info->field_name ) - ResultRelInfoHolder *resultRelInfoHolder; + ResultRelInfoHolder *rri_holder; bool found; - resultRelInfoHolder = hash_search(storage->result_rels_table, - (const void *) &partid, - HASH_ENTER, &found); + rri_holder = hash_search(storage->result_rels_table, + (const void *) &partid, + HASH_ENTER, &found); /* If not found, create & cache new ResultRelInfo */ if (!found) { - ResultRelInfo *resultRelInfo = (ResultRelInfo *) palloc(sizeof(ResultRelInfo)); + ResultRelInfo *part_result_rel_info = makeNode(ResultRelInfo); - InitResultRelInfo(resultRelInfo, + InitResultRelInfo(part_result_rel_info, heap_open(partid, RowExclusiveLock), 0, 0); /* TODO: select suitable options */ - ExecOpenIndices(resultRelInfo, speculative_insertion); + ExecOpenIndices(part_result_rel_info, storage->speculative_inserts); + + /* Check that 'saved_rel_info' is set */ + if 
(!storage->saved_rel_info) + elog(ERROR, "ResultPartsStorage contains no saved_rel_info"); /* Copy necessary fields from saved ResultRelInfo */ CopyToResultRelInfo(ri_WithCheckOptions); @@ -145,18 +187,23 @@ scan_result_parts_storage(Oid partid, CopyToResultRelInfo(ri_onConflictSetWhere); /* ri_ConstraintExprs will be initialized by ExecRelCheck() */ - resultRelInfo->ri_ConstraintExprs = NULL; + part_result_rel_info->ri_ConstraintExprs = NULL; /* Make 'range table index' point to the parent relation */ - resultRelInfo->ri_RangeTableIndex = + part_result_rel_info->ri_RangeTableIndex = storage->saved_rel_info->ri_RangeTableIndex; /* Now fill the ResultRelInfo holder */ - resultRelInfoHolder->partid = partid; - resultRelInfoHolder->result_rel_info = resultRelInfo; + rri_holder->partid = partid; + rri_holder->result_rel_info = part_result_rel_info; + + /* Call on_new_rri_holder_callback() if needed */ + if (storage->on_new_rri_holder_callback) + storage->on_new_rri_holder_callback(storage->estate, rri_holder, + storage->callback_arg); } - return resultRelInfoHolder->result_rel_info; + return rri_holder; } @@ -255,7 +302,12 @@ partition_filter_begin(CustomScanState *node, EState *estate, int eflags) /* It's convenient to store PlanState in 'custom_ps' */ node->custom_ps = list_make1(ExecInitNode(state->subplan, estate, eflags)); - init_result_parts_storage(&state->result_parts); + /* Init ResultRelInfo cache */ + init_result_parts_storage(&state->result_parts, estate, + state->on_conflict_action != ONCONFLICT_NONE, + ResultPartsStorageStandard, + check_acl_for_partition, NULL); + state->warning_triggered = false; } @@ -281,10 +333,10 @@ partition_filter_exec(CustomScanState *node) MemoryContext old_cxt; + ResultRelInfoHolder *result_part_holder; + Oid selected_partid; int nparts; Oid *parts; - Oid selected_partid; - bool speculative_insert; bool isnull; Datum value; @@ -344,10 +396,9 @@ partition_filter_exec(CustomScanState *node) /* Replace parent table with a suitable 
partition */ old_cxt = MemoryContextSwitchTo(estate->es_query_cxt); - speculative_insert = state->on_conflict_action != ONCONFLICT_NONE; - estate->es_result_relation_info = scan_result_parts_storage(selected_partid, - &state->result_parts, - speculative_insert); + result_part_holder = scan_result_parts_storage(selected_partid, + &state->result_parts); + estate->es_result_relation_info = result_part_holder->result_rel_info; MemoryContextSwitchTo(old_cxt); return slot; diff --git a/src/partition_filter.h b/src/partition_filter.h index afe238041c..e3d4fcca30 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -24,20 +24,40 @@ */ typedef struct { - Oid partid; - ResultRelInfo *result_rel_info; + Oid partid; /* partition's relid */ + ResultRelInfo *result_rel_info; /* cached ResultRelInfo */ } ResultRelInfoHolder; +/* + * Callback to be fired at rri_holder creation. + */ +typedef void (*on_new_rri_holder)(EState *estate, + ResultRelInfoHolder *rri_holder, + void *arg); + /* * Cached ResultRelInfos of partitions. */ typedef struct { - ResultRelInfo *saved_rel_info; + ResultRelInfo *saved_rel_info; /* original ResultRelInfo (parent) */ HTAB *result_rels_table; HASHCTL result_rels_table_config; + + bool speculative_inserts; /* for ExecOpenIndices() */ + + on_new_rri_holder on_new_rri_holder_callback; + void *callback_arg; + + EState *estate; + int es_alloc_result_rels; /* number of allocated result rels */ } ResultPartsStorage; +/* + * Standard size of ResultPartsStorage entry. 
+ */ +#define ResultPartsStorageStandard 0 + typedef struct { CustomScanState css; @@ -45,10 +65,10 @@ typedef struct Oid partitioned_table; OnConflictAction on_conflict_action; - Plan *subplan; /* proxy variable to store subplan */ - ResultPartsStorage result_parts; + Plan *subplan; /* proxy variable to store subplan */ + ResultPartsStorage result_parts; /* partition ResultRelInfo cache */ - bool warning_triggered; + bool warning_triggered; /* WARNING message counter */ } PartitionFilterState; @@ -61,15 +81,22 @@ extern CustomExecMethods partition_filter_exec_methods; void init_partition_filter_static_data(void); void add_partition_filters(List *rtable, Plan *plan); +void check_acl_for_partition(EState *estate, + ResultRelInfoHolder *rri_holder, + void *arg); /* ResultPartsStorage init\fini\scan function */ -void init_result_parts_storage(ResultPartsStorage *parts_storage); +void init_result_parts_storage(ResultPartsStorage *parts_storage, + EState *estate, + bool speculative_inserts, + Size table_entry_size, + on_new_rri_holder on_new_rri_holder_cb, + void *on_new_rri_holder_cb_arg); void fini_result_parts_storage(ResultPartsStorage *parts_storage); -ResultRelInfo * scan_result_parts_storage(Oid partid, - ResultPartsStorage *storage, - bool speculative_insertion); +ResultRelInfoHolder * scan_result_parts_storage(Oid partid, + ResultPartsStorage *storage); -/* */ +/* Find suitable partition using 'value' */ Oid *find_partitions_for_value(Datum value, const PartRelationInfo *prel, ExprContext *econtext, int *nparts); From 9fd99a37fe53e27b17283cc898e27974ac8541dc Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 16 Sep 2016 23:36:43 +0300 Subject: [PATCH 140/184] add ResultRelInfos for partitions to estate->es_result_relations with append_rri_to_estate(), add 'close_rels' param to fini_result_parts_storage(), don't close partitions till xact end after INSERT, fixes --- src/copy_stmt_hooking.c | 4 +- src/nodes_common.c | 22 ++++----- src/partition_filter.c | 
103 ++++++++++++++++++++++++++++------------ src/partition_filter.h | 10 ++-- src/pg_pathman.c | 1 + 5 files changed, 94 insertions(+), 46 deletions(-) diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c index 7a66d8d76a..ee5220d493 100644 --- a/src/copy_stmt_hooking.c +++ b/src/copy_stmt_hooking.c @@ -516,7 +516,9 @@ PathmanCopyFrom(CopyState cstate, Relation parent_rel, pfree(nulls); ExecResetTupleTable(estate->es_tupleTable, false); - fini_result_parts_storage(&parts_storage); + + /* Close partitions and destroy hash table */ + fini_result_parts_storage(&parts_storage, true); FreeExecutorState(estate); diff --git a/src/nodes_common.c b/src/nodes_common.c index 7a9b35fd22..f75bd2f123 100644 --- a/src/nodes_common.c +++ b/src/nodes_common.c @@ -84,8 +84,8 @@ transform_plans_into_states(RuntimeAppendState *scan_state, static ChildScanCommon * select_required_plans(HTAB *children_table, Oid *parts, int nparts, int *nres) { - int allocated = INITIAL_ALLOC_NUM; - int used = 0; + uint32 allocated = INITIAL_ALLOC_NUM, + used = 0; ChildScanCommon *result; int i; @@ -101,7 +101,7 @@ select_required_plans(HTAB *children_table, Oid *parts, int nparts, int *nres) if (allocated <= used) { - allocated *= ALLOC_EXP; + allocated = allocated * ALLOC_EXP + 1; result = repalloc(result, allocated * sizeof(ChildScanCommon)); } @@ -289,8 +289,8 @@ get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel, bool include_parent) { ListCell *range_cell; - uint32 allocated = INITIAL_ALLOC_NUM; - uint32 used = 0; + uint32 allocated = INITIAL_ALLOC_NUM, + used = 0; Oid *result = (Oid *) palloc(allocated * sizeof(Oid)); Oid *children = PrelGetChildrenArray(prel); @@ -310,7 +310,7 @@ get_partition_oids(List *ranges, int *n, const PartRelationInfo *prel, { if (allocated <= used) { - allocated *= ALLOC_EXP; + allocated = allocated * ALLOC_EXP + 1; result = repalloc(result, allocated * sizeof(Oid)); } @@ -595,10 +595,10 @@ explain_append_common(CustomScanState *node, HTAB 
*children_table, ExplainState /* Construct excess PlanStates */ if (!es->analyze) { - int allocated = INITIAL_ALLOC_NUM; - int used = 0; - ChildScanCommon *custom_ps; - ChildScanCommon child; + uint32 allocated = INITIAL_ALLOC_NUM, + used = 0; + ChildScanCommon *custom_ps, + child; HASH_SEQ_STATUS seqstat; int i; @@ -614,7 +614,7 @@ explain_append_common(CustomScanState *node, HTAB *children_table, ExplainState { if (allocated <= used) { - allocated *= ALLOC_EXP; + allocated = allocated * ALLOC_EXP + 1; custom_ps = repalloc(custom_ps, allocated * sizeof(ChildScanCommon)); } diff --git a/src/partition_filter.c b/src/partition_filter.c index 424ef208c2..a000444898 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -19,6 +19,9 @@ #include "utils/lsyscache.h" +#define ALLOC_EXP 2 + + bool pg_pathman_enable_partition_filter = true; CustomScanMethods partition_filter_plan_methods; @@ -27,6 +30,7 @@ CustomExecMethods partition_filter_exec_methods; static void partition_filter_visitor(Plan *plan, void *context); static List * pfilter_build_tlist(List *tlist); +static int append_rri_to_estate(EState *estate, ResultRelInfo *rri, int cur_allocated); void @@ -86,7 +90,7 @@ check_acl_for_partition(EState *estate, rte->relkind = part_rel->rd_rel->relkind; rte->requiredPerms = ACL_INSERT; - /* Check permissions for current partition */ + /* FIXME: Check permissions for partition */ ExecCheckRTPerms(list_make1(rte), true); /* TODO: append RTE to estate->es_range_table */ @@ -125,24 +129,35 @@ init_result_parts_storage(ResultPartsStorage *parts_storage, parts_storage->on_new_rri_holder_callback = on_new_rri_holder_cb; parts_storage->callback_arg = on_new_rri_holder_cb_arg; + + /* Partitions must remain locked till transaction's end */ + parts_storage->head_open_lock_mode = RowExclusiveLock; + parts_storage->heap_close_lock_mode = NoLock; } /* * Free ResultPartsStorage (close relations etc). 
*/ void -fini_result_parts_storage(ResultPartsStorage *parts_storage) +fini_result_parts_storage(ResultPartsStorage *parts_storage, bool close_rels) { - HASH_SEQ_STATUS stat; - ResultRelInfoHolder *rri_holder; /* ResultRelInfo holder */ - - hash_seq_init(&stat, parts_storage->result_rels_table); - while ((rri_holder = (ResultRelInfoHolder *) hash_seq_search(&stat)) != NULL) + /* Close partitions and their indices if asked to */ + if (close_rels) { - ExecCloseIndices(rri_holder->result_rel_info); - heap_close(rri_holder->result_rel_info->ri_RelationDesc, - RowExclusiveLock); + HASH_SEQ_STATUS stat; + ResultRelInfoHolder *rri_holder; /* ResultRelInfo holder */ + + hash_seq_init(&stat, parts_storage->result_rels_table); + while ((rri_holder = (ResultRelInfoHolder *) hash_seq_search(&stat)) != NULL) + { + ExecCloseIndices(rri_holder->result_rel_info); + + heap_close(rri_holder->result_rel_info->ri_RelationDesc, + parts_storage->heap_close_lock_mode); + } } + + /* Finally destroy hash table */ hash_destroy(parts_storage->result_rels_table); } @@ -150,15 +165,15 @@ fini_result_parts_storage(ResultPartsStorage *parts_storage) * Find a ResultRelInfo for the partition using ResultPartsStorage. 
*/ ResultRelInfoHolder * -scan_result_parts_storage(Oid partid, ResultPartsStorage *storage) +scan_result_parts_storage(Oid partid, ResultPartsStorage *parts_storage) { #define CopyToResultRelInfo(field_name) \ - ( part_result_rel_info->field_name = storage->saved_rel_info->field_name ) + ( part_result_rel_info->field_name = parts_storage->saved_rel_info->field_name ) ResultRelInfoHolder *rri_holder; bool found; - rri_holder = hash_search(storage->result_rels_table, + rri_holder = hash_search(parts_storage->result_rels_table, (const void *) &partid, HASH_ENTER, &found); @@ -167,16 +182,16 @@ scan_result_parts_storage(Oid partid, ResultPartsStorage *storage) { ResultRelInfo *part_result_rel_info = makeNode(ResultRelInfo); + /* Check that 'saved_rel_info' is set */ + if (!parts_storage->saved_rel_info) + elog(ERROR, "ResultPartsStorage contains no saved_rel_info"); + InitResultRelInfo(part_result_rel_info, - heap_open(partid, RowExclusiveLock), - 0, + heap_open(partid, parts_storage->head_open_lock_mode), + parts_storage->saved_rel_info->ri_RangeTableIndex, 0); /* TODO: select suitable options */ - ExecOpenIndices(part_result_rel_info, storage->speculative_inserts); - - /* Check that 'saved_rel_info' is set */ - if (!storage->saved_rel_info) - elog(ERROR, "ResultPartsStorage contains no saved_rel_info"); + ExecOpenIndices(part_result_rel_info, parts_storage->speculative_inserts); /* Copy necessary fields from saved ResultRelInfo */ CopyToResultRelInfo(ri_WithCheckOptions); @@ -189,18 +204,21 @@ scan_result_parts_storage(Oid partid, ResultPartsStorage *storage) /* ri_ConstraintExprs will be initialized by ExecRelCheck() */ part_result_rel_info->ri_ConstraintExprs = NULL; - /* Make 'range table index' point to the parent relation */ - part_result_rel_info->ri_RangeTableIndex = - storage->saved_rel_info->ri_RangeTableIndex; - /* Now fill the ResultRelInfo holder */ rri_holder->partid = partid; rri_holder->result_rel_info = part_result_rel_info; + /* Add ResultRelInfo to 
storage->es_alloc_result_rels */ + parts_storage->es_alloc_result_rels = + append_rri_to_estate(parts_storage->estate, + part_result_rel_info, + parts_storage->es_alloc_result_rels); + /* Call on_new_rri_holder_callback() if needed */ - if (storage->on_new_rri_holder_callback) - storage->on_new_rri_holder_callback(storage->estate, rri_holder, - storage->callback_arg); + if (parts_storage->on_new_rri_holder_callback) + parts_storage->on_new_rri_holder_callback(parts_storage->estate, + rri_holder, + parts_storage->callback_arg); } return rri_holder; @@ -412,8 +430,8 @@ partition_filter_end(CustomScanState *node) { PartitionFilterState *state = (PartitionFilterState *) node; - /* Close cached relations */ - fini_result_parts_storage(&state->result_parts); + /* Executor will close rels via estate->es_result_relations */ + fini_result_parts_storage(&state->result_parts, false); Assert(list_length(node->custom_ps) == 1); ExecEndNode((PlanState *) linitial(node->custom_ps)); @@ -432,6 +450,29 @@ partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *e /* Nothing to do here now */ } +static int +append_rri_to_estate(EState *estate, ResultRelInfo *rri, int cur_allocated) +{ + int result_rels_allocated = cur_allocated; + + if (result_rels_allocated <= estate->es_num_result_relations) + { + ResultRelInfo *rri_array = estate->es_result_relations; + + result_rels_allocated = result_rels_allocated * ALLOC_EXP + 1; + estate->es_result_relations = palloc(result_rels_allocated * + sizeof(ResultRelInfo)); + memcpy(estate->es_result_relations, + rri_array, + estate->es_num_result_relations * sizeof(ResultRelInfo)); + } + + /* Append ResultRelInfo to 'es_result_relations' array */ + estate->es_result_relations[estate->es_num_result_relations++] = *rri; + + return result_rels_allocated; +} + /* * Build partition filter's target list pointing to subplan tuple's elements */ @@ -465,9 +506,9 @@ pfilter_build_tlist(List *tlist) } /* - * Add partition filters to 
ModifyTable node's children + * Add partition filters to ModifyTable node's children. * - * 'context' should point to the PlannedStmt->rtable + * 'context' should point to the PlannedStmt->rtable. */ static void partition_filter_visitor(Plan *plan, void *context) diff --git a/src/partition_filter.h b/src/partition_filter.h index e3d4fcca30..370e587d89 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -12,7 +12,7 @@ #define RUNTIME_INSERT_H #include "relation_info.h" -#include "pathman.h" +#include "utils.h" #include "postgres.h" #include "commands/explain.h" @@ -51,6 +51,9 @@ typedef struct EState *estate; int es_alloc_result_rels; /* number of allocated result rels */ + + LOCKMODE head_open_lock_mode; + LOCKMODE heap_close_lock_mode; } ResultPartsStorage; /* @@ -68,7 +71,7 @@ typedef struct Plan *subplan; /* proxy variable to store subplan */ ResultPartsStorage result_parts; /* partition ResultRelInfo cache */ - bool warning_triggered; /* WARNING message counter */ + bool warning_triggered; /* warning message counter */ } PartitionFilterState; @@ -92,7 +95,8 @@ void init_result_parts_storage(ResultPartsStorage *parts_storage, Size table_entry_size, on_new_rri_holder on_new_rri_holder_cb, void *on_new_rri_holder_cb_arg); -void fini_result_parts_storage(ResultPartsStorage *parts_storage); +void fini_result_parts_storage(ResultPartsStorage *parts_storage, + bool close_rels); ResultRelInfoHolder * scan_result_parts_storage(Oid partid, ResultPartsStorage *storage); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 570e98bbbc..8d8412d9b1 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -406,6 +406,7 @@ append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, PlanRowMark *child_rowmark; AttrNumber i; + /* FIXME: acquire a suitable lock on partition */ newrelation = heap_open(childOid, NoLock); /* From 95016ee69d7801af15fadd64c8f1f6a8cdc2f689 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 16 Sep 2016 23:52:20 +0300 
Subject: [PATCH 141/184] make clang-analyzer happy about 'rel' --- src/copy_stmt_hooking.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c index ee5220d493..807eb9c6c2 100644 --- a/src/copy_stmt_hooking.c +++ b/src/copy_stmt_hooking.c @@ -294,13 +294,11 @@ PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) /* COPY ... FROM ... */ if (is_from) { - bool is_old_protocol; - - is_old_protocol = PG_PROTOCOL_MAJOR(FrontendProtocol) < 3 && - stmt->filename == NULL; + bool is_old_protocol = PG_PROTOCOL_MAJOR(FrontendProtocol) < 3 && + stmt->filename == NULL; /* There should be relation */ - Assert(rel); + if (!rel) elog(FATAL, "No relation for PATHMAN COPY FROM"); /* check read-only transaction and parallel mode */ if (XactReadOnly && rel && !rel->rd_islocaltemp) From fd0cdeb1544f0ffeef6e5ce8d1703c7ecf86ee4a Mon Sep 17 00:00:00 2001 From: Maksim Milyutin Date: Mon, 19 Sep 2016 17:24:51 +0300 Subject: [PATCH 142/184] Replace multiplication ops with multiple additions --- range.sql | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/range.sql b/range.sql index d837df0c10..1f116c0bad 100644 --- a/range.sql +++ b/range.sql @@ -91,6 +91,7 @@ DECLARE v_rows_count INTEGER; v_max p_start_value%TYPE; v_cur_value p_start_value%TYPE := p_start_value; + p_end_value p_start_value%TYPE; i INTEGER; BEGIN @@ -132,12 +133,19 @@ BEGIN * and specifies partition count as 0 then do not check boundaries */ IF p_count != 0 THEN + /* compute right bound of partitioning through additions */ + p_end_value := p_start_value; + FOR i IN 1..p_count + LOOP + p_end_value := p_end_value + p_interval; + END LOOP; + /* Check boundaries */ EXECUTE format('SELECT @extschema@.check_boundaries(''%s'', ''%s'', ''%s'', ''%s''::%s)', parent_relid, p_attribute, p_start_value, - p_start_value + p_interval * p_count, + p_end_value, pg_typeof(p_start_value)); END IF; @@ -190,6
+198,7 @@ DECLARE v_rows_count INTEGER; v_max p_start_value%TYPE; v_cur_value p_start_value%TYPE := p_start_value; + p_end_value p_start_value%TYPE; i INTEGER; BEGIN @@ -235,11 +244,18 @@ BEGIN * and specifies partition count as 0 then do not check boundaries */ IF p_count != 0 THEN + /* compute right bound of partitioning through additions */ + p_end_value := p_start_value; + FOR i IN 1..p_count + LOOP + p_end_value := p_end_value + p_interval; + END LOOP; + /* check boundaries */ PERFORM @extschema@.check_boundaries(parent_relid, p_attribute, p_start_value, - p_start_value + p_interval * p_count); + p_end_value); END IF; /* Create sequence for child partitions names */ From 348c50754a3866346c748f6a93a9a0f811b270df Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 21 Sep 2016 14:46:25 +0300 Subject: [PATCH 143/184] remove useless rel check --- src/copy_stmt_hooking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c index 807eb9c6c2..ab8db87da7 100644 --- a/src/copy_stmt_hooking.c +++ b/src/copy_stmt_hooking.c @@ -301,7 +301,7 @@ PathmanDoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) if (!rel) elog(FATAL, "No relation for PATHMAN COPY FROM"); /* check read-only transaction and parallel mode */ - if (XactReadOnly && rel && !rel->rd_islocaltemp) + if (XactReadOnly && !rel->rd_islocaltemp) PreventCommandIfReadOnly("PATHMAN COPY FROM"); PreventCommandIfParallelMode("PATHMAN COPY FROM"); From 6ce63549fb2c41c38419146f8f8d4a96074b3a8b Mon Sep 17 00:00:00 2001 From: Maksim Milyutin Date: Wed, 21 Sep 2016 20:42:34 +0300 Subject: [PATCH 144/184] Migrate extension to 9.6 --- src/hooks.c | 4 ++-- src/init.c | 1 + src/nodes_common.h | 1 + src/partition_filter.h | 2 +- src/pg_pathman.c | 28 ++++++++++++++++------------ src/runtime_merge_append.c | 31 ++++++++++++++++++++++++++----- 6 files changed, 47 insertions(+), 20 deletions(-) diff --git a/src/hooks.c b/src/hooks.c index
6db5434be9..8c3afe39f0 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -169,8 +169,8 @@ pathman_join_pathlist_hook(PlannerInfo *root, */ nest_path->path.rows = get_parameterized_joinrel_size(root, joinrel, - outer->rows, - inner->rows, + outer, + inner, extra->sjinfo, filtered_joinclauses); diff --git a/src/init.c b/src/init.c index 60eff1adb3..17a23b44e5 100644 --- a/src/init.c +++ b/src/init.c @@ -22,6 +22,7 @@ #include "access/sysattr.h" #include "catalog/indexing.h" #include "catalog/pg_constraint.h" +#include "catalog/pg_constraint_fn.h" #include "catalog/pg_inherits.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_type.h" diff --git a/src/nodes_common.h b/src/nodes_common.h index 51087bfab2..30693dd8e6 100644 --- a/src/nodes_common.h +++ b/src/nodes_common.h @@ -16,6 +16,7 @@ #include "postgres.h" #include "commands/explain.h" #include "optimizer/planner.h" +#include "nodes/extensible.h" /* diff --git a/src/partition_filter.h b/src/partition_filter.h index d16cb0c0d4..cbbc61ed5d 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -17,7 +17,7 @@ #include "postgres.h" #include "commands/explain.h" #include "optimizer/planner.h" - +#include "nodes/extensible.h" typedef struct { diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 1c6e8bf5c8..d05cc3a06a 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -376,11 +376,11 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Assert(childrel->rows > 0); parent_rows += childrel->rows; - parent_size += childrel->width * childrel->rows; + parent_size += childrel->reltarget->width * childrel->rows; } rel->rows = parent_rows; - rel->width = rint(parent_size / parent_rows); + rel->reltarget->width = rint(parent_size / parent_rows); rel->tuples = parent_rows; } @@ -423,15 +423,19 @@ append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, childrel = build_simple_rel(root, childRTindex, RELOPT_OTHER_MEMBER_REL); /* Copy targetlist */ - childrel->reltargetlist = NIL; - 
foreach(lc, rel->reltargetlist) + childrel->reltarget->exprs = NIL; + childrel->reltarget->sortgrouprefs = (Index *) palloc( + list_length(rel->reltarget->exprs) * sizeof(Index)); + foreach(lc, rel->reltarget->exprs) { Node *new_target; node = (Node *) lfirst(lc); new_target = copyObject(node); change_varnos(new_target, rel->relid, childrel->relid); - childrel->reltargetlist = lappend(childrel->reltargetlist, new_target); + childrel->reltarget->exprs = lappend(childrel->reltarget->exprs, + new_target); + /* childrel->reltarget->sortgrouprefs[i++] = */ } /* Copy attr_needed & attr_widths */ @@ -1640,7 +1644,7 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) * Test any partial indexes of rel for applicability. We must do this * first since partial unique indexes can affect size estimates. */ - check_partial_indexes(root, rel); + check_index_predicates(root, rel); /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); @@ -1854,7 +1858,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, * if we have zero or one live subpath due to constraint exclusion.) 
*/ if (subpaths_valid) - add_path(rel, (Path *) create_append_path(rel, subpaths, NULL)); + add_path(rel, (Path *) create_append_path(rel, subpaths, NULL, 0)); /* * Also build unparameterized MergeAppend paths based on the collected @@ -1905,7 +1909,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, if (subpaths_valid) add_path(rel, (Path *) - create_append_path(rel, subpaths, required_outer)); + create_append_path(rel, subpaths, required_outer, 0)); } } @@ -2083,13 +2087,13 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, { Path *path; - path = (Path *) create_append_path(rel, startup_subpaths, NULL); + path = (Path *) create_append_path(rel, startup_subpaths, NULL, 0); path->pathkeys = pathkeys; add_path(rel, path); if (startup_neq_total) { - path = (Path *) create_append_path(rel, total_subpaths, NULL); + path = (Path *) create_append_path(rel, total_subpaths, NULL, 0); path->pathkeys = pathkeys; add_path(rel, path); } @@ -2103,14 +2107,14 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, Path *path; path = (Path *) create_append_path(rel, - list_reverse(startup_subpaths), NULL); + list_reverse(startup_subpaths), NULL, 0); path->pathkeys = pathkeys; add_path(rel, path); if (startup_neq_total) { path = (Path *) create_append_path(rel, - list_reverse(total_subpaths), NULL); + list_reverse(total_subpaths), NULL, 0); path->pathkeys = pathkeys; add_path(rel, path); } diff --git a/src/runtime_merge_append.c b/src/runtime_merge_append.c index ad6389336f..427ecd912a 100644 --- a/src/runtime_merge_append.c +++ b/src/runtime_merge_append.c @@ -15,6 +15,7 @@ #include "catalog/pg_collation.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" +#include "nodes/plannodes.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/planmain.h" @@ -601,6 +602,27 @@ find_ec_member_for_tle(EquivalenceClass *ec, return NULL; } +/* + * make_result + * Build a Result plan node + */ +static Result * 
+make_result(List *tlist, + Node *resconstantqual, + Plan *subplan) +{ + Result *node = makeNode(Result); + Plan *plan = &node->plan; + + plan->targetlist = tlist; + plan->qual = NIL; + plan->lefttree = subplan; + plan->righttree = NULL; + node->resconstantqual = resconstantqual; + + return node; +} + static Plan * prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, Relids relids, @@ -727,6 +749,7 @@ prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, EquivalenceMember *em = (EquivalenceMember *) lfirst(j); List *exprvars; ListCell *k; + int varflag; /* * We shouldn't be trying to sort by an equivalence class that @@ -745,9 +768,8 @@ prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, continue; sortexpr = em->em_expr; - exprvars = pull_var_clause((Node *) sortexpr, - PVC_INCLUDE_AGGREGATES, - PVC_INCLUDE_PLACEHOLDERS); + varflag = PVC_INCLUDE_AGGREGATES | PVC_INCLUDE_PLACEHOLDERS; + exprvars = pull_var_clause((Node *) sortexpr, varflag); foreach(k, exprvars) { if (!tlist_member_ignore_relabel(lfirst(k), tlist)) @@ -771,8 +793,7 @@ prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, { /* copy needed so we don't modify input's tlist below */ tlist = copyObject(tlist); - lefttree = (Plan *) make_result(root, tlist, NULL, - lefttree); + lefttree = (Plan *) make_result(tlist, NULL, lefttree); } /* Don't bother testing is_projection_capable_plan again */ From 8e24c017839d5178a1309df4139aeadaf47d6623 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 23 Sep 2016 17:48:10 +0300 Subject: [PATCH 145/184] tiny hack: bind custom data to EState using MemoryContext callbacks (estate_mod_data), refactoring, check that partition exists when INSERTing --- expected/pg_pathman.out | 2 +- src/copy_stmt_hooking.c | 55 ++------ src/partition_filter.c | 292 ++++++++++++++++++++++++++++------------ src/partition_filter.h | 17 ++- 4 files changed, 227 insertions(+), 139 deletions(-) 
diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index c1e79e4a96..4a613ca346 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1267,7 +1267,7 @@ SELECT pathman.disable_auto('test.range_rel'); (1 row) INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); -ERROR: There is no suitable partition for key 'Mon Jun 01 00:00:00 2015' +ERROR: no suitable partition for key 'Mon Jun 01 00:00:00 2015' SELECT pathman.enable_auto('test.range_rel'); enable_auto ------------- diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c index ab8db87da7..1e99b13f91 100644 --- a/src/copy_stmt_hooking.c +++ b/src/copy_stmt_hooking.c @@ -38,9 +38,6 @@ static uint64 PathmanCopyFrom(CopyState cstate, Relation parent_rel, List *range_table, bool old_protocol); -static ResultRelInfoHolder *select_partition_for_copy(const PartRelationInfo *prel, - ResultPartsStorage *parts_storage, - Datum value, EState *estate); /* @@ -376,7 +373,7 @@ PathmanCopyFrom(CopyState cstate, Relation parent_rel, /* Initialize ResultPartsStorage */ init_result_parts_storage(&parts_storage, estate, false, ResultPartsStorageStandard, - check_acl_for_partition, NULL); + NULL, NULL); parts_storage.saved_rel_info = parent_result_rel; /* Set up a tuple slot too */ @@ -418,16 +415,19 @@ PathmanCopyFrom(CopyState cstate, Relation parent_rel, /* Fetch PartRelationInfo for parent relation */ prel = get_pathman_relation_info(RelationGetRelid(parent_rel)); - /* Switch into its memory context */ + /* Switch into per tuple memory context */ MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); if (!NextCopyFrom(cstate, econtext, values, nulls, &tuple_oid)) break; + if (nulls[prel->attnum - 1]) + elog(ERROR, ERR_PART_ATTR_NULL); + /* Search for a matching partition */ - rri_holder_child = select_partition_for_copy(prel, &parts_storage, - values[prel->attnum - 1], - estate); + rri_holder_child = select_partition_for_insert(prel, &parts_storage, + values[prel->attnum - 1], + estate, 
false); child_result_rel = rri_holder_child->result_rel_info; estate->es_result_relation_info = child_result_rel; @@ -477,8 +477,7 @@ PathmanCopyFrom(CopyState cstate, Relation parent_rel, if (child_result_rel->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false, NULL, - NIL); + estate, false, NULL, NIL); /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, child_result_rel, tuple, @@ -522,39 +521,3 @@ PathmanCopyFrom(CopyState cstate, Relation parent_rel, return processed; } - -/* - * Smart wrapper for scan_result_parts_storage(). - */ -static ResultRelInfoHolder * -select_partition_for_copy(const PartRelationInfo *prel, - ResultPartsStorage *parts_storage, - Datum value, EState *estate) -{ - ExprContext *econtext; - ResultRelInfoHolder *rri_holder; - Oid selected_partid = InvalidOid; - Oid *parts; - int nparts; - - econtext = GetPerTupleExprContext(estate); - - /* Search for matching partitions using partitioned column */ - parts = find_partitions_for_value(value, prel, econtext, &nparts); - - if (nparts > 1) - elog(ERROR, "PATHMAN COPY selected more than one partition"); - else if (nparts == 0) - elog(ERROR, - "There is no suitable partition for key '%s'", - datum_to_cstring(value, prel->atttype)); - else - selected_partid = parts[0]; - - /* Replace parent table with a suitable partition */ - MemoryContextSwitchTo(estate->es_query_cxt); - rri_holder = scan_result_parts_storage(selected_partid, parts_storage); - MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - - return rri_holder; -} diff --git a/src/partition_filter.c b/src/partition_filter.c index a000444898..fff243f2d3 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -17,20 +17,36 @@ #include "utils/memutils.h" #include "nodes/nodeFuncs.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #define ALLOC_EXP 2 +/* + * We use this struct as an argument for fake + * MemoryContextCallback pf_memcxt_callback() + * in order to 
attach some additional info to + * EState (estate->es_query_cxt is involved). + */ +typedef struct +{ + int estate_alloc_result_rels; /* number of allocated result rels */ + bool estate_not_modified; /* did we modify EState somehow? */ +} estate_mod_data; + + bool pg_pathman_enable_partition_filter = true; CustomScanMethods partition_filter_plan_methods; CustomExecMethods partition_filter_exec_methods; +static estate_mod_data * fetch_estate_mod_data(EState *estate); static void partition_filter_visitor(Plan *plan, void *context); static List * pfilter_build_tlist(List *tlist); -static int append_rri_to_estate(EState *estate, ResultRelInfo *rri, int cur_allocated); +static Index append_rte_to_estate(EState *estate, RangeTblEntry *rte); +static int append_rri_to_estate(EState *estate, ResultRelInfo *rri); void @@ -71,31 +87,6 @@ add_partition_filters(List *rtable, Plan *plan) plan_tree_walker(plan, partition_filter_visitor, rtable); } -/* - * This callback adds a new RangeTblEntry once - * partition is opened for an INSERT. - */ -void -check_acl_for_partition(EState *estate, - ResultRelInfoHolder *rri_holder, - void *arg) -{ - RangeTblEntry *rte; - Relation part_rel = rri_holder->result_rel_info->ri_RelationDesc; - - rte = makeNode(RangeTblEntry); - - rte->rtekind = RTE_RELATION; - rte->relid = rri_holder->partid; - rte->relkind = part_rel->rd_rel->relkind; - rte->requiredPerms = ACL_INSERT; - - /* FIXME: Check permissions for partition */ - ExecCheckRTPerms(list_make1(rte), true); - - /* TODO: append RTE to estate->es_range_table */ -} - /* * Initialize ResultPartsStorage (hash table etc). 
@@ -123,13 +114,15 @@ init_result_parts_storage(ResultPartsStorage *parts_storage, result_rels_table_config, HASH_ELEM | HASH_BLOBS); parts_storage->estate = estate; - parts_storage->es_alloc_result_rels = estate->es_num_result_relations; - parts_storage->speculative_inserts = speculative_inserts; parts_storage->saved_rel_info = NULL; parts_storage->on_new_rri_holder_callback = on_new_rri_holder_cb; parts_storage->callback_arg = on_new_rri_holder_cb_arg; + /* Currently ResultPartsStorage is used only for INSERTs */ + parts_storage->command_type = CMD_INSERT; + parts_storage->speculative_inserts = speculative_inserts; + /* Partitions must remain locked till transaction's end */ parts_storage->head_open_lock_mode = RowExclusiveLock; parts_storage->heap_close_lock_mode = NoLock; @@ -180,18 +173,57 @@ scan_result_parts_storage(Oid partid, ResultPartsStorage *parts_storage) /* If not found, create & cache new ResultRelInfo */ if (!found) { - ResultRelInfo *part_result_rel_info = makeNode(ResultRelInfo); + Relation child_rel; + RangeTblEntry *child_rte, + *parent_rte; + Index child_rte_idx; + ResultRelInfo *part_result_rel_info; + + LockRelationOid(partid, parts_storage->head_open_lock_mode); + if(!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(partid))) + { + UnlockRelationOid(partid, parts_storage->head_open_lock_mode); + return NULL; + } + + parent_rte = rt_fetch(parts_storage->saved_rel_info->ri_RangeTableIndex, + parts_storage->estate->es_range_table); + + /* Open relation and check if it is a valid target */ + child_rel = heap_open(partid, NoLock); + CheckValidResultRel(child_rel, parts_storage->command_type); + + /* Create RangeTblEntry for partition */ + child_rte = makeNode(RangeTblEntry); + + child_rte->rtekind = RTE_RELATION; + child_rte->relid = partid; + child_rte->relkind = child_rel->rd_rel->relkind; + child_rte->eref = parent_rte->eref; + child_rte->requiredPerms = parent_rte->requiredPerms; + child_rte->checkAsUser = parent_rte->checkAsUser; + 
child_rte->insertedCols = parent_rte->insertedCols; + + /* Check permissions for partition */ + ExecCheckRTPerms(list_make1(child_rte), true); + + /* Append RangeTblEntry to estate->es_range_table */ + child_rte_idx = append_rte_to_estate(parts_storage->estate, child_rte); + + /* Create ResultRelInfo for partition */ + part_result_rel_info = makeNode(ResultRelInfo); /* Check that 'saved_rel_info' is set */ if (!parts_storage->saved_rel_info) elog(ERROR, "ResultPartsStorage contains no saved_rel_info"); InitResultRelInfo(part_result_rel_info, - heap_open(partid, parts_storage->head_open_lock_mode), - parts_storage->saved_rel_info->ri_RangeTableIndex, - 0); /* TODO: select suitable options */ + child_rel, + child_rte_idx, + parts_storage->estate->es_instrument); - ExecOpenIndices(part_result_rel_info, parts_storage->speculative_inserts); + if (parts_storage->command_type != CMD_DELETE) + ExecOpenIndices(part_result_rel_info, parts_storage->speculative_inserts); /* Copy necessary fields from saved ResultRelInfo */ CopyToResultRelInfo(ri_WithCheckOptions); @@ -208,11 +240,8 @@ scan_result_parts_storage(Oid partid, ResultPartsStorage *parts_storage) rri_holder->partid = partid; rri_holder->result_rel_info = part_result_rel_info; - /* Add ResultRelInfo to storage->es_alloc_result_rels */ - parts_storage->es_alloc_result_rels = - append_rri_to_estate(parts_storage->estate, - part_result_rel_info, - parts_storage->es_alloc_result_rels); + /* Append ResultRelInfo to storage->es_alloc_result_rels */ + append_rri_to_estate(parts_storage->estate, part_result_rel_info); /* Call on_new_rri_holder_callback() if needed */ if (parts_storage->on_new_rri_holder_callback) @@ -224,7 +253,6 @@ scan_result_parts_storage(Oid partid, ResultPartsStorage *parts_storage) return rri_holder; } - /* * Find matching partitions for 'value' using PartRelationInfo. 
*/ @@ -323,8 +351,7 @@ partition_filter_begin(CustomScanState *node, EState *estate, int eflags) /* Init ResultRelInfo cache */ init_result_parts_storage(&state->result_parts, estate, state->on_conflict_action != ONCONFLICT_NONE, - ResultPartsStorageStandard, - check_acl_for_partition, NULL); + ResultPartsStorageStandard, NULL, NULL); state->warning_triggered = false; } @@ -347,15 +374,9 @@ partition_filter_exec(CustomScanState *node) if (!TupIsNull(slot)) { - const PartRelationInfo *prel; - MemoryContext old_cxt; - - ResultRelInfoHolder *result_part_holder; - Oid selected_partid; - int nparts; - Oid *parts; - + const PartRelationInfo *prel; + ResultRelInfoHolder *rri_holder; bool isnull; Datum value; @@ -376,49 +397,21 @@ partition_filter_exec(CustomScanState *node) attrs[prel->attnum - 1]->atttypid == prel->atttype); value = slot_getattr(slot, prel->attnum, &isnull); if (isnull) - elog(ERROR, "partitioned column's value should not be NULL"); + elog(ERROR, ERR_PART_ATTR_NULL); /* Switch to per-tuple context */ old_cxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - /* Search for matching partitions */ - parts = find_partitions_for_value(value, prel, econtext, &nparts); - - if (nparts > 1) - elog(ERROR, "PartitionFilter selected more than one partition"); - else if (nparts == 0) - { - /* - * If auto partition propagation is enabled then try to create - * new partitions for the key - */ - if (prel->auto_partition && IsAutoPartitionEnabled()) - { - selected_partid = create_partitions(state->partitioned_table, - value, prel->atttype); - - /* get_pathman_relation_info() will refresh this entry */ - invalidate_pathman_relation_info(state->partitioned_table, NULL); - } - else - elog(ERROR, - "There is no suitable partition for key '%s'", - datum_to_cstring(value, prel->atttype)); - } - else - selected_partid = parts[0]; + /* Search for a matching partition */ + rri_holder = select_partition_for_insert(prel, + &state->result_parts, + value, estate, true); + 
estate->es_result_relation_info = rri_holder->result_rel_info; /* Switch back and clean up per-tuple context */ MemoryContextSwitchTo(old_cxt); ResetExprContext(econtext); - /* Replace parent table with a suitable partition */ - old_cxt = MemoryContextSwitchTo(estate->es_query_cxt); - result_part_holder = scan_result_parts_storage(selected_partid, - &state->result_parts); - estate->es_result_relation_info = result_part_holder->result_rel_info; - MemoryContextSwitchTo(old_cxt); - return slot; } @@ -450,11 +443,132 @@ partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *e /* Nothing to do here now */ } +/* + * Smart wrapper for scan_result_parts_storage(). + */ +ResultRelInfoHolder * +select_partition_for_insert(const PartRelationInfo *prel, + ResultPartsStorage *parts_storage, + Datum value, EState *estate, + bool spawn_partitions) +{ + MemoryContext old_cxt; + ExprContext *econtext; + ResultRelInfoHolder *rri_holder; + Oid selected_partid = InvalidOid; + Oid *parts; + int nparts; + + econtext = GetPerTupleExprContext(estate); + + /* Search for matching partitions */ + parts = find_partitions_for_value(value, prel, econtext, &nparts); + + if (nparts > 1) + elog(ERROR, ERR_PART_ATTR_MULTIPLE); + else if (nparts == 0) + { + /* + * If auto partition propagation is enabled then try to create + * new partitions for the key + */ + if (prel->auto_partition && IsAutoPartitionEnabled() && spawn_partitions) + { + selected_partid = create_partitions(PrelParentRelid(prel), + value, prel->atttype); + + /* get_pathman_relation_info() will refresh this entry */ + invalidate_pathman_relation_info(PrelParentRelid(prel), NULL); + } + else + elog(ERROR, ERR_PART_ATTR_NO_PART, + datum_to_cstring(value, prel->atttype)); + } + else selected_partid = parts[0]; + + /* Replace parent table with a suitable partition */ + old_cxt = MemoryContextSwitchTo(estate->es_query_cxt); + rri_holder = scan_result_parts_storage(selected_partid, parts_storage); + 
MemoryContextSwitchTo(old_cxt); + + /* Could not find suitable partition */ + if (rri_holder == NULL) + elog(ERROR, ERR_PART_ATTR_NO_PART, + datum_to_cstring(value, prel->atttype)); + + return rri_holder; +} + +/* + * Used by fetch_estate_mod_data() to find estate_mod_data. + */ +static void +pf_memcxt_callback(void *arg) { elog(DEBUG1, "EState is destroyed"); } + +/* + * Fetch (or create) an estate_mod_data structure we've hidden inside es_query_cxt. + */ +static estate_mod_data * +fetch_estate_mod_data(EState *estate) +{ + MemoryContext estate_mcxt = estate->es_query_cxt; + estate_mod_data *emd_struct; + MemoryContextCallback *cb = estate_mcxt->reset_cbs; + + /* Go through callback list */ + while (cb != NULL) + { + /* This is the dummy callback we're looking for! */ + if (cb->func == pf_memcxt_callback) + return (estate_mod_data *) cb->arg; + + cb = cb->next; /* step from the current node, not from the list head */ + } + + /* Have to create a new one */ + emd_struct = MemoryContextAlloc(estate_mcxt, sizeof(estate_mod_data)); + emd_struct->estate_not_modified = true; + emd_struct->estate_alloc_result_rels = estate->es_num_result_relations; + + cb = MemoryContextAlloc(estate_mcxt, sizeof(MemoryContextCallback)); + cb->func = pf_memcxt_callback; + cb->arg = emd_struct; + + MemoryContextRegisterResetCallback(estate_mcxt, cb); + + return emd_struct; +} + +/* + * Append RangeTblEntry 'rte' to estate->es_range_table. + */ +static Index +append_rte_to_estate(EState *estate, RangeTblEntry *rte) +{ + estate_mod_data *emd_struct = fetch_estate_mod_data(estate); + + /* Copy estate->es_range_table if it's first time expansion */ + if (emd_struct->estate_not_modified) + estate->es_range_table = list_copy(estate->es_range_table); + + estate->es_range_table = lappend(estate->es_range_table, rte); + + /* Update estate_mod_data */ + emd_struct->estate_not_modified = false; + + return list_length(estate->es_range_table); +} + +/* + * Append ResultRelInfo 'rri' to estate->es_result_relations. 
+ */ static int -append_rri_to_estate(EState *estate, ResultRelInfo *rri, int cur_allocated) +append_rri_to_estate(EState *estate, ResultRelInfo *rri) { - int result_rels_allocated = cur_allocated; + estate_mod_data *emd_struct = fetch_estate_mod_data(estate); + int result_rels_allocated = emd_struct->estate_alloc_result_rels; + /* Reallocate estate->es_result_relations if needed */ if (result_rels_allocated <= estate->es_num_result_relations) { ResultRelInfo *rri_array = estate->es_result_relations; @@ -468,9 +582,13 @@ append_rri_to_estate(EState *estate, ResultRelInfo *rri, int cur_allocated) } /* Append ResultRelInfo to 'es_result_relations' array */ - estate->es_result_relations[estate->es_num_result_relations++] = *rri; + estate->es_result_relations[estate->es_num_result_relations] = *rri; + + /* Update estate_mod_data */ + emd_struct->estate_alloc_result_rels = result_rels_allocated; + emd_struct->estate_not_modified = false; - return result_rels_allocated; + return estate->es_num_result_relations++; } /* diff --git a/src/partition_filter.h b/src/partition_filter.h index 370e587d89..70d74ee807 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -19,6 +19,11 @@ #include "optimizer/planner.h" +#define ERR_PART_ATTR_NULL "partitioned column's value should not be NULL" +#define ERR_PART_ATTR_NO_PART "no suitable partition for key '%s'" +#define ERR_PART_ATTR_MULTIPLE "PartitionFilter selected more than one partition" + + /* * Single element of 'result_rels_table'. 
*/ @@ -49,9 +54,9 @@ typedef struct on_new_rri_holder on_new_rri_holder_callback; void *callback_arg; - EState *estate; - int es_alloc_result_rels; /* number of allocated result rels */ + EState *estate; /* pointer to executor's state */ + CmdType command_type; /* currently we only allow INSERT */ LOCKMODE head_open_lock_mode; LOCKMODE heap_close_lock_mode; } ResultPartsStorage; @@ -84,9 +89,6 @@ extern CustomExecMethods partition_filter_exec_methods; void init_partition_filter_static_data(void); void add_partition_filters(List *rtable, Plan *plan); -void check_acl_for_partition(EState *estate, - ResultRelInfoHolder *rri_holder, - void *arg); /* ResultPartsStorage init\fini\scan function */ void init_result_parts_storage(ResultPartsStorage *parts_storage, @@ -124,4 +126,9 @@ void partition_filter_explain(CustomScanState *node, List *ancestors, ExplainState *es); +ResultRelInfoHolder * select_partition_for_insert(const PartRelationInfo *prel, + ResultPartsStorage *parts_storage, + Datum value, EState *estate, + bool spawn_partitions); + #endif From 5aab5f1b0fdf9c3d1b4515a605e617047f4ddb68 Mon Sep 17 00:00:00 2001 From: Maksim Milyutin Date: Fri, 23 Sep 2016 18:05:56 +0300 Subject: [PATCH 146/184] Add backward compatibility with version 9.5 --- Makefile | 3 +- src/hooks.c | 20 +++--- src/pg_compat.c | 121 +++++++++++++++++++++++++++++++++++++ src/pg_compat.h | 82 +++++++++++++++++++++++++ src/pg_pathman.c | 74 ++++------------------- src/runtime_merge_append.c | 32 +++------- 6 files changed, 237 insertions(+), 95 deletions(-) create mode 100644 src/pg_compat.c create mode 100644 src/pg_compat.h diff --git a/Makefile b/Makefile index 4224a99b2c..f9c07e83b6 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,8 @@ MODULE_big = pg_pathman OBJS = src/init.o src/relation_info.o src/utils.o src/partition_filter.o src/runtimeappend.o \ src/runtime_merge_append.o src/pg_pathman.o src/dsm_array.o src/rangeset.o src/pl_funcs.o \ - src/pathman_workers.o src/hooks.o 
src/nodes_common.o src/xact_handling.o $(WIN32RES) + src/pathman_workers.o src/hooks.o src/nodes_common.o src/xact_handling.o src/pg_compat.o \ + $(WIN32RES) EXTENSION = pg_pathman EXTVERSION = 1.0 diff --git a/src/hooks.c b/src/hooks.c index 8c3afe39f0..cd844ed656 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -8,6 +8,8 @@ * ------------------------------------------------------------------------ */ +#include "pg_compat.h" + #include "hooks.h" #include "init.h" #include "partition_filter.h" @@ -167,12 +169,13 @@ pathman_join_pathlist_hook(PlannerInfo *root, * Currently we use get_parameterized_joinrel_size() since * it works just fine, but this might change some day. */ - nest_path->path.rows = get_parameterized_joinrel_size(root, - joinrel, - outer, - inner, - extra->sjinfo, - filtered_joinclauses); + nest_path->path.rows = get_parameterized_joinrel_size_compat( + root, + joinrel, + outer, + inner, + extra->sjinfo, + filtered_joinclauses); /* Finally we can add the new NestLoop path */ add_path(joinrel, (Path *) nest_path); @@ -313,7 +316,8 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb IndexRange irange = lfirst_irange(lc); for (i = irange.ir_lower; i <= irange.ir_upper; i++) - append_child_relation(root, rel, rti, rte, i, children[i], wrappers); + append_child_relation(root, rel, rti, rte, i, children[i], + wrappers); } /* Clear old path list */ @@ -321,7 +325,7 @@ pathman_rel_pathlist_hook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb rel->pathlist = NIL; set_append_rel_pathlist(root, rel, rti, rte, pathkeyAsc, pathkeyDesc); - set_append_rel_size(root, rel, rti, rte); + set_append_rel_size_compat(root, rel, rti, rte); /* No need to go further (both nodes are disabled), return */ if (!(pg_pathman_enable_runtimeappend || diff --git a/src/pg_compat.c b/src/pg_compat.c new file mode 100644 index 0000000000..f79a8107b7 --- /dev/null +++ b/src/pg_compat.c @@ -0,0 +1,121 @@ +#include "pg_compat.h" + +#include 
"optimizer/pathnode.h" +#include "port.h" +#include "utils.h" + +#include <math.h> + +/* +double +get_parameterized_joinrel_size_compat(PlannerInfo *root, RelOptInfo *rel, + Path *outer_path, Path *inner_path, + SpecialJoinInfo *sjinfo, + List *restrict_clauses) +{ +#if PG_VERSION_NUM >= 90600 + return get_parameterized_joinrel_size(root, rel, outer_path, inner_path, + sjinfo, restrict_clauses); +#else + return get_parameterized_joinrel_size(root, rel, outer_path->rows, + inner_path->rows, sjinfo, +#endif + restrict_clauses); +} +*/ + +void +set_append_rel_size_compat(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + double parent_rows = 0; + double parent_size = 0; + ListCell *l; + + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + Index childRTindex, + parentRTindex = rti; + RelOptInfo *childrel; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + childRTindex = appinfo->child_relid; + + childrel = find_base_rel(root, childRTindex); + Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); + + /* + * Accumulate size information from each live child. 
+ */ + Assert(childrel->rows > 0); + + parent_rows += childrel->rows; +#if PG_VERSION_NUM >= 90600 + parent_size += childrel->reltarget->width * childrel->rows; +#else + parent_size += childrel->width * childrel->rows; +#endif + } + + rel->rows = parent_rows; +#if PG_VERSION_NUM >= 90600 + rel->reltarget->width = rint(parent_size / parent_rows); +#else + rel->width = rint(parent_size / parent_rows); +#endif + rel->tuples = parent_rows; +} + +extern +void copy_targetlist_compat(RelOptInfo *dest, RelOptInfo *rel) +{ + ListCell *lc; + +#if PG_VERSION_NUM >= 90600 + dest->reltarget->exprs = NIL; + foreach(lc, rel->reltarget->exprs) +#else + dest->reltargetlist = NIL; + foreach(lc, rel->reltargetlist) +#endif + { + Node *new_target; + Node *node; + + node = (Node *) lfirst(lc); + new_target = copyObject(node); + change_varnos(new_target, rel->relid, dest->relid); +#if PG_VERSION_NUM >= 90600 + dest->reltarget->exprs = lappend(dest->reltarget->exprs, new_target); +#else + dest->reltargetlist = lappend(dest->reltargetlist, new_target); +#endif + } +} + +#if PG_VERSION_NUM >= 90600 +/* + * make_result + * Build a Result plan node + */ +Result * +make_result(List *tlist, + Node *resconstantqual, + Plan *subplan) +{ + Result *node = makeNode(Result); + Plan *plan = &node->plan; + + plan->targetlist = tlist; + plan->qual = NIL; + plan->lefttree = subplan; + plan->righttree = NULL; + node->resconstantqual = resconstantqual; + + return node; +} +#endif diff --git a/src/pg_compat.h b/src/pg_compat.h new file mode 100644 index 0000000000..718853d7fd --- /dev/null +++ b/src/pg_compat.h @@ -0,0 +1,82 @@ +#ifndef PG_COMPAT_H +#define PG_COMPAT_H + +#include "postgres.h" + +#include "nodes/relation.h" +#include "nodes/pg_list.h" +#include "optimizer/cost.h" +#include "optimizer/paths.h" + +/* +extern double get_parameterized_joinrel_size_compat(PlannerInfo *root, + RelOptInfo *rel, + Path *outer_path, + Path *inner_path, + SpecialJoinInfo *sjinfo, + List *restrict_clauses); +*/ 
+extern void set_append_rel_size_compat(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +extern void copy_targetlist_compat(RelOptInfo *dest, RelOptInfo *rel); + +#if PG_VERSION_NUM >= 90600 + +#define get_parameterized_joinrel_size_compat(root, \ + rel, \ + outer_path, \ + inner_path, \ + sjinfo, \ + restrict_clauses) \ + get_parameterized_joinrel_size(root, \ + rel, \ + outer_path, \ + inner_path, \ + sjinfo, \ + restrict_clauses) + +#define check_index_predicates_compat(rool, rel) \ + check_index_predicates(root, rel) + +#define create_append_path_compat(rel, subpaths, required_outer, \ + parallel_workers) \ + create_append_path(rel, subpaths, required_outer, parallel_workers) + +#define pull_var_clause_compat(node, aggbehavior, phbehavior) \ + pull_var_clause(node, aggbehavior | phbehavior) + +extern Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan); +#define make_result_compat(root, tlist, resconstantqual, subplan) \ + make_result(tlist, resconstantqual, subplan) + +#else + +#define get_parameterized_joinrel_size_compat(root, \ + rel, \ + outer_path, \ + inner_path, \ + sjinfo, \ + restrict_clauses) \ + get_parameterized_joinrel_size(root, \ + rel, \ + (outer_path)->rows, \ + (inner_path)->rows, \ + sjinfo, \ + restrict_clauses) + +#define check_index_predicates_compat(rool, rel) \ + check_partial_indexes(root, rel) + +#define create_append_path_compat(rel, subpaths, required_outer, \ + parallel_workers) \ + create_append_path(rel, subpaths, required_outer) + +#define pull_var_clause_compat(node, aggbehavior, phbehavior) \ + pull_var_clause(node, aggbehavior, phbehavior) + +#define make_result_compat(root, tlist, resconstantqual, subplan) \ + make_result(root, tlist, resconstantqual, subplan) + +#endif + +#endif diff --git a/src/pg_pathman.c b/src/pg_pathman.c index d05cc3a06a..5ae16c16c5 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -9,6 +9,8 @@ * 
------------------------------------------------------------------------ */ +#include "pg_compat.h" + #include "pathman.h" #include "init.h" #include "hooks.h" @@ -43,7 +45,6 @@ #include "utils/snapmgr.h" #include "utils/typcache.h" - PG_MODULE_MAGIC; @@ -346,44 +347,6 @@ handle_modification_query(Query *parse) return; } -void -set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) -{ - double parent_rows = 0; - double parent_size = 0; - ListCell *l; - - foreach(l, root->append_rel_list) - { - AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); - Index childRTindex, - parentRTindex = rti; - RelOptInfo *childrel; - - /* append_rel_list contains all append rels; ignore others */ - if (appinfo->parent_relid != parentRTindex) - continue; - - childRTindex = appinfo->child_relid; - - childrel = find_base_rel(root, childRTindex); - Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); - - /* - * Accumulate size information from each live child. - */ - Assert(childrel->rows > 0); - - parent_rows += childrel->rows; - parent_size += childrel->reltarget->width * childrel->rows; - } - - rel->rows = parent_rows; - rel->reltarget->width = rint(parent_size / parent_rows); - rel->tuples = parent_rows; -} - /* * Creates child relation and adds it to root. 
* Returns child index in simple_rel_array @@ -396,7 +359,6 @@ append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, RelOptInfo *childrel; Index childRTindex; AppendRelInfo *appinfo; - Node *node; ListCell *lc, *lc2; Relation newrelation; @@ -423,20 +385,7 @@ append_child_relation(PlannerInfo *root, RelOptInfo *rel, Index rti, childrel = build_simple_rel(root, childRTindex, RELOPT_OTHER_MEMBER_REL); /* Copy targetlist */ - childrel->reltarget->exprs = NIL; - childrel->reltarget->sortgrouprefs = (Index *) palloc( - list_length(rel->reltarget->exprs) * sizeof(Index)); - foreach(lc, rel->reltarget->exprs) - { - Node *new_target; - - node = (Node *) lfirst(lc); - new_target = copyObject(node); - change_varnos(new_target, rel->relid, childrel->relid); - childrel->reltarget->exprs = lappend(childrel->reltarget->exprs, - new_target); - /* childrel->reltarget->sortgrouprefs[i++] = */ - } + copy_targetlist_compat(childrel, rel); /* Copy attr_needed & attr_widths */ childrel->attr_needed = (Relids *) @@ -1644,7 +1593,7 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) * Test any partial indexes of rel for applicability. We must do this * first since partial unique indexes can affect size estimates. */ - check_index_predicates(root, rel); + check_index_predicates_compat(root, rel); /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); @@ -1858,7 +1807,8 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, * if we have zero or one live subpath due to constraint exclusion.) 
*/ if (subpaths_valid) - add_path(rel, (Path *) create_append_path(rel, subpaths, NULL, 0)); + add_path(rel, + (Path *) create_append_path_compat(rel, subpaths, NULL, 0)); /* * Also build unparameterized MergeAppend paths based on the collected @@ -1909,7 +1859,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, if (subpaths_valid) add_path(rel, (Path *) - create_append_path(rel, subpaths, required_outer, 0)); + create_append_path_compat(rel, subpaths, required_outer, 0)); } } @@ -2087,13 +2037,15 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, { Path *path; - path = (Path *) create_append_path(rel, startup_subpaths, NULL, 0); + path = (Path *) create_append_path_compat(rel, startup_subpaths, + NULL, 0); path->pathkeys = pathkeys; add_path(rel, path); if (startup_neq_total) { - path = (Path *) create_append_path(rel, total_subpaths, NULL, 0); + path = (Path *) create_append_path_compat(rel, total_subpaths, + NULL, 0); path->pathkeys = pathkeys; add_path(rel, path); } @@ -2106,14 +2058,14 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, */ Path *path; - path = (Path *) create_append_path(rel, + path = (Path *) create_append_path_compat(rel, list_reverse(startup_subpaths), NULL, 0); path->pathkeys = pathkeys; add_path(rel, path); if (startup_neq_total) { - path = (Path *) create_append_path(rel, + path = (Path *) create_append_path_compat(rel, list_reverse(total_subpaths), NULL, 0); path->pathkeys = pathkeys; add_path(rel, path); diff --git a/src/runtime_merge_append.c b/src/runtime_merge_append.c index 427ecd912a..0aec21772e 100644 --- a/src/runtime_merge_append.c +++ b/src/runtime_merge_append.c @@ -8,6 +8,8 @@ * ------------------------------------------------------------------------ */ +#include "pg_compat.h" + #include "runtime_merge_append.h" #include "pathman.h" @@ -602,27 +604,6 @@ find_ec_member_for_tle(EquivalenceClass *ec, return NULL; } -/* - * make_result - * Build a Result plan node - */ -static Result * 
-make_result(List *tlist, - Node *resconstantqual, - Plan *subplan) -{ - Result *node = makeNode(Result); - Plan *plan = &node->plan; - - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = subplan; - plan->righttree = NULL; - node->resconstantqual = resconstantqual; - - return node; -} - static Plan * prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, Relids relids, @@ -749,7 +730,6 @@ prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, EquivalenceMember *em = (EquivalenceMember *) lfirst(j); List *exprvars; ListCell *k; - int varflag; /* * We shouldn't be trying to sort by an equivalence class that @@ -768,8 +748,9 @@ prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, continue; sortexpr = em->em_expr; - varflag = PVC_INCLUDE_AGGREGATES | PVC_INCLUDE_PLACEHOLDERS; - exprvars = pull_var_clause((Node *) sortexpr, varflag); + exprvars = pull_var_clause_compat((Node *) sortexpr, + PVC_INCLUDE_AGGREGATES, + PVC_INCLUDE_PLACEHOLDERS); foreach(k, exprvars) { if (!tlist_member_ignore_relabel(lfirst(k), tlist)) @@ -793,7 +774,8 @@ prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, { /* copy needed so we don't modify input's tlist below */ tlist = copyObject(tlist); - lefttree = (Plan *) make_result(tlist, NULL, lefttree); + lefttree = (Plan *) make_result_compat(root, tlist, NULL, + lefttree); } /* Don't bother testing is_projection_capable_plan again */ From d7fd9b9de892c33bba0ae9404e1f4d228d20e2dc Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 23 Sep 2016 18:13:12 +0300 Subject: [PATCH 147/184] don't allow freeze for COPY FROM on partitioned table --- src/copy_stmt_hooking.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c index 1e99b13f91..1b44ecd56a 100644 --- a/src/copy_stmt_hooking.c +++ b/src/copy_stmt_hooking.c @@ -73,7 +73,21 @@ 
is_pathman_related_copy(Node *parsetree) /* Check that relation is partitioned */ if (get_pathman_relation_info(partitioned_table)) { - elog(DEBUG1, "Overriding default behavior for COPY (%u)", partitioned_table); + ListCell *lc; + + /* Analyze options list */ + foreach (lc, copy_stmt->options) + { + DefElem *defel = (DefElem *) lfirst(lc); + + Assert(IsA(defel, DefElem)); + + /* We do not support freeze */ + if (strcmp(defel->defname, "freeze") == 0) + elog(ERROR, "freeze is not supported for partitioned tables"); + } + + elog(DEBUG1, "Overriding default behavior for COPY [%u]", partitioned_table); return true; } From e678c3e9e9b126d804b6097ede5c96e77240e711 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 23 Sep 2016 19:40:29 +0300 Subject: [PATCH 148/184] clean pg_compat.c & pg_compat.h, add PostgreSQL 9.6 to Travis CI --- .travis.yml | 2 ++ src/init.c | 5 +++- src/nodes_common.h | 3 +++ src/partition_filter.h | 4 +++ src/pg_compat.c | 27 ++++++++------------ src/pg_compat.h | 56 ++++++++++++++++++------------------------ 6 files changed, 47 insertions(+), 50 deletions(-) diff --git a/.travis.yml b/.travis.yml index 047a1c52d4..36b5bc04ab 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,8 @@ before_install: - sudo sh ./travis/apt.postgresql.org.sh env: + - PGVERSION=9.6 CHECK_CODE=true + - PGVERSION=9.6 CHECK_CODE=false - PGVERSION=9.5 CHECK_CODE=true - PGVERSION=9.5 CHECK_CODE=false diff --git a/src/init.c b/src/init.c index 17a23b44e5..d175b5b44c 100644 --- a/src/init.c +++ b/src/init.c @@ -22,7 +22,6 @@ #include "access/sysattr.h" #include "catalog/indexing.h" #include "catalog/pg_constraint.h" -#include "catalog/pg_constraint_fn.h" #include "catalog/pg_inherits.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_type.h" @@ -38,6 +37,10 @@ #include "utils/syscache.h" #include "utils/typcache.h" +#if PG_VERSION_NUM >= 90600 +#include "catalog/pg_constraint_fn.h" +#endif + /* Help user in case of emergency */ #define INIT_ERROR_HINT 
"pg_pathman will be disabled to allow you to resolve this issue" diff --git a/src/nodes_common.h b/src/nodes_common.h index 30693dd8e6..f0423a48e8 100644 --- a/src/nodes_common.h +++ b/src/nodes_common.h @@ -16,7 +16,10 @@ #include "postgres.h" #include "commands/explain.h" #include "optimizer/planner.h" + +#if PG_VERSION_NUM >= 90600 #include "nodes/extensible.h" +#endif /* diff --git a/src/partition_filter.h b/src/partition_filter.h index cbbc61ed5d..68de9f1b2d 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -17,7 +17,11 @@ #include "postgres.h" #include "commands/explain.h" #include "optimizer/planner.h" + +#if PG_VERSION_NUM >= 90600 #include "nodes/extensible.h" +#endif + typedef struct { diff --git a/src/pg_compat.c b/src/pg_compat.c index f79a8107b7..c0232417f9 100644 --- a/src/pg_compat.c +++ b/src/pg_compat.c @@ -1,3 +1,13 @@ +/* ------------------------------------------------------------------------ + * + * pg_compat.c + * Compatibility tools + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + #include "pg_compat.h" #include "optimizer/pathnode.h" @@ -6,23 +16,6 @@ #include <math.h> -/* -double -get_parameterized_joinrel_size_compat(PlannerInfo *root, RelOptInfo *rel, - Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses) -{ -#if PG_VERSION_NUM >= 90600 - return get_parameterized_joinrel_size(root, rel, outer_path, inner_path, - sjinfo, restrict_clauses); -#else - return get_parameterized_joinrel_size(root, rel, outer_path->rows, - inner_path->rows, sjinfo, -#endif - restrict_clauses); -} -*/ void set_append_rel_size_compat(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/pg_compat.h b/src/pg_compat.h index 718853d7fd..7bef6778ee 100644 --- a/src/pg_compat.h +++ b/src/pg_compat.h @@ -1,3 +1,13 @@ +/* ------------------------------------------------------------------------ + * + * pg_compat.h + * Compatibility tools + * 
+ * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + #ifndef PG_COMPAT_H #define PG_COMPAT_H @@ -8,38 +18,24 @@ #include "optimizer/cost.h" #include "optimizer/paths.h" -/* -extern double get_parameterized_joinrel_size_compat(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -*/ + extern void set_append_rel_size_compat(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); extern void copy_targetlist_compat(RelOptInfo *dest, RelOptInfo *rel); #if PG_VERSION_NUM >= 90600 -#define get_parameterized_joinrel_size_compat(root, \ - rel, \ - outer_path, \ - inner_path, \ - sjinfo, \ +#define get_parameterized_joinrel_size_compat(root, rel, outer_path, \ + inner_path, sjinfo, \ restrict_clauses) \ - get_parameterized_joinrel_size(root, \ - rel, \ - outer_path, \ - inner_path, \ - sjinfo, \ + get_parameterized_joinrel_size(root, rel, outer_path, \ + inner_path, sjinfo, \ restrict_clauses) #define check_index_predicates_compat(rool, rel) \ check_index_predicates(root, rel) -#define create_append_path_compat(rel, subpaths, required_outer, \ - parallel_workers) \ +#define create_append_path_compat(rel, subpaths, required_outer, parallel_workers) \ create_append_path(rel, subpaths, required_outer, parallel_workers) #define pull_var_clause_compat(node, aggbehavior, phbehavior) \ @@ -49,26 +45,21 @@ extern Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan); #define make_result_compat(root, tlist, resconstantqual, subplan) \ make_result(tlist, resconstantqual, subplan) -#else +#else /* PG_VERSION_NUM >= 90500 */ -#define get_parameterized_joinrel_size_compat(root, \ - rel, \ +#define get_parameterized_joinrel_size_compat(root, rel, \ outer_path, \ inner_path, \ - sjinfo, \ - restrict_clauses) \ - get_parameterized_joinrel_size(root, \ - rel, \ + sjinfo, restrict_clauses) \ + 
get_parameterized_joinrel_size(root, rel, \ (outer_path)->rows, \ (inner_path)->rows, \ - sjinfo, \ - restrict_clauses) + sjinfo, restrict_clauses) #define check_index_predicates_compat(rool, rel) \ check_partial_indexes(root, rel) -#define create_append_path_compat(rel, subpaths, required_outer, \ - parallel_workers) \ +#define create_append_path_compat(rel, subpaths, required_outer, parallel_workers) \ create_append_path(rel, subpaths, required_outer) #define pull_var_clause_compat(node, aggbehavior, phbehavior) \ @@ -79,4 +70,5 @@ extern Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan); #endif -#endif + +#endif /* PG_COMPAT_H */ From 1e903c2f00f8b94ae34a8503f63e37b2963eb2cf Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 26 Sep 2016 11:50:35 -0400 Subject: [PATCH 149/184] Document has_type_privilege(). Evidently an oversight in commit 729205571. Back-patch to 9.2 where privileges for types were introduced. Report: <20160922173517.8214.88959@wrigleys.postgresql.org> --- doc/src/sgml/func.sgml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 1355ecf728..94f10881b2 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -15293,6 +15293,21 @@ SET search_path TO schema , schema, .. boolean does current user have privilege for tablespace + + has_type_privilege(user, + type, + privilege) + + boolean + does user have privilege for type + + + has_type_privilege(type, + privilege) + + boolean + does current user have privilege for type + pg_has_role(user, role, @@ -15351,6 +15366,9 @@ SET search_path TO schema , schema, .. has_tablespace_privilege + + has_type_privilege + pg_has_role @@ -15505,6 +15523,18 @@ SELECT has_function_privilege('joeuser', 'myfunc(int, text)', 'execute'); CREATE. + + has_type_privilege checks whether a user + can access a type in a particular way. + Its argument possibilities + are analogous to has_table_privilege. 
+ When specifying a type by a text string rather than by OID, + the allowed input is the same as for the regtype data type + (see ). + The desired access privilege type must evaluate to + USAGE. + + pg_has_role checks whether a user can access a role in a particular way. From 62f118420c8259074407bf90120996ed58b39286 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 27 Sep 2016 02:54:56 +0300 Subject: [PATCH 150/184] implement basic FDW support for INSERTs, introduce GUC pg_pathman.insert_into_fdw, restrict FDW INSERTs for COPY FROM stmt, other fixes --- src/copy_stmt_hooking.c | 25 ++++- src/partition_filter.c | 200 ++++++++++++++++++++++++++++++++++++++-- src/partition_filter.h | 1 + src/pg_pathman.c | 8 +- src/utils.c | 4 +- 5 files changed, 222 insertions(+), 16 deletions(-) diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c index 1b44ecd56a..788442f10b 100644 --- a/src/copy_stmt_hooking.c +++ b/src/copy_stmt_hooking.c @@ -23,6 +23,7 @@ #include "commands/copy.h" #include "commands/trigger.h" #include "executor/executor.h" +#include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" #include "utils/builtins.h" @@ -39,6 +40,10 @@ static uint64 PathmanCopyFrom(CopyState cstate, List *range_table, bool old_protocol); +static void prepare_rri_fdw_for_copy(EState *estate, + ResultRelInfoHolder *rri_holder, + void *arg); + /* * Is pg_pathman supposed to handle this COPY stmt? @@ -63,7 +68,7 @@ is_pathman_related_copy(Node *parsetree) if (!copy_stmt->relation) return false; - /* TODO: select appropriate lock for COPY */ + /* Get partition's Oid while locking it */ partitioned_table = RangeVarGetRelid(copy_stmt->relation, (copy_stmt->is_from ? 
RowExclusiveLock : @@ -387,7 +392,7 @@ PathmanCopyFrom(CopyState cstate, Relation parent_rel, /* Initialize ResultPartsStorage */ init_result_parts_storage(&parts_storage, estate, false, ResultPartsStorageStandard, - NULL, NULL); + prepare_rri_fdw_for_copy, NULL); parts_storage.saved_rel_info = parent_result_rel; /* Set up a tuple slot too */ @@ -535,3 +540,19 @@ PathmanCopyFrom(CopyState cstate, Relation parent_rel, return processed; } + +/* + * COPY FROM does not support FDWs, emit ERROR. + */ +static void +prepare_rri_fdw_for_copy(EState *estate, + ResultRelInfoHolder *rri_holder, + void *arg) +{ + ResultRelInfo *rri = rri_holder->result_rel_info; + FdwRoutine *fdw_routine = rri->ri_FdwRoutine; + + if (fdw_routine != NULL) + elog(ERROR, "cannot copy to foreign partition \"%s\"", + get_rel_name(RelationGetRelid(rri->ri_RelationDesc))); +} diff --git a/src/partition_filter.c b/src/partition_filter.c index fff243f2d3..51f09923e0 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -8,14 +8,16 @@ * ------------------------------------------------------------------------ */ -#include "partition_filter.h" +#include "init.h" #include "nodes_common.h" +#include "partition_filter.h" #include "utils.h" -#include "init.h" +#include "foreign/fdwapi.h" +#include "foreign/foreign.h" +#include "nodes/nodeFuncs.h" #include "utils/guc.h" #include "utils/memutils.h" -#include "nodes/nodeFuncs.h" #include "utils/lsyscache.h" #include "utils/syscache.h" @@ -35,8 +37,26 @@ typedef struct bool estate_not_modified; /* did we modify EState somehow? */ } estate_mod_data; +/* + * Allow INSERTs into any FDW \ postgres_fdw \ no FDWs at all. 
+ */ +typedef enum +{ + PF_FDW_INSERT_DISABLED = 0, /* INSERTs into FDWs are prohibited */ + PF_FDW_INSERT_POSTGRES, /* INSERTs into postgres_fdw are OK */ + PF_FDW_INSERT_ANY_FDW /* INSERTs into any FDWs are OK */ +} PF_insert_fdw_mode; + +static const struct config_enum_entry pg_pathman_insert_into_fdw_options[] = { + { "disabled", PF_FDW_INSERT_DISABLED, false }, + { "postgres", PF_FDW_INSERT_POSTGRES, false }, + { "any_fdw", PF_FDW_INSERT_ANY_FDW, false }, + { NULL, 0, false } +}; + bool pg_pathman_enable_partition_filter = true; +int pg_pathman_insert_into_fdw = PF_FDW_INSERT_POSTGRES; CustomScanMethods partition_filter_plan_methods; CustomExecMethods partition_filter_exec_methods; @@ -47,6 +67,9 @@ static void partition_filter_visitor(Plan *plan, void *context); static List * pfilter_build_tlist(List *tlist); static Index append_rte_to_estate(EState *estate, RangeTblEntry *rte); static int append_rri_to_estate(EState *estate, ResultRelInfo *rri); +static void prepare_rri_fdw_for_insert(EState *estate, + ResultRelInfoHolder *rri_holder, + void *arg); void @@ -74,6 +97,18 @@ init_partition_filter_static_data(void) NULL, NULL, NULL); + + DefineCustomEnumVariable("pg_pathman.insert_into_fdw", + "Allow INSERTS into FDW partitions.", + NULL, + &pg_pathman_insert_into_fdw, + PF_FDW_INSERT_POSTGRES, + pg_pathman_insert_into_fdw_options, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); } @@ -179,6 +214,7 @@ scan_result_parts_storage(Oid partid, ResultPartsStorage *parts_storage) Index child_rte_idx; ResultRelInfo *part_result_rel_info; + /* Lock partition and check if it exists */ LockRelationOid(partid, parts_storage->head_open_lock_mode); if(!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(partid))) { @@ -236,18 +272,18 @@ scan_result_parts_storage(Oid partid, ResultPartsStorage *parts_storage) /* ri_ConstraintExprs will be initialized by ExecRelCheck() */ part_result_rel_info->ri_ConstraintExprs = NULL; - /* Now fill the ResultRelInfo holder */ + /* Finally fill the 
ResultRelInfo holder */ rri_holder->partid = partid; rri_holder->result_rel_info = part_result_rel_info; - /* Append ResultRelInfo to storage->es_alloc_result_rels */ - append_rri_to_estate(parts_storage->estate, part_result_rel_info); - /* Call on_new_rri_holder_callback() if needed */ if (parts_storage->on_new_rri_holder_callback) parts_storage->on_new_rri_holder_callback(parts_storage->estate, rri_holder, parts_storage->callback_arg); + + /* Append ResultRelInfo to storage->es_alloc_result_rels */ + append_rri_to_estate(parts_storage->estate, part_result_rel_info); } return rri_holder; @@ -351,7 +387,7 @@ partition_filter_begin(CustomScanState *node, EState *estate, int eflags) /* Init ResultRelInfo cache */ init_result_parts_storage(&state->result_parts, estate, state->on_conflict_action != ONCONFLICT_NONE, - ResultPartsStorageStandard, NULL, NULL); + ResultPartsStorageStandard, prepare_rri_fdw_for_insert, NULL); state->warning_triggered = false; } @@ -499,6 +535,148 @@ select_partition_for_insert(const PartRelationInfo *prel, return rri_holder; } +/* + * Callback to be executed on FDW partitions. 
+ */ +static void +prepare_rri_fdw_for_insert(EState *estate, + ResultRelInfoHolder *rri_holder, + void *arg) +{ + ResultRelInfo *rri = rri_holder->result_rel_info; + FdwRoutine *fdw_routine = rri->ri_FdwRoutine; + Oid partid; + + /* Nothing to do if not FDW */ + if (fdw_routine == NULL) + return; + + partid = RelationGetRelid(rri->ri_RelationDesc); + + /* Perform some checks according to 'pg_pathman_insert_into_fdw' */ + switch (pg_pathman_insert_into_fdw) + { + case PF_FDW_INSERT_DISABLED: + elog(ERROR, "INSERTs into FDW partitions are disabled"); + break; + + case PF_FDW_INSERT_POSTGRES: + { + ForeignDataWrapper *fdw; + ForeignServer *fserver; + + /* Check if it's PostgreSQL FDW */ + fserver = GetForeignServer(GetForeignTable(partid)->serverid); + fdw = GetForeignDataWrapper(fserver->fdwid); + if (strcmp("postgres_fdw", fdw->fdwname) != 0) + elog(ERROR, "FDWs other than postgres_fdw are restricted"); + } + break; + + case PF_FDW_INSERT_ANY_FDW: + { + ForeignDataWrapper *fdw; + ForeignServer *fserver; + + fserver = GetForeignServer(GetForeignTable(partid)->serverid); + fdw = GetForeignDataWrapper(fserver->fdwid); + if (strcmp("postgres_fdw", fdw->fdwname) != 0) + elog(WARNING, "unrestricted FDW mode may lead to \"%s\" crashes", + fdw->fdwname); + } + break; /* do nothing */ + + default: + elog(ERROR, "Mode is not implemented yet"); + break; + } + + if (fdw_routine->PlanForeignModify) + { + RangeTblEntry *rte; + ModifyTableState mtstate; + List *fdw_private; + Query query; + PlannedStmt *plan; + TupleDesc tupdesc; + int i, + target_attr; + + /* Fetch RangeTblEntry for partition */ + rte = rt_fetch(rri->ri_RangeTableIndex, estate->es_range_table); + + /* Fetch tuple descriptor */ + tupdesc = RelationGetDescr(rri->ri_RelationDesc); + + /* Create fake Query node */ + memset((void *) &query, 0, sizeof(Query)); + NodeSetTag(&query, T_Query); + + query.commandType = CMD_INSERT; + query.querySource = QSRC_ORIGINAL; + query.resultRelation = 1; + query.rtable = 
list_make1(copyObject(rte)); + query.jointree = makeNode(FromExpr); + + query.targetList = NIL; + query.returningList = NIL; + + /* Generate 'query.targetList' using 'tupdesc' */ + target_attr = 1; + for (i = 0; i < tupdesc->natts; i++) + { + Form_pg_attribute attr; + TargetEntry *te; + Param *param; + + attr = tupdesc->attrs[i]; + + if (attr->attisdropped) + continue; + + param = makeNode(Param); + param->paramkind = PARAM_EXTERN; + param->paramid = target_attr; + param->paramtype = attr->atttypid; + param->paramtypmod = attr->atttypmod; + param->paramcollid = attr->attcollation; + param->location = -1; + + te = makeTargetEntry((Expr *) param, target_attr, + pstrdup(NameStr(attr->attname)), + false); + + query.targetList = lappend(query.targetList, te); + + target_attr++; + } + + /* Create fake ModifyTableState */ + memset((void *) &mtstate, 0, sizeof(ModifyTableState)); + NodeSetTag(&mtstate, T_ModifyTableState); + mtstate.ps.state = estate; + mtstate.operation = CMD_INSERT; + mtstate.resultRelInfo = rri; + mtstate.mt_onconflict = ONCONFLICT_NONE; + + /* Plan fake query in for FDW access to be planned as well */ + elog(DEBUG1, "FDW(%u): plan fake query for fdw_private", partid); + plan = standard_planner(&query, 0, NULL); + + /* Extract fdw_private from useless plan */ + elog(DEBUG1, "FDW(%u): extract fdw_private", partid); + fdw_private = (List *) + linitial(((ModifyTable *) plan->planTree)->fdwPrivLists); + + /* call BeginForeignModify on 'rri' */ + elog(DEBUG1, "FDW(%u): call BeginForeignModify on a fake INSERT node", partid); + fdw_routine->BeginForeignModify(&mtstate, rri, fdw_private, 0, 0); + + /* Report success */ + elog(DEBUG1, "FDW(%u): success", partid); + } +} + /* * Used by fetch_estate_mod_data() to find estate_mod_data. 
*/ @@ -581,7 +759,11 @@ append_rri_to_estate(EState *estate, ResultRelInfo *rri) estate->es_num_result_relations * sizeof(ResultRelInfo)); } - /* Append ResultRelInfo to 'es_result_relations' array */ + /* + * Append ResultRelInfo to 'es_result_relations' array. + * NOTE: this is probably safe since ResultRelInfo + * contains nothing but pointers to various structs. + */ estate->es_result_relations[estate->es_num_result_relations] = *rri; /* Update estate_mod_data */ diff --git a/src/partition_filter.h b/src/partition_filter.h index 70d74ee807..5575bd0388 100644 --- a/src/partition_filter.h +++ b/src/partition_filter.h @@ -81,6 +81,7 @@ typedef struct extern bool pg_pathman_enable_partition_filter; +extern int pg_pathman_insert_into_fdw; extern CustomScanMethods partition_filter_plan_methods; extern CustomExecMethods partition_filter_exec_methods; diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 8d8412d9b1..2171267fd0 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -904,8 +904,8 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) shout_if_prel_is_invalid(relid, prel, PT_RANGE); /* Read max & min range values from PartRelationInfo */ - min_rvalue = prel->ranges[0].min; - max_rvalue = prel->ranges[PrelLastChild(prel)].max; + min_rvalue = PrelGetRangesArray(prel)[0].min; + max_rvalue = PrelGetRangesArray(prel)[PrelLastChild(prel)].max; /* Retrieve interval as TEXT from tuple */ interval_text = values[Anum_pathman_config_range_interval - 1]; @@ -1222,7 +1222,7 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, { select_range_partitions(c->constvalue, &cmp_func, - context->prel->ranges, + PrelGetRangesArray(context->prel), PrelChildrenCount(context->prel), strategy, result); @@ -1383,7 +1383,7 @@ handle_const(const Const *c, WalkerContext *context) select_range_partitions(c->constvalue, &tce->cmp_proc_finfo, - context->prel->ranges, + PrelGetRangesArray(context->prel), PrelChildrenCount(context->prel), 
BTEqualStrategyNumber, result); diff --git a/src/utils.c b/src/utils.c index 8be3c67b4b..f46b9b87d4 100644 --- a/src/utils.c +++ b/src/utils.c @@ -154,7 +154,9 @@ lock_rows_visitor(Plan *plan, void *context) } } -/* NOTE: Used for debug */ +/* + * Print Bitmapset as cstring. + */ #ifdef __GNUC__ __attribute__((unused)) #endif From 8d11c303d01087186605d7ea53bfc4b7f53725f4 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Tue, 27 Sep 2016 01:05:21 -0300 Subject: [PATCH 151/184] Include <sys/select.h> where needed <sys/select.h> is required by POSIX.1-2001 to get the prototype of select(2), but nearly no systems enforce that because older standards let you get away with including some other headers. Recent OpenBSD hacking has removed that frail touch of friendliness, however, which broke some compiles; fix all the way back to 9.1 by adding the required standard. Only vacuumdb.c was reported to fail, but it seems easier to fix the whole lot in a fell swoop. Per bug #14334 by Sean Farrell. --- src/backend/libpq/auth.c | 3 +++ src/backend/postmaster/pgstat.c | 3 +++ src/bin/pg_basebackup/pg_basebackup.c | 4 +++- src/bin/pg_basebackup/pg_recvlogical.c | 3 +++ src/bin/pg_basebackup/receivelog.c | 3 +++ src/bin/pg_dump/parallel.c | 4 ++++ src/bin/scripts/vacuumdb.c | 4 ++++ src/port/pgsleep.c | 3 +++ src/test/examples/testlibpq2.c | 4 ++++ 9 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/backend/libpq/auth.c b/src/backend/libpq/auth.c index dd8d2e9ff3..936a7ccae5 100644 --- a/src/backend/libpq/auth.c +++ b/src/backend/libpq/auth.c @@ -20,6 +20,9 @@ #include #include #include +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif #include "libpq/auth.h" #include "libpq/crypt.h" diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 3c862feb92..0440f4a1d4 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -28,6 +28,9 @@ #include #include #include +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif #include "pgstat.h" diff --git
a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 84eef6466d..fd93a3bc09 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -20,7 +20,9 @@ #include #include #include - +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif #ifdef HAVE_LIBZ #include #endif diff --git a/src/bin/pg_basebackup/pg_recvlogical.c b/src/bin/pg_basebackup/pg_recvlogical.c index 73625256ac..5907d607b0 100644 --- a/src/bin/pg_basebackup/pg_recvlogical.c +++ b/src/bin/pg_basebackup/pg_recvlogical.c @@ -15,6 +15,9 @@ #include #include #include +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif /* local includes */ #include "streamutil.h" diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index f8bd551ef9..406c01bfcc 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -16,6 +16,9 @@ #include #include +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif /* local includes */ #include "receivelog.h" diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c index 51a8eee369..ce3a06ae81 100644 --- a/src/bin/pg_dump/parallel.c +++ b/src/bin/pg_dump/parallel.c @@ -59,6 +59,10 @@ #include "postgres_fe.h" +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif + #include "parallel.h" #include "pg_backup_utils.h" diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c index f99be3bf7a..2125f42c99 100644 --- a/src/bin/scripts/vacuumdb.c +++ b/src/bin/scripts/vacuumdb.c @@ -12,6 +12,10 @@ #include "postgres_fe.h" +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif + #include "common.h" #include "dumputils.h" diff --git a/src/port/pgsleep.c b/src/port/pgsleep.c index 89a12b9da7..3f84d8f240 100644 --- a/src/port/pgsleep.c +++ b/src/port/pgsleep.c @@ -14,6 +14,9 @@ #include #include +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif /* * In a Windows backend, we don't use this implementation, but rather diff --git a/src/test/examples/testlibpq2.c b/src/test/examples/testlibpq2.c index
850993f6e8..07c6317a21 100644 --- a/src/test/examples/testlibpq2.c +++ b/src/test/examples/testlibpq2.c @@ -34,6 +34,10 @@ #include #include #include +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif + #include "libpq-fe.h" static void From 36f934612ca0c941809f9bf35720b4de48504a58 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 27 Sep 2016 12:07:35 +0300 Subject: [PATCH 152/184] fix success check in SpawnPartitionsWorker --- src/pathman_workers.c | 2 +- src/pg_pathman.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pathman_workers.c b/src/pathman_workers.c index c038dea976..5001fed84d 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -365,7 +365,7 @@ bgw_main_spawn_partitions(Datum main_arg) args->value_type); /* Finish transaction in an appropriate way */ - if (args->result == InvalidOid) + if (args->result == InvalidOid || IsAbortedTransactionBlockState()) AbortCurrentTransaction(); else CommitTransactionCommand(); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 5ae16c16c5..228ac67b4b 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -876,9 +876,10 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) interval_type, true, &partid); /* while (value < MIN) ...
*/ - spawn_partitions(PrelParentRelid(prel), value, min_rvalue, - prel->atttype, &interval_type_cmp, interval_binary, - interval_type, false, &partid); + if (partid == InvalidOid) + spawn_partitions(PrelParentRelid(prel), value, min_rvalue, + prel->atttype, &interval_type_cmp, interval_binary, + interval_type, false, &partid); SPI_finish(); /* close SPI connection */ } From 9dd080778dc6f9cac5e9affa4fd82909f1197000 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 27 Sep 2016 12:15:34 +0300 Subject: [PATCH 153/184] reset 'partid' in create_partitions_internal() in case of error --- src/pathman_workers.c | 2 +- src/pg_pathman.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/pathman_workers.c b/src/pathman_workers.c index 5001fed84d..c038dea976 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -365,7 +365,7 @@ bgw_main_spawn_partitions(Datum main_arg) args->value_type); /* Finish transaction in an appropriate way */ - if (args->result == InvalidOid || IsAbortedTransactionBlockState()) + if (args->result == InvalidOid) AbortCurrentTransaction(); else CommitTransactionCommand(); diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 228ac67b4b..2b5cfeaaaa 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -902,6 +902,9 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) FreeErrorData(edata); SPI_finish(); /* no problem if not connected */ + + /* Reset 'partid' in case of error */ + partid = InvalidOid; } PG_END_TRY(); From fdaa5777062856e125961501dadfbc27409e9369 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 28 Sep 2016 12:38:33 -0400 Subject: [PATCH 154/184] worker_spi: Call pgstat_report_stat. Without this, statistics changes accumulated by the worker never get reported to the stats collector, which is bad. 
Julien Rouhaud --- src/test/modules/worker_spi/worker_spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/modules/worker_spi/worker_spi.c b/src/test/modules/worker_spi/worker_spi.c index fcb34ca198..7c655f9021 100644 --- a/src/test/modules/worker_spi/worker_spi.c +++ b/src/test/modules/worker_spi/worker_spi.c @@ -292,6 +292,7 @@ worker_spi_main(Datum main_arg) SPI_finish(); PopActiveSnapshot(); CommitTransactionCommand(); + pgstat_report_stat(false); pgstat_report_activity(STATE_IDLE, NULL); } From dfbbfebba2c5ffd6ffcd4c5bf0a743643e37bdf6 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 29 Sep 2016 01:12:38 +0300 Subject: [PATCH 155/184] improve partition creation callbacks subsystem (robust JSONB generation), rename some 'params'-related functions, remove useless code --- expected/pg_pathman.out | 50 +++++------ hash.sql | 4 +- init.sql | 89 ++++++++----------- range.sql | 20 ++--- sql/pg_pathman.sql | 18 ++-- src/pathman.h | 2 +- src/pl_funcs.c | 183 ++++++++++++++++++++++++++-------------- src/relation_info.c | 2 +- src/utils.c | 114 ++++++++----------------- src/utils.h | 13 ++- 10 files changed, 238 insertions(+), 257 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 8cd92914d0..f45bcc24ea 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -35,9 +35,9 @@ SELECT * FROM test.hash_rel; 3 | 3 (3 rows) -SELECT pathman.disable_parent('test.hash_rel'); - disable_parent ----------------- +SELECT pathman.set_enable_parent('test.hash_rel', false); + set_enable_parent +------------------- (1 row) @@ -55,9 +55,9 @@ SELECT * FROM test.hash_rel; ----+------- (0 rows) -SELECT pathman.enable_parent('test.hash_rel'); - enable_parent ---------------- +SELECT pathman.set_enable_parent('test.hash_rel', true); + set_enable_parent +------------------- (1 row) @@ -1260,17 +1260,17 @@ SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; 74 | Sun Mar 15 00:00:00 2015 (1 row) -SELECT
pathman.disable_auto('test.range_rel'); - disable_auto --------------- +SELECT pathman.set_auto_partitioning('test.range_rel', false); + set_auto_partitioning +----------------------- (1 row) INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); ERROR: There is no suitable partition for key 'Mon Jun 01 00:00:00 2015' -SELECT pathman.enable_auto('test.range_rel'); - enable_auto -------------- +SELECT pathman.set_auto_partitioning('test.range_rel', true); + set_auto_partitioning +----------------------- (1 row) @@ -1770,11 +1770,11 @@ NOTICE: 100 rows copied from test_fkey_0 /* Check callbacks */ CREATE TABLE log(id serial, message text); -CREATE OR REPLACE FUNCTION abc_on_partition_created_callback(args jsonb) +CREATE OR REPLACE FUNCTION abc_on_partition_created_callback(args JSONB) RETURNS VOID AS $$ DECLARE - start_value TEXT := args->>'start'; - end_value TEXT := args::jsonb->'end'; + start_value TEXT := args->>'start'; + end_value TEXT := args->'end'; BEGIN INSERT INTO log(message) VALUES (start_value || '-' || end_value); @@ -1788,9 +1788,9 @@ NOTICE: sequence "abc_seq" does not exist, skipping 2 (1 row) -SELECT set_callback('abc', 'abc_on_partition_created_callback'); - set_callback --------------- +SELECT set_part_init_callback('abc', 'abc_on_partition_created_callback'); + set_part_init_callback +------------------------ (1 row) @@ -1814,12 +1814,12 @@ SELECT add_range_partition('abc', 401, 501); public.abc_6 (1 row) -SELECT message FROM log; - message ---------- - 201-301 - 301-401 - -99-1 - 401-501 +SELECT message FROM log ORDER BY id; + message +----------- + 201-"301" + 301-"401" + -99-"1" + 401-"501" (4 rows) diff --git a/hash.sql b/hash.sql index 8c6be12987..67b89b8c0e 100644 --- a/hash.sql +++ b/hash.sql @@ -84,10 +84,10 @@ BEGIN /* Copy data */ IF partition_data = true THEN - PERFORM @extschema@.disable_parent(parent_relid); + PERFORM @extschema@.set_enable_parent(parent_relid, false); PERFORM @extschema@.partition_data(parent_relid); ELSE - PERFORM 
@extschema@.enable_parent(parent_relid); + PERFORM @extschema@.set_enable_parent(parent_relid, true); END IF; RETURN partitions_count; diff --git a/init.sql b/init.sql index dddf610455..70ffb9ddad 100644 --- a/init.sql +++ b/init.sql @@ -31,13 +31,13 @@ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config ( * partrel - regclass (relation type, stored as Oid) * enable_parent - add parent table to plan * auto - enable automatic partition creation - * callback - + * init_callback - cb to be executed on partition creation */ CREATE TABLE IF NOT EXISTS @extschema@.pathman_config_params ( partrel REGCLASS NOT NULL PRIMARY KEY, enable_parent BOOLEAN NOT NULL DEFAULT TRUE, auto BOOLEAN NOT NULL DEFAULT TRUE, - callback REGPROCEDURE + init_callback REGPROCEDURE NOT NULL DEFAULT 0 ); CREATE UNIQUE INDEX i_pathman_config_params ON @extschema@.pathman_config_params(partrel); @@ -87,7 +87,7 @@ BEGIN RETURN count(*) FROM pg_inherits WHERE inhparent = relation; END $$ -LANGUAGE plpgsql; +LANGUAGE plpgsql STRICT; /* * Add a row describing the optional parameter to pathman_config_params. @@ -108,62 +108,44 @@ $$ LANGUAGE plpgsql; /* - * Include parent relation into query plan's for specified relation. - */ -CREATE OR REPLACE FUNCTION @extschema@.enable_parent(relation REGCLASS) -RETURNS VOID AS -$$ -BEGIN - PERFORM @extschema@.pathman_set_param(relation, 'enable_parent', True); -END -$$ -LANGUAGE plpgsql; - -/* - * Do not include parent relation into query plan's for specified relation. + * Include\exclude parent relation in query plan. */ -CREATE OR REPLACE FUNCTION @extschema@.disable_parent(relation REGCLASS) -RETURNS VOID AS -$$ -BEGIN - PERFORM @extschema@.pathman_set_param(relation, 'enable_parent', False); -END -$$ -LANGUAGE plpgsql; - -/* - * Enable automatic partition creation. 
- */ -CREATE OR REPLACE FUNCTION @extschema@.enable_auto(relation REGCLASS) +CREATE OR REPLACE FUNCTION @extschema@.set_enable_parent( + relation REGCLASS, + value BOOLEAN) RETURNS VOID AS $$ BEGIN - PERFORM @extschema@.pathman_set_param(relation, 'auto', True); + PERFORM @extschema@.pathman_set_param(relation, 'enable_parent', value); END $$ -LANGUAGE plpgsql; +LANGUAGE plpgsql STRICT; /* - * Disable automatic partition creation. + * Enable\disable automatic partition creation. */ -CREATE OR REPLACE FUNCTION @extschema@.disable_auto(relation REGCLASS) +CREATE OR REPLACE FUNCTION @extschema@.set_auto_partitioning( + relation REGCLASS, + value BOOLEAN) RETURNS VOID AS $$ BEGIN - PERFORM @extschema@.pathman_set_param(relation, 'auto', False); + PERFORM @extschema@.pathman_set_param(relation, 'auto', value); END $$ -LANGUAGE plpgsql; +LANGUAGE plpgsql STRICT; /* * Set partition creation callback */ -CREATE OR REPLACE FUNCTION @extschema@.set_callback(relation REGCLASS, callback REGPROC) +CREATE OR REPLACE FUNCTION @extschema@.set_part_init_callback( + relation REGCLASS, + callback REGPROC) RETURNS VOID AS $$ BEGIN PERFORM @extschema@.validate_on_partition_created_callback(callback); - PERFORM @extschema@.pathman_set_param(relation, 'callback', callback); + PERFORM @extschema@.pathman_set_param(relation, 'init_callback', callback); END $$ LANGUAGE plpgsql; @@ -262,7 +244,7 @@ BEGIN RETURN; END $$ -LANGUAGE plpgsql +LANGUAGE plpgsql STRICT SET pg_pathman.enable_partitionfilter = on; /* ensures that PartitionFilter is ON */ /* @@ -291,7 +273,7 @@ BEGIN RETURN; END $$ -LANGUAGE plpgsql +LANGUAGE plpgsql STRICT SET pg_pathman.enable_partitionfilter = on; /* ensures that PartitionFilter is ON */ /* @@ -311,7 +293,7 @@ BEGIN PERFORM @extschema@.on_remove_partitions(parent_relid); END $$ -LANGUAGE plpgsql; +LANGUAGE plpgsql STRICT; /* * Aggregates several common relation checks before partitioning. 
@@ -380,7 +362,7 @@ BEGIN INTO schema, relname; END $$ -LANGUAGE plpgsql; +LANGUAGE plpgsql STRICT; /* * Returns schema-qualified name for table @@ -399,7 +381,7 @@ BEGIN WHERE oid = cls::oid); END $$ -LANGUAGE plpgsql; +LANGUAGE plpgsql STRICT; /* * Validates relation name. It must be schema qualified @@ -499,7 +481,7 @@ BEGIN EXECUTE format('DROP FUNCTION IF EXISTS %s() CASCADE', @extschema@.build_update_trigger_func_name(parent_relid)); END -$$ LANGUAGE plpgsql; +$$ LANGUAGE plpgsql STRICT; /* * Drop partitions @@ -584,7 +566,7 @@ BEGIN pg_get_constraintdef(rec.conid)); END LOOP; END -$$ LANGUAGE plpgsql; +$$ LANGUAGE plpgsql STRICT; /* @@ -712,7 +694,7 @@ RETURNS VOID AS 'pg_pathman', 'debug_capture' LANGUAGE C STRICT; /* - * Return tablespace name for specified relation + * Return tablespace name for specified relation. */ CREATE OR REPLACE FUNCTION @extschema@.get_rel_tablespace_name(relation REGCLASS) RETURNS TEXT AS 'pg_pathman', 'get_rel_tablespace_name' @@ -720,20 +702,17 @@ LANGUAGE C STRICT; /* * Checks that callback function meets specific requirements. Particularly it - * must have the only JSONB argument and VOID return type + * must have the only JSONB argument and VOID return type. */ CREATE OR REPLACE FUNCTION @extschema@.validate_on_partition_created_callback(callback REGPROC) -RETURNS VOID AS 'pg_pathman', 'validate_on_partition_created_callback' +RETURNS VOID AS 'pg_pathman', 'validate_on_part_init_callback_pl' LANGUAGE C STRICT; /* - * Builds JSONB object containing new partition parameters and invoke the - * callback + * Builds JSONB object containing new partition parameters and invoke the callback. 
*/ CREATE OR REPLACE FUNCTION @extschema@.invoke_on_partition_created_callback( - parent REGCLASS, - partition REGCLASS, - start_value ANYELEMENT, - end_value ANYELEMENT) -RETURNS VOID AS 'pg_pathman', 'invoke_on_partition_created_callback' -LANGUAGE C STRICT; + parent_relid REGCLASS, + partition REGCLASS) +RETURNS JSONB AS 'pg_pathman', 'invoke_on_partition_created_callback' +LANGUAGE C; diff --git a/range.sql b/range.sql index bde6969fa8..96d6b5f8a9 100644 --- a/range.sql +++ b/range.sql @@ -172,10 +172,10 @@ BEGIN /* Relocate data if asked to */ IF partition_data = true THEN - PERFORM @extschema@.disable_parent(parent_relid); + PERFORM @extschema@.set_enable_parent(parent_relid, false); PERFORM @extschema@.partition_data(parent_relid); ELSE - PERFORM @extschema@.enable_parent(parent_relid); + PERFORM @extschema@.set_enable_parent(parent_relid, true); END IF; RETURN p_count; @@ -279,10 +279,10 @@ BEGIN /* Relocate data if asked to */ IF partition_data = true THEN - PERFORM @extschema@.disable_parent(parent_relid); + PERFORM @extschema@.set_enable_parent(parent_relid, false); PERFORM @extschema@.partition_data(parent_relid); ELSE - PERFORM @extschema@.enable_parent(parent_relid); + PERFORM @extschema@.set_enable_parent(parent_relid, true); END IF; RETURN p_count; @@ -356,10 +356,10 @@ BEGIN /* Relocate data if asked to */ IF partition_data = true THEN - PERFORM @extschema@.disable_parent(parent_relid); + PERFORM @extschema@.set_enable_parent(parent_relid, false); PERFORM @extschema@.partition_data(parent_relid); ELSE - PERFORM @extschema@.enable_parent(parent_relid); + PERFORM @extschema@.set_enable_parent(parent_relid, true); END IF; RETURN part_count; /* number of created partitions */ @@ -432,10 +432,10 @@ BEGIN /* Relocate data if asked to */ IF partition_data = true THEN - PERFORM @extschema@.disable_parent(parent_relid); + PERFORM @extschema@.set_enable_parent(parent_relid, false); PERFORM @extschema@.partition_data(parent_relid); ELSE - PERFORM 
@extschema@.enable_parent(parent_relid); + PERFORM @extschema@.set_enable_parent(parent_relid, true); END IF; RETURN part_count; /* number of created partitions */ @@ -519,9 +519,7 @@ BEGIN PERFORM @extschema@.copy_foreign_keys(parent_relid, v_child_relname::REGCLASS); PERFORM @extschema@.invoke_on_partition_created_callback(parent_relid, - v_child_relname, - p_start_value, - p_end_value); + v_child_relname::REGCLASS); RETURN v_child_relname; END diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index 2bd11696be..f6cc8ed070 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -15,10 +15,10 @@ ALTER TABLE test.hash_rel ALTER COLUMN value SET NOT NULL; SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3, partition_data:=false); EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; SELECT * FROM test.hash_rel; -SELECT pathman.disable_parent('test.hash_rel'); +SELECT pathman.set_enable_parent('test.hash_rel', false); EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; SELECT * FROM test.hash_rel; -SELECT pathman.enable_parent('test.hash_rel'); +SELECT pathman.set_enable_parent('test.hash_rel', true); EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; SELECT * FROM test.hash_rel; SELECT pathman.drop_partitions('test.hash_rel'); @@ -525,9 +525,9 @@ SELECT * FROM test.range_rel WHERE dt = '2014-12-15'; EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; -SELECT pathman.disable_auto('test.range_rel'); +SELECT pathman.set_auto_partitioning('test.range_rel', false); INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); -SELECT pathman.enable_auto('test.range_rel'); +SELECT pathman.set_auto_partitioning('test.range_rel', true); INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); DROP TABLE test.range_rel CASCADE; @@ -669,11 +669,11 @@ SELECT drop_partitions('test_fkey'); /* Check callbacks */ CREATE TABLE log(id serial, message text); -CREATE OR REPLACE FUNCTION 
abc_on_partition_created_callback(args jsonb) +CREATE OR REPLACE FUNCTION abc_on_partition_created_callback(args JSONB) RETURNS VOID AS $$ DECLARE - start_value TEXT := args->>'start'; - end_value TEXT := args::jsonb->'end'; + start_value TEXT := args->>'start'; + end_value TEXT := args->'end'; BEGIN INSERT INTO log(message) VALUES (start_value || '-' || end_value); @@ -682,10 +682,10 @@ $$ language plpgsql; CREATE TABLE abc(a serial, b int); SELECT create_range_partitions('abc', 'a', 1, 100, 2); -SELECT set_callback('abc', 'abc_on_partition_created_callback'); +SELECT set_part_init_callback('abc', 'abc_on_partition_created_callback'); INSERT INTO abc VALUES (123, 1); INSERT INTO abc VALUES (223, 1); SELECT append_range_partition('abc'); SELECT prepend_range_partition('abc'); SELECT add_range_partition('abc', 401, 501); -SELECT message FROM log; +SELECT message FROM log ORDER BY id; diff --git a/src/pathman.h b/src/pathman.h index 631454cd05..5c3db46232 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -59,7 +59,7 @@ #define Anum_pathman_config_params_partrel 1 /* primary key */ #define Anum_pathman_config_params_enable_parent 2 /* include parent into plan */ #define Anum_pathman_config_params_auto 3 /* auto partitions creation */ -#define Anum_pathman_config_params_callback 4 /* auto partitions creation */ +#define Anum_pathman_config_params_init_callback 4 /* partition action callback */ /* * Cache current PATHMAN_CONFIG relid (set during load_config()). 
diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 3589a221cb..824643be62 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -27,12 +27,11 @@ #include "utils/array.h" #include "utils/builtins.h" #include +#include "utils/jsonb.h" #include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/syscache.h" #include "utils/typcache.h" -#include "utils/jsonb.h" -#include "utils/fmgroids.h" /* declarations */ @@ -62,7 +61,7 @@ PG_FUNCTION_INFO_V1( lock_partitioned_relation ); PG_FUNCTION_INFO_V1( prevent_relation_modification ); PG_FUNCTION_INFO_V1( debug_capture ); PG_FUNCTION_INFO_V1( get_rel_tablespace_name ); -PG_FUNCTION_INFO_V1( validate_on_partition_created_callback ); +PG_FUNCTION_INFO_V1( validate_on_part_init_callback_pl ); PG_FUNCTION_INFO_V1( invoke_on_partition_created_callback ); static void on_partitions_created_internal(Oid partitioned_table, bool add_callbacks); @@ -785,27 +784,13 @@ get_rel_tablespace_name(PG_FUNCTION_ARGS) /* * Checks that callback function meets specific requirements. Particularly it - * must have the only JSONB argument and VOID return type + * must have the only JSONB argument and BOOL return type. 
*/ Datum -validate_on_partition_created_callback(PG_FUNCTION_ARGS) +validate_on_part_init_callback_pl(PG_FUNCTION_ARGS) { - HeapTuple tp; - Oid callback = PG_GETARG_OID(0); - Form_pg_proc functup; - - tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(callback)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for function %u", callback); - functup = (Form_pg_proc) GETSTRUCT(tp); - - if (functup->pronargs != 1 || functup->proargtypes.values[0] != JSONBOID || - functup->prorettype != VOIDOID) - elog(ERROR, - "Callback function must have only one JSNOB argument " - "and return VOID"); + validate_on_part_init_cb(PG_GETARG_OID(0), true); - ReleaseSysCache(tp); PG_RETURN_VOID(); } @@ -816,53 +801,121 @@ validate_on_partition_created_callback(PG_FUNCTION_ARGS) Datum invoke_on_partition_created_callback(PG_FUNCTION_ARGS) { - char *json; - Datum jsonb; - Oid parent_oid = PG_GETARG_OID(0); - Oid partition_oid = PG_GETARG_OID(1); - Oid type = get_fn_expr_argtype(fcinfo->flinfo, 2); - Datum start_value = PG_GETARG_DATUM(2); - Datum end_value = PG_GETARG_DATUM(3); - const PartRelationInfo *prel; +#define JSB_INIT_VAL(value, val_type, val_cstring) \ + do { \ + (value)->type = jbvString; \ + (value)->val.string.len = strlen(val_cstring); \ + (value)->val.string.val = val_cstring; \ + pushJsonbValue(&jsonb_state, val_type, (value)); \ + } while (0) - if ((prel = get_pathman_relation_info(parent_oid)) == NULL) - elog(ERROR, - "Relation %s isn't partitioned by pg_pathman", - get_rel_name(parent_oid)); +#define PART_TYPE_STR(part_type) ( #part_type ) + + FmgrInfo cb_flinfo; + FunctionCallInfoData cb_fcinfo; + + const PartRelationInfo *prel; + Oid parent_oid = PG_GETARG_OID(0), + partition_oid = PG_GETARG_OID(1); + uint32 i, + part_idx; + bool part_found = false; + Datum jsonb; + JsonbParseState *jsonb_state = NULL; + JsonbValue *result, + key, + val; + + if (PG_ARGISNULL(0)) + elog(ERROR, "parent_relid should not be null"); + + if (PG_ARGISNULL(1)) + elog(ERROR, 
"partition should not be null"); + + prel = get_pathman_relation_info(parent_oid); + shout_if_prel_is_invalid(parent_oid, prel, PT_INDIFFERENT); /* If there is no callback function specified then we're done */ - if (!prel->callback) - PG_RETURN_VOID(); - - /* Convert ANYELEMENT arguments to jsonb */ - start_value = convert_to_jsonb(start_value, type); - end_value = convert_to_jsonb(end_value, type); + if (prel->callback == InvalidOid) + PG_RETURN_NULL(); - /* - * Build jsonb object to pass into callback - * - * XXX it would be nice to have this rewrited with pushJsonbValue() to get - * rid of string formatting and parsing. See jsonb_build_object() for - * example - */ - json = psprintf("{" - "\"parent\": %u," - "\"partition\": %u," - "\"part_type\": %u," - "\"start\": %s," - "\"end\": %s," - "\"value_type\": %u}", - parent_oid, - partition_oid, - prel->parttype, - datum_to_cstring(start_value, JSONBOID), - datum_to_cstring(end_value, JSONBOID), - type - ); - jsonb = OidFunctionCall1(F_JSONB_IN, CStringGetDatum(json)); - - /* Invoke callback */ - OidFunctionCall1(prel->callback, JsonbGetDatum(jsonb)); - - PG_RETURN_JSONB(jsonb); + for (i = 0; i < PrelChildrenCount(prel); i++) + { + if (PrelGetChildrenArray(prel)[i] == partition_oid) + { + part_found = true; + part_idx = i; + break; + } + } + + if (!part_found) + elog(ERROR, "cannot find partition %u", partition_oid); + + switch (prel->parttype) + { + case PT_HASH: + { + pushJsonbValue(&jsonb_state, WJB_BEGIN_OBJECT, NULL); + + JSB_INIT_VAL(&key, WJB_KEY, "parent"); + JSB_INIT_VAL(&val, WJB_VALUE, get_rel_name_or_relid(parent_oid)); + JSB_INIT_VAL(&key, WJB_KEY, "partition"); + JSB_INIT_VAL(&val, WJB_VALUE, get_rel_name_or_relid(partition_oid)); + JSB_INIT_VAL(&key, WJB_KEY, "part_type"); + JSB_INIT_VAL(&val, WJB_VALUE, "HASH"); + + result = pushJsonbValue(&jsonb_state, WJB_END_OBJECT, NULL); + } + break; + + case PT_RANGE: + { + RangeEntry *re = &PrelGetRangesArray(prel)[part_idx]; + char *start_value, + *end_value; 
+ + /* Convert min & max to CSTRING */ + start_value = datum_to_cstring(re->min, prel->atttype); + end_value = datum_to_cstring(re->max, prel->atttype); + + pushJsonbValue(&jsonb_state, WJB_BEGIN_OBJECT, NULL); + + JSB_INIT_VAL(&key, WJB_KEY, "parent"); + JSB_INIT_VAL(&val, WJB_VALUE, get_rel_name_or_relid(parent_oid)); + JSB_INIT_VAL(&key, WJB_KEY, "partition"); + JSB_INIT_VAL(&val, WJB_VALUE, get_rel_name_or_relid(partition_oid)); + JSB_INIT_VAL(&key, WJB_KEY, "part_type"); + JSB_INIT_VAL(&val, WJB_VALUE, "RANGE"); + JSB_INIT_VAL(&key, WJB_KEY, "start"); + JSB_INIT_VAL(&val, WJB_VALUE, start_value); + JSB_INIT_VAL(&key, WJB_KEY, "end"); + JSB_INIT_VAL(&val, WJB_VALUE, end_value); + + result = pushJsonbValue(&jsonb_state, WJB_END_OBJECT, NULL); + } + break; + + default: + elog(ERROR, "Unknown partitioning type %u", prel->parttype); + break; + } + + /* Construct JSONB object */ + jsonb = PointerGetDatum(JsonbValueToJsonb(result)); + + /* Validate the callback's signature */ + validate_on_part_init_cb(prel->callback, true); + + fmgr_info(prel->callback, &cb_flinfo); + + InitFunctionCallInfoData(cb_fcinfo, &cb_flinfo, 1, InvalidOid, NULL, NULL); + + cb_fcinfo.arg[0] = jsonb; + cb_fcinfo.argnull[0] = false; + + /* Invoke the callback */ + FunctionCallInvoke(&cb_fcinfo); + + PG_RETURN_DATUM(jsonb); } diff --git a/src/relation_info.c b/src/relation_info.c index 8cc25b9f30..4da14f2897 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -165,7 +165,7 @@ refresh_pathman_relation_info(Oid relid, { prel->enable_parent = param_values[Anum_pathman_config_params_enable_parent - 1]; prel->auto_partition = param_values[Anum_pathman_config_params_auto - 1]; - prel->callback = param_values[Anum_pathman_config_params_callback - 1]; + prel->callback = param_values[Anum_pathman_config_params_init_callback - 1]; } /* Else set default values if they cannot be found */ else diff --git a/src/utils.c b/src/utils.c index 3d3d70f5cd..e863b60654 100644 --- a/src/utils.c +++ 
b/src/utils.c @@ -618,7 +618,7 @@ datum_to_cstring(Datum datum, Oid typid) if (HeapTupleIsValid(tup)) { - Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tup); + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tup); result = OidOutputFunctionCall(typtup->typoutput, datum); ReleaseSysCache(tup); } @@ -628,85 +628,6 @@ datum_to_cstring(Datum datum, Oid typid) return result; } -/* - * Converts datum to jsonb type - * This function is a wrapper to to_jsonb() - */ -Datum -convert_to_jsonb(Datum datum, Oid typid) -{ - List *args; - FuncExpr *fexpr; - FmgrInfo flinfo; - Const *constval; - - /* Build const value to use in the FuncExpr node. */ - constval = makeConstFromDatum(datum, typid); - - /* Function takes single argument */ - args = list_make1(constval); - - /* Build function expression */ - fexpr = makeFuncNode(F_TO_JSONB, args); - fmgr_info(F_TO_JSONB, &flinfo); - flinfo.fn_expr = (Node *) fexpr; - - return FunctionCall1(&flinfo, datum); -} - -/* - * Builds Const from specified datum and type oid - */ -Const * -makeConstFromDatum(Datum datum, Oid typid) -{ - HeapTuple tp; - Const *constval; - Form_pg_type typtup; - - tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for type %u", typid); - typtup = (Form_pg_type) GETSTRUCT(tp); - constval = makeConst( - typid, - typtup->typtypmod, - typtup->typcollation, - typtup->typlen, - datum, - false, - typtup->typbyval); - ReleaseSysCache(tp); - - return constval; -} - -/* - * Builds function expression - */ -FuncExpr * -makeFuncNode(Oid funcid, List *args) -{ - HeapTuple tp; - FuncExpr *fexpr; - Form_pg_proc functup; - - tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for function %u", funcid); - functup = (Form_pg_proc) GETSTRUCT(tp); - fexpr = makeFuncExpr(funcid, - functup->prorettype, - args, - InvalidOid, - InvalidOid, - COERCE_EXPLICIT_CALL); - ReleaseSysCache(tp); - 
fexpr->funcvariadic = false; - - return fexpr; -} - /* * Try to get relname or at least relid as cstring. */ @@ -734,6 +655,7 @@ get_rel_persistence(Oid relid) tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for relation %u", relid); + reltup = (Form_pg_class) GETSTRUCT(tp); result = reltup->relpersistence; ReleaseSysCache(tp); @@ -741,3 +663,35 @@ get_rel_persistence(Oid relid) return result; } #endif + +/* + * Checks that callback function meets specific requirements. + * It must have the only JSONB argument and BOOL return type. + */ +bool +validate_on_part_init_cb(Oid procid, bool emit_error) +{ + HeapTuple tp; + Form_pg_proc functup; + bool is_ok = true; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(procid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", procid); + + functup = (Form_pg_proc) GETSTRUCT(tp); + + if (functup->pronargs != 1 || + functup->proargtypes.values[0] != JSONBOID || + functup->prorettype != VOIDOID) + is_ok = false; + + ReleaseSysCache(tp); + + if (emit_error && !is_ok) + elog(ERROR, + "Callback function must have the following signature: " + "callback(arg JSONB) RETURNS VOID"); + + return is_ok; +} diff --git a/src/utils.h b/src/utils.h index db074e103c..21070c7bae 100644 --- a/src/utils.h +++ b/src/utils.h @@ -49,6 +49,7 @@ void postprocess_lock_rows(List *rtable, Plan *plan); bool clause_contains_params(Node *clause); bool is_date_type_internal(Oid typid); bool is_string_type_internal(Oid typid); +bool validate_on_part_init_cb(Oid procid, bool emit_error); /* * Misc. @@ -56,6 +57,10 @@ bool is_string_type_internal(Oid typid); Oid get_pathman_schema(void); List * list_reverse(List *l); +#if PG_VERSION_NUM < 90600 +char get_rel_persistence(Oid relid); +#endif + /* * Handy execution-stage functions. 
*/ @@ -65,13 +70,5 @@ void fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, Oid type2); char * datum_to_cstring(Datum datum, Oid typid); -Datum datum_in(char *str, Oid typid); -Datum convert_to_jsonb(Datum datum, Oid typid); -Const *makeConstFromDatum(Datum datum, Oid typid); -FuncExpr *makeFuncNode(Oid funcid, List *args); - -#if PG_VERSION_NUM < 90600 -char get_rel_persistence(Oid relid); -#endif #endif From 2937ffa5d5790e6bcefae0492388adb192db936b Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Wed, 28 Sep 2016 19:31:58 -0300 Subject: [PATCH 156/184] Silence compiler warnings Reported by Peter Eisentraut. Coding suggested by Tom Lane. --- src/backend/catalog/objectaddress.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index e44d7d09e1..179bf125c5 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -2237,23 +2237,18 @@ get_object_namespace(const ObjectAddress *address) int read_objtype_from_string(const char *objtype) { - ObjectType type; int i; for (i = 0; i < lengthof(ObjectTypeMap); i++) { if (strcmp(ObjectTypeMap[i].tm_name, objtype) == 0) - { - type = ObjectTypeMap[i].tm_type; - break; - } + return ObjectTypeMap[i].tm_type; } - if (i >= lengthof(ObjectTypeMap)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unrecognized object type \"%s\"", objtype))); + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized object type \"%s\"", objtype))); - return type; + return -1; /* keep compiler quiet */ } /* From 6b894fcf691be8748af8f24c3cb8c27c89160b96 Mon Sep 17 00:00:00 2001 From: Dmitry Maslyuk Date: Thu, 29 Sep 2016 11:27:51 +0300 Subject: [PATCH 157/184] [Doc]: fix wiki link on SEPostgreSQL --- doc/src/sgml/sepgsql.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/sepgsql.sgml b/doc/src/sgml/sepgsql.sgml index 
4758d21d28..c1221c56d6 100644 --- a/doc/src/sgml/sepgsql.sgml +++ b/doc/src/sgml/sepgsql.sgml @@ -753,7 +753,7 @@ ERROR: SELinux: security policy violation External Resources - SE-&productname; Introduction + SE-PostgreSQL Introduction This wiki page provides a brief overview, security design, architecture, From 13852182b2461d741c4cc65bb0bb25cdbbad25c9 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 29 Sep 2016 17:51:27 +0300 Subject: [PATCH 158/184] (performance) do not fetch PartRelationInfo in invoke_on_partition_created_callback(), rename callback -> init_callback, fixes --- expected/pg_pathman.out | 12 +++--- hash.sql | 19 ++++++--- init.sql | 16 ++++++-- range.sql | 78 ++++++++++++++---------------------- sql/pg_pathman.sql | 4 +- src/init.c | 6 +++ src/pl_funcs.c | 87 +++++++++++++++++++---------------------- src/relation_info.c | 4 +- src/relation_info.h | 2 +- 9 files changed, 115 insertions(+), 113 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index f45bcc24ea..49e1622b3d 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -1260,17 +1260,17 @@ SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; 74 | Sun Mar 15 00:00:00 2015 (1 row) -SELECT pathman.set_auto_partitioning('test.range_rel', false); - set_auto_partitioning ------------------------ +SELECT pathman.set_auto('test.range_rel', false); + set_auto +---------- (1 row) INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); ERROR: There is no suitable partition for key 'Mon Jun 01 00:00:00 2015' -SELECT pathman.set_auto_partitioning('test.range_rel', true); - set_auto_partitioning ------------------------ +SELECT pathman.set_auto('test.range_rel', true); + set_auto +---------- (1 row) diff --git a/hash.sql b/hash.sql index 67b89b8c0e..cd1ab469ab 100644 --- a/hash.sql +++ b/hash.sql @@ -24,7 +24,7 @@ DECLARE v_plain_schema TEXT; v_plain_relname TEXT; v_hashfunc TEXT; - v_tablespace TEXT; + v_init_callback REGPROCEDURE; BEGIN IF partition_data = 
true THEN @@ -50,9 +50,6 @@ BEGIN INSERT INTO @extschema@.pathman_config (partrel, attname, parttype) VALUES (parent_relid, attribute, 1); - /* Determine tablespace of parent table */ - v_tablespace := @extschema@.get_rel_tablespace_name(parent_relid); - /* Create partitions and update pg_pathman configuration */ FOR partnum IN 0..partitions_count-1 LOOP @@ -64,7 +61,7 @@ BEGIN 'CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s) TABLESPACE %s', v_child_relname, parent_relid::TEXT, - v_tablespace); + @extschema@.get_rel_tablespace_name(parent_relid)); EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (@extschema@.get_hash_part_idx(%s(%s), %s) = %s)', @@ -77,6 +74,18 @@ BEGIN partnum); PERFORM @extschema@.copy_foreign_keys(parent_relid, v_child_relname::REGCLASS); + + /* Fetch init_callback from 'params' table */ + WITH stub_callback(stub) as (values (0)) + SELECT coalesce(init_callback, 0::REGPROCEDURE) + FROM stub_callback + LEFT JOIN @extschema@.pathman_config_params AS params + ON params.partrel = parent_relid + INTO v_init_callback; + + PERFORM @extschema@.invoke_on_partition_created_callback(parent_relid, + v_child_relname::REGCLASS, + v_init_callback); END LOOP; /* Notify backend about changes */ diff --git a/init.sql b/init.sql index 70ffb9ddad..1267837206 100644 --- a/init.sql +++ b/init.sql @@ -124,7 +124,7 @@ LANGUAGE plpgsql STRICT; /* * Enable\disable automatic partition creation. 
*/ -CREATE OR REPLACE FUNCTION @extschema@.set_auto_partitioning( +CREATE OR REPLACE FUNCTION @extschema@.set_auto( relation REGCLASS, value BOOLEAN) RETURNS VOID AS @@ -713,6 +713,16 @@ LANGUAGE C STRICT; */ CREATE OR REPLACE FUNCTION @extschema@.invoke_on_partition_created_callback( parent_relid REGCLASS, - partition REGCLASS) -RETURNS JSONB AS 'pg_pathman', 'invoke_on_partition_created_callback' + partition REGCLASS, + init_callback REGPROCEDURE, + start_value ANYELEMENT, + end_value ANYELEMENT) +RETURNS VOID AS 'pg_pathman', 'invoke_on_partition_created_callback' +LANGUAGE C; + +CREATE OR REPLACE FUNCTION @extschema@.invoke_on_partition_created_callback( + parent_relid REGCLASS, + partition REGCLASS, + init_callback REGPROCEDURE) +RETURNS VOID AS 'pg_pathman', 'invoke_on_partition_created_callback' LANGUAGE C; diff --git a/range.sql b/range.sql index 96d6b5f8a9..974412a65d 100644 --- a/range.sql +++ b/range.sql @@ -84,15 +84,15 @@ CREATE OR REPLACE FUNCTION @extschema@.create_range_partitions( p_start_value ANYELEMENT, p_interval INTERVAL, p_count INTEGER DEFAULT NULL, - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT TRUE) RETURNS INTEGER AS $$ DECLARE v_rows_count INTEGER; v_max p_start_value%TYPE; v_cur_value p_start_value%TYPE := p_start_value; - v_tablespace TEXT; i INTEGER; + BEGIN IF partition_data = true THEN /* Acquire data modification lock */ @@ -149,9 +149,6 @@ BEGIN INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); - /* Determine tablespace of parent table */ - v_tablespace := @extschema@.get_rel_tablespace_name(parent_relid); - /* Create first partition */ FOR i IN 1..p_count LOOP @@ -162,7 +159,7 @@ BEGIN parent_relid, p_start_value, p_start_value + p_interval, - v_tablespace; + @extschema@.get_rel_tablespace_name(parent_relid); p_start_value := p_start_value + p_interval; END LOOP; @@ -191,14 +188,13 @@ CREATE OR REPLACE FUNCTION 
@extschema@.create_range_partitions( p_start_value ANYELEMENT, p_interval ANYELEMENT, p_count INTEGER DEFAULT NULL, - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT TRUE) RETURNS INTEGER AS $$ DECLARE v_rows_count INTEGER; v_max p_start_value%TYPE; v_cur_value p_start_value%TYPE := p_start_value; - v_tablespace TEXT; i INTEGER; BEGIN @@ -259,9 +255,6 @@ BEGIN INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); - /* Determine tablespace of parent table */ - v_tablespace := @extschema@.get_rel_tablespace_name(parent_relid); - /* create first partition */ FOR i IN 1..p_count LOOP @@ -269,7 +262,7 @@ BEGIN parent_relid, p_start_value, p_start_value + p_interval, - tablespace := v_tablespace); + tablespace := @extschema@.get_rel_tablespace_name(parent_relid)); p_start_value := p_start_value + p_interval; END LOOP; @@ -298,12 +291,11 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( p_start_value ANYELEMENT, p_end_value ANYELEMENT, p_interval ANYELEMENT, - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT TRUE) RETURNS INTEGER AS $$ DECLARE part_count INTEGER := 0; - v_tablespace TEXT; BEGIN IF partition_data = true THEN @@ -336,16 +328,13 @@ BEGIN INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); - /* Determine tablespace of parent table */ - v_tablespace := @extschema@.get_rel_tablespace_name(parent_relid); - WHILE p_start_value <= p_end_value LOOP PERFORM @extschema@.create_single_range_partition( parent_relid, p_start_value, p_start_value + p_interval, - tablespace := v_tablespace); + tablespace := @extschema@.get_rel_tablespace_name(parent_relid)); p_start_value := p_start_value + p_interval; part_count := part_count + 1; @@ -375,12 +364,11 @@ CREATE OR REPLACE FUNCTION @extschema@.create_partitions_from_range( p_start_value 
ANYELEMENT, p_end_value ANYELEMENT, p_interval INTERVAL, - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT TRUE) RETURNS INTEGER AS $$ DECLARE part_count INTEGER := 0; - v_tablespace TEXT; BEGIN IF partition_data = true THEN @@ -409,9 +397,6 @@ BEGIN INSERT INTO @extschema@.pathman_config (partrel, attname, parttype, range_interval) VALUES (parent_relid, p_attribute, 2, p_interval::TEXT); - /* Determine tablespace of parent table */ - v_tablespace := @extschema@.get_rel_tablespace_name(parent_relid); - WHILE p_start_value <= p_end_value LOOP EXECUTE @@ -421,7 +406,7 @@ BEGIN parent_relid, p_start_value, p_start_value + p_interval, - v_tablespace; + @extschema@.get_rel_tablespace_name(parent_relid); p_start_value := p_start_value + p_interval; part_count := part_count + 1; @@ -463,7 +448,8 @@ DECLARE v_plain_relname TEXT; v_child_relname_exists BOOL; v_seq_name TEXT; - v_create_table_query TEXT; + v_init_callback REGPROCEDURE; + BEGIN v_attname := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; @@ -498,16 +484,15 @@ BEGIN v_child_relname := partition_name; END IF; - v_create_table_query := 'CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) INHERITS (%2$s)'; - - /* If tablespace is specified then add it to a create query */ - if NOT tablespace IS NULL THEN - v_create_table_query := v_create_table_query || ' TABLESPACE ' ||tablespace; + IF tablespace IS NULL THEN + tablespace := @extschema@.get_rel_tablespace_name(parent_relid); END IF; - EXECUTE format(v_create_table_query, + EXECUTE format('CREATE TABLE %1$s (LIKE %2$s INCLUDING ALL) + INHERITS (%2$s) TABLESPACE %3$s', v_child_relname, - parent_relid::TEXT); + parent_relid::TEXT, + tablespace); EXECUTE format('ALTER TABLE %s ADD CONSTRAINT %s CHECK (%s)', v_child_relname, @@ -518,8 +503,20 @@ BEGIN p_end_value)); PERFORM @extschema@.copy_foreign_keys(parent_relid, v_child_relname::REGCLASS); + + /* Fetch init_callback from 'params' table */ + WITH stub_callback(stub) as (values 
(0)) + SELECT coalesce(init_callback, 0::REGPROCEDURE) + FROM stub_callback + LEFT JOIN @extschema@.pathman_config_params AS params + ON params.partrel = parent_relid + INTO v_init_callback; + PERFORM @extschema@.invoke_on_partition_created_callback(parent_relid, - v_child_relname::REGCLASS); + v_child_relname::REGCLASS, + v_init_callback, + p_start_value, + p_end_value); RETURN v_child_relname; END @@ -817,11 +814,6 @@ BEGIN RAISE EXCEPTION 'Cannot append to empty partitions set'; END IF; - /* If tablespace isn't specified then choose parent's tablespace */ - IF tablespace IS NULL THEN - tablespace := @extschema@.get_rel_tablespace_name(parent_relid); - END IF; - p_range := @extschema@.get_range_by_idx(parent_relid, -1, 0); IF @extschema@.is_date_type(p_atttype::regtype) THEN @@ -922,11 +914,6 @@ BEGIN RAISE EXCEPTION 'Cannot prepend to empty partitions set'; END IF; - /* If tablespace isn't specified then choose parent's tablespace */ - IF tablespace IS NULL THEN - tablespace := @extschema@.get_rel_tablespace_name(parent_relid); - END IF; - p_range := @extschema@.get_range_by_idx(parent_relid, 0, 0); IF @extschema@.is_date_type(p_atttype::regtype) THEN @@ -985,11 +972,6 @@ BEGIN RAISE EXCEPTION 'Specified range overlaps with existing partitions'; END IF; - /* If tablespace isn't specified then choose parent's tablespace */ - IF tablespace IS NULL THEN - tablespace := @extschema@.get_rel_tablespace_name(parent_relid); - END IF; - /* Create new partition */ v_part_name := @extschema@.create_single_range_partition(parent_relid, p_start_value, diff --git a/sql/pg_pathman.sql b/sql/pg_pathman.sql index f6cc8ed070..1b6f1c164c 100644 --- a/sql/pg_pathman.sql +++ b/sql/pg_pathman.sql @@ -525,9 +525,9 @@ SELECT * FROM test.range_rel WHERE dt = '2014-12-15'; EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; -SELECT pathman.set_auto_partitioning('test.range_rel', false); +SELECT 
pathman.set_auto('test.range_rel', false); INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); -SELECT pathman.set_auto_partitioning('test.range_rel', true); +SELECT pathman.set_auto('test.range_rel', true); INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); DROP TABLE test.range_rel CASCADE; diff --git a/src/init.c b/src/init.c index 60eff1adb3..ec8e58ef0d 100644 --- a/src/init.c +++ b/src/init.c @@ -661,6 +661,12 @@ read_pathman_params(Oid relid, Datum *values, bool *isnull) /* Extract data if necessary */ heap_deform_tuple(htup, RelationGetDescr(rel), values, isnull); row_found = true; + + /* Perform checks for non-NULL columns */ + Assert(!isnull[Anum_pathman_config_params_partrel - 1]); + Assert(!isnull[Anum_pathman_config_params_enable_parent - 1]); + Assert(!isnull[Anum_pathman_config_params_auto - 1]); + Assert(!isnull[Anum_pathman_config_params_init_callback - 1]); } /* Clean resources */ diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 824643be62..db68ff3867 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -64,6 +64,7 @@ PG_FUNCTION_INFO_V1( get_rel_tablespace_name ); PG_FUNCTION_INFO_V1( validate_on_part_init_callback_pl ); PG_FUNCTION_INFO_V1( invoke_on_partition_created_callback ); + static void on_partitions_created_internal(Oid partitioned_table, bool add_callbacks); static void on_partitions_updated_internal(Oid partitioned_table, bool add_callbacks); static void on_partitions_removed_internal(Oid partitioned_table, bool add_callbacks); @@ -783,8 +784,8 @@ get_rel_tablespace_name(PG_FUNCTION_ARGS) } /* - * Checks that callback function meets specific requirements. Particularly it - * must have the only JSONB argument and BOOL return type. + * Checks that callback function meets specific requirements. + * It must have the only JSONB argument and BOOL return type. 
*/ Datum validate_on_part_init_callback_pl(PG_FUNCTION_ARGS) @@ -795,8 +796,8 @@ validate_on_part_init_callback_pl(PG_FUNCTION_ARGS) } /* - * Builds JSONB object containing new partition parameters and invoke the - * callback + * Builds JSONB object containing new partition parameters + * and invokes the callback. */ Datum invoke_on_partition_created_callback(PG_FUNCTION_ARGS) @@ -809,50 +810,48 @@ invoke_on_partition_created_callback(PG_FUNCTION_ARGS) pushJsonbValue(&jsonb_state, val_type, (value)); \ } while (0) -#define PART_TYPE_STR(part_type) ( #part_type ) +#define ARG_PARENT 0 /* parent table */ +#define ARG_CHILD 1 /* partition */ +#define ARG_CALLBACK 2 /* callback to be invoked */ +#define ARG_RANGE_START 3 /* start_value */ +#define ARG_RANGE_END 4 /* end_value */ + + Oid parent_oid = PG_GETARG_OID(ARG_PARENT), + partition_oid = PG_GETARG_OID(ARG_CHILD); + PartType part_type; + Oid cb_oid = PG_GETARG_OID(ARG_CALLBACK); FmgrInfo cb_flinfo; FunctionCallInfoData cb_fcinfo; - const PartRelationInfo *prel; - Oid parent_oid = PG_GETARG_OID(0), - partition_oid = PG_GETARG_OID(1); - uint32 i, - part_idx; - bool part_found = false; - Datum jsonb; JsonbParseState *jsonb_state = NULL; JsonbValue *result, key, val; - if (PG_ARGISNULL(0)) + /* If there's no callback function specified, we're done */ + if (cb_oid == InvalidOid) + PG_RETURN_VOID(); + + if (PG_ARGISNULL(ARG_PARENT)) elog(ERROR, "parent_relid should not be null"); - if (PG_ARGISNULL(1)) + if (PG_ARGISNULL(ARG_CHILD)) elog(ERROR, "partition should not be null"); - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_INDIFFERENT); - - /* If there is no callback function specified then we're done */ - if (prel->callback == InvalidOid) - PG_RETURN_NULL(); + /* Both RANGE_START & RANGE_END are not available (HASH) */ + if (PG_ARGISNULL(ARG_RANGE_START) && PG_ARGISNULL(ARG_RANGE_START)) + part_type = PT_HASH; - for (i = 0; i < PrelChildrenCount(prel); i++) - { - if 
(PrelGetChildrenArray(prel)[i] == partition_oid) - { - part_found = true; - part_idx = i; - break; - } - } + /* Either RANGE_START or RANGE_END is missing */ + if (PG_ARGISNULL(ARG_RANGE_START) || PG_ARGISNULL(ARG_RANGE_START)) + elog(ERROR, "both boundaries must be provided for RANGE partition"); - if (!part_found) - elog(ERROR, "cannot find partition %u", partition_oid); + /* Both RANGE_START & RANGE_END are provided */ + else part_type = PT_RANGE; - switch (prel->parttype) + /* Build JSONB according to partitioning type */ + switch (part_type) { case PT_HASH: { @@ -871,13 +870,13 @@ invoke_on_partition_created_callback(PG_FUNCTION_ARGS) case PT_RANGE: { - RangeEntry *re = &PrelGetRangesArray(prel)[part_idx]; - char *start_value, - *end_value; + char *start_value, + *end_value; + Oid type = get_fn_expr_argtype(fcinfo->flinfo, ARG_RANGE_START); /* Convert min & max to CSTRING */ - start_value = datum_to_cstring(re->min, prel->atttype); - end_value = datum_to_cstring(re->max, prel->atttype); + start_value = datum_to_cstring(PG_GETARG_DATUM(ARG_RANGE_START), type); + end_value = datum_to_cstring(PG_GETARG_DATUM(ARG_RANGE_END), type); pushJsonbValue(&jsonb_state, WJB_BEGIN_OBJECT, NULL); @@ -897,25 +896,21 @@ invoke_on_partition_created_callback(PG_FUNCTION_ARGS) break; default: - elog(ERROR, "Unknown partitioning type %u", prel->parttype); + elog(ERROR, "Unknown partitioning type %u", part_type); break; } - /* Construct JSONB object */ - jsonb = PointerGetDatum(JsonbValueToJsonb(result)); - /* Validate the callback's signature */ - validate_on_part_init_cb(prel->callback, true); + validate_on_part_init_cb(cb_oid, true); - fmgr_info(prel->callback, &cb_flinfo); + fmgr_info(cb_oid, &cb_flinfo); InitFunctionCallInfoData(cb_fcinfo, &cb_flinfo, 1, InvalidOid, NULL, NULL); - - cb_fcinfo.arg[0] = jsonb; + cb_fcinfo.arg[0] = PointerGetDatum(JsonbValueToJsonb(result)); cb_fcinfo.argnull[0] = false; /* Invoke the callback */ FunctionCallInvoke(&cb_fcinfo); - 
PG_RETURN_DATUM(jsonb); + PG_RETURN_VOID(); } diff --git a/src/relation_info.c b/src/relation_info.c index 4da14f2897..8a298a94d6 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -165,14 +165,14 @@ refresh_pathman_relation_info(Oid relid, { prel->enable_parent = param_values[Anum_pathman_config_params_enable_parent - 1]; prel->auto_partition = param_values[Anum_pathman_config_params_auto - 1]; - prel->callback = param_values[Anum_pathman_config_params_init_callback - 1]; + prel->init_callback = param_values[Anum_pathman_config_params_init_callback - 1]; } /* Else set default values if they cannot be found */ else { prel->enable_parent = false; prel->auto_partition = true; - prel->callback = InvalidOid; + prel->init_callback = InvalidOid; } /* We've successfully built a cache entry */ diff --git a/src/relation_info.h b/src/relation_info.h index 0c37dfb548..6eba1900a7 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -49,7 +49,7 @@ typedef struct bool valid; /* is this entry valid? 
*/ bool enable_parent; /* include parent to the plan */ bool auto_partition; /* auto partition creation */ - Oid callback; /* callback for partition creation */ + Oid init_callback; /* callback for partition creation */ uint32 children_count; Oid *children; /* Oids of child partitions */ From 86d609a7dcc742fcaa318cedcb72611fe580e04d Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 29 Sep 2016 19:32:58 +0300 Subject: [PATCH 159/184] _partition_data_concurrent() should not be STRICT, fix ARGISNULL 'else if' checks in invoke_on_partition_created_callback() --- init.sql | 9 +++++++-- src/pl_funcs.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/init.sql b/init.sql index 1267837206..b28fa3e6ae 100644 --- a/init.sql +++ b/init.sql @@ -198,6 +198,7 @@ DECLARE v_limit_clause TEXT := ''; v_where_clause TEXT := ''; ctids TID[]; + BEGIN SELECT attname INTO v_attr FROM @extschema@.pathman_config WHERE partrel = p_relation; @@ -244,7 +245,7 @@ BEGIN RETURN; END $$ -LANGUAGE plpgsql STRICT +LANGUAGE plpgsql SET pg_pathman.enable_partitionfilter = on; /* ensures that PartitionFilter is ON */ /* @@ -708,8 +709,9 @@ CREATE OR REPLACE FUNCTION @extschema@.validate_on_partition_created_callback(ca RETURNS VOID AS 'pg_pathman', 'validate_on_part_init_callback_pl' LANGUAGE C STRICT; + /* - * Builds JSONB object containing new partition parameters and invoke the callback. + * Invoke init_callback on RANGE partition. */ CREATE OR REPLACE FUNCTION @extschema@.invoke_on_partition_created_callback( parent_relid REGCLASS, @@ -720,6 +722,9 @@ CREATE OR REPLACE FUNCTION @extschema@.invoke_on_partition_created_callback( RETURNS VOID AS 'pg_pathman', 'invoke_on_partition_created_callback' LANGUAGE C; +/* + * Invoke init_callback on HASH partition. 
+ */ CREATE OR REPLACE FUNCTION @extschema@.invoke_on_partition_created_callback( parent_relid REGCLASS, partition REGCLASS, diff --git a/src/pl_funcs.c b/src/pl_funcs.c index db68ff3867..14fd4a6e22 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -844,7 +844,7 @@ invoke_on_partition_created_callback(PG_FUNCTION_ARGS) part_type = PT_HASH; /* Either RANGE_START or RANGE_END is missing */ - if (PG_ARGISNULL(ARG_RANGE_START) || PG_ARGISNULL(ARG_RANGE_START)) + else if (PG_ARGISNULL(ARG_RANGE_START) || PG_ARGISNULL(ARG_RANGE_START)) elog(ERROR, "both boundaries must be provided for RANGE partition"); /* Both RANGE_START & RANGE_END are provided */ From d8b4c3490c8ada0e39727475328a67c2e510f2f2 Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Fri, 30 Sep 2016 11:19:30 +0200 Subject: [PATCH 160/184] Retry opening new segments in pg_xlogdump --follow There is a small window between when the server closes out the existing segment and the new one is created. Put a loop around the open call in this case to make sure we wait for the new file to actually appear. --- src/bin/pg_xlogdump/pg_xlogdump.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/bin/pg_xlogdump/pg_xlogdump.c b/src/bin/pg_xlogdump/pg_xlogdump.c index dbaf727cd8..49e9a34b52 100644 --- a/src/bin/pg_xlogdump/pg_xlogdump.c +++ b/src/bin/pg_xlogdump/pg_xlogdump.c @@ -249,6 +249,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo)) { char fname[MAXFNAMELEN]; + int tries; /* Switch to another logfile segment */ if (sendFile >= 0) @@ -258,7 +259,30 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, XLogFileName(fname, timeline_id, sendSegNo); - sendFile = fuzzy_open_file(directory, fname); + /* + * In follow mode there is a short period of time after the + * server has written the end of the previous file before the + * new file is available.
So we loop for 5 seconds looking + * for the file to appear before giving up. + */ + for (tries = 0; tries < 10; tries++) + { + sendFile = fuzzy_open_file(directory, fname); + if (sendFile >= 0) + break; + if (errno == ENOENT) + { + int save_errno = errno; + + /* File not there yet, try again */ + pg_usleep(500 * 1000); + + errno = save_errno; + continue; + } + /* Any other error, fall through and fail */ + break; + } if (sendFile < 0) fatal_error("could not find file \"%s\": %s", From e12b83abb5fed85f4bbb9625ebd221b880a4c692 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 1 Oct 2016 17:15:10 -0400 Subject: [PATCH 161/184] Do ClosePostmasterPorts() earlier in SubPostmasterMain(). In standard Unix builds, postmaster child processes do ClosePostmasterPorts immediately after InitPostmasterChild, that is almost immediately after being spawned. This is important because we don't want children holding open the postmaster's end of the postmaster death watch pipe. However, in EXEC_BACKEND builds, SubPostmasterMain was postponing this responsibility significantly, in order to make it slightly more convenient to pass the right flag value to ClosePostmasterPorts. This is bad, particularly seeing that process_shared_preload_libraries() might invoke nearly-arbitrary code. Rearrange so that we do it as soon as we've fetched the socket FDs via read_backend_variables(). Also move the comment explaining about randomize_va_space to before the call of PGSharedMemoryReAttach, which is where it's relevant. The old placement was appropriate when the reattach happened inside CreateSharedMemoryAndSemaphores, but that was a long time ago. Back-patch to 9.3; the patch doesn't apply cleanly before that, and it doesn't seem worth a lot of effort given that we've had no actual field complaints traceable to this. 
Discussion: <4157.1475178360@sss.pgh.pa.us> --- src/backend/postmaster/postmaster.c | 57 +++++++++-------------------- 1 file changed, 18 insertions(+), 39 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index f16a63aade..5db878f9b4 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -4618,10 +4618,17 @@ SubPostmasterMain(int argc, char *argv[]) /* Setup essential subsystems (to ensure elog() behaves sanely) */ InitializeGUCOptions(); + /* Check we got appropriate args */ + if (argc < 3) + elog(FATAL, "invalid subpostmaster invocation"); + /* Read in the variables file */ memset(&port, 0, sizeof(Port)); read_backend_variables(argv[2], &port); + /* Close the postmaster's sockets (as soon as we know them) */ + ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0); + /* * Set reference point for stack-depth checking */ @@ -4639,15 +4646,21 @@ SubPostmasterMain(int argc, char *argv[]) errmsg("out of memory"))); #endif - /* Check we got appropriate args */ - if (argc < 3) - elog(FATAL, "invalid subpostmaster invocation"); - /* * If appropriate, physically re-attach to shared memory segment. We want * to do this before going any further to ensure that we can attach at the * same address the postmaster used. On the other hand, if we choose not * to re-attach, we may have other cleanup to do. + * + * If testing EXEC_BACKEND on Linux, you should run this as root before + * starting the postmaster: + * + * echo 0 >/proc/sys/kernel/randomize_va_space + * + * This prevents using randomized stack and code addresses that cause the + * child process's memory map to be different from the parent's, making it + * sometimes impossible to attach to shared memory at the desired address. + * Return the setting to its old value (usually '1' or '2') when finished. 
*/ if (strcmp(argv[1], "--forkbackend") == 0 || strcmp(argv[1], "--forkavlauncher") == 0 || @@ -4693,9 +4706,6 @@ SubPostmasterMain(int argc, char *argv[]) { Assert(argc == 3); /* shouldn't be any more args */ - /* Close the postmaster's sockets */ - ClosePostmasterPorts(false); - /* * Need to reinitialize the SSL library in the backend, since the * context structures contain function pointers and cannot be passed @@ -4726,17 +4736,7 @@ SubPostmasterMain(int argc, char *argv[]) /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */ InitProcess(); - /* - * Attach process to shared data structures. If testing EXEC_BACKEND - * on Linux, you must run this as root before starting the postmaster: - * - * echo 0 >/proc/sys/kernel/randomize_va_space - * - * This prevents a randomized stack base address that causes child - * shared memory to be at a different address than the parent, making - * it impossible to attached to shared memory. Return the value to - * '1' when finished. - */ + /* Attach process to shared data structures */ CreateSharedMemoryAndSemaphores(false, 0); /* And run the backend */ @@ -4744,9 +4744,6 @@ SubPostmasterMain(int argc, char *argv[]) } if (strcmp(argv[1], "--forkboot") == 0) { - /* Close the postmaster's sockets */ - ClosePostmasterPorts(false); - /* Restore basic shared memory pointers */ InitShmemAccess(UsedShmemSegAddr); @@ -4760,9 +4757,6 @@ SubPostmasterMain(int argc, char *argv[]) } if (strcmp(argv[1], "--forkavlauncher") == 0) { - /* Close the postmaster's sockets */ - ClosePostmasterPorts(false); - /* Restore basic shared memory pointers */ InitShmemAccess(UsedShmemSegAddr); @@ -4776,9 +4770,6 @@ SubPostmasterMain(int argc, char *argv[]) } if (strcmp(argv[1], "--forkavworker") == 0) { - /* Close the postmaster's sockets */ - ClosePostmasterPorts(false); - /* Restore basic shared memory pointers */ InitShmemAccess(UsedShmemSegAddr); @@ -4797,9 +4788,6 @@ SubPostmasterMain(int argc, char *argv[]) /* do this as early as possible; in 
particular, before InitProcess() */ IsBackgroundWorker = true; - /* Close the postmaster's sockets */ - ClosePostmasterPorts(false); - /* Restore basic shared memory pointers */ InitShmemAccess(UsedShmemSegAddr); @@ -4817,27 +4805,18 @@ SubPostmasterMain(int argc, char *argv[]) } if (strcmp(argv[1], "--forkarch") == 0) { - /* Close the postmaster's sockets */ - ClosePostmasterPorts(false); - /* Do not want to attach to shared memory */ PgArchiverMain(argc, argv); /* does not return */ } if (strcmp(argv[1], "--forkcol") == 0) { - /* Close the postmaster's sockets */ - ClosePostmasterPorts(false); - /* Do not want to attach to shared memory */ PgstatCollectorMain(argc, argv); /* does not return */ } if (strcmp(argv[1], "--forklog") == 0) { - /* Close the postmaster's sockets */ - ClosePostmasterPorts(true); - /* Do not want to attach to shared memory */ SysLoggerMain(argc, argv); /* does not return */ From 56bfc94f8ecb3a0d4ac9bff9eb575e978bd5516b Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 3 Oct 2016 12:45:19 +0300 Subject: [PATCH 162/184] remove dsm_array subsystem --- Makefile | 6 +- src/dsm_array.c | 321 -------------------------------------------- src/dsm_array.h | 47 ------- src/hooks.c | 1 - src/init.c | 3 +- src/relation_info.h | 2 - 6 files changed, 4 insertions(+), 376 deletions(-) delete mode 100644 src/dsm_array.c delete mode 100644 src/dsm_array.h diff --git a/Makefile b/Makefile index 7248a145d8..4eb347ea0f 100644 --- a/Makefile +++ b/Makefile @@ -2,9 +2,9 @@ MODULE_big = pg_pathman OBJS = src/init.o src/relation_info.o src/utils.o src/partition_filter.o src/runtimeappend.o \ - src/runtime_merge_append.o src/pg_pathman.o src/dsm_array.o src/rangeset.o src/pl_funcs.o \ - src/pathman_workers.o src/hooks.o src/nodes_common.o src/xact_handling.o src/copy_stmt_hooking.o \ - src/pg_compat.o $(WIN32RES) + src/runtime_merge_append.o src/pg_pathman.o src/rangeset.o src/pl_funcs.o \ + src/pathman_workers.o src/hooks.o src/nodes_common.o 
src/xact_handling.o \ + src/copy_stmt_hooking.o src/pg_compat.o $(WIN32RES) EXTENSION = pg_pathman EXTVERSION = 1.0 diff --git a/src/dsm_array.c b/src/dsm_array.c deleted file mode 100644 index 62039fb895..0000000000 --- a/src/dsm_array.c +++ /dev/null @@ -1,321 +0,0 @@ -/* ------------------------------------------------------------------------ - * - * dsm_array.c - * Allocate data in shared memory - * - * Copyright (c) 2015-2016, Postgres Professional - * - * ------------------------------------------------------------------------ - */ - -#include "pathman.h" -#include "dsm_array.h" - -#include "storage/shmem.h" -#include "storage/dsm.h" - - -static dsm_segment *segment = NULL; - -typedef struct DsmConfig -{ - dsm_handle segment_handle; - size_t block_size; - size_t blocks_count; - size_t first_free; -} DsmConfig; - -static DsmConfig *dsm_cfg = NULL; - - -/* - * Block header - * - * Its size must be equal to 4 bytes for 32bit and 8 bytes for 64bit. - * Otherwise it could screw up an alignment (for example on Sparc9) - */ -typedef uintptr_t BlockHeader; -typedef BlockHeader* BlockHeaderPtr; - -#define FREE_BIT 0x80000000 -#define is_free(header) \ - ((*header) & FREE_BIT) -#define set_free(header) \ - ((*header) | FREE_BIT) -#define set_used(header) \ - ((*header) & ~FREE_BIT) -#define get_length(header) \ - ((*header) & ~FREE_BIT) -#define set_length(header, length) \ - ((length) | ((*header) & FREE_BIT)) - -/* - * Amount of memory that need to be requested - * for shared memory to store dsm config - */ -Size -estimate_dsm_config_size() -{ - return (Size) MAXALIGN(sizeof(DsmConfig)); -} - -/* - * Initialize dsm config for arrays - */ -void -init_dsm_config() -{ - bool found; - dsm_cfg = ShmemInitStruct("pathman dsm_array config", sizeof(DsmConfig), &found); - if (!found) - { - dsm_cfg->segment_handle = 0; - dsm_cfg->block_size = 0; - dsm_cfg->blocks_count = INITIAL_BLOCKS_COUNT; - dsm_cfg->first_free = 0; - } -} - -/* - * Attach process to dsm_array segment. 
This function is used for - * background workers only. Use init_dsm_segment() in backend processes. - */ -void -attach_dsm_array_segment() -{ - segment = dsm_attach(dsm_cfg->segment_handle); -} - -/* - * Initialize dsm segment. Returns true if new segment was created and - * false if attached to existing segment - */ -bool -init_dsm_segment(size_t blocks_count, size_t block_size) -{ - bool ret; - - /* if there is already an existing segment then attach to it */ - if (dsm_cfg->segment_handle != 0) - { - ret = false; - segment = dsm_attach(dsm_cfg->segment_handle); - } - - /* - * If segment hasn't been created yet or has already been destroyed - * (it happens when last session detaches segment) then create new one - */ - if (dsm_cfg->segment_handle == 0 || segment == NULL) - { - /* create segment */ - segment = dsm_create(block_size * blocks_count, 0); - dsm_cfg->segment_handle = dsm_segment_handle(segment); - dsm_cfg->first_free = 0; - dsm_cfg->block_size = block_size; - dsm_cfg->blocks_count = blocks_count; - init_dsm_table(block_size, 0, dsm_cfg->blocks_count); - ret = true; - } - - /* - * Keep mapping till the end of the session. 
Otherwise it would be - * destroyed by the end of transaction - */ - dsm_pin_mapping(segment); - - return ret; -} - -/* - * Initialize allocated segment with block structure - */ -void -init_dsm_table(size_t block_size, size_t start, size_t end) -{ - size_t i; - BlockHeaderPtr header; - char *ptr = dsm_segment_address(segment); - - /* create blocks */ - for (i = start; i < end; i++) - { - header = (BlockHeaderPtr) &ptr[i * block_size]; - *header = set_free(header); - *header = set_length(header, 1); - } - - return; -} - -/* - * Allocate array inside dsm_segment - */ -void -alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count) -{ - size_t i = 0; - size_t size_requested = entry_size * elem_count; - size_t min_pos = 0; - size_t max_pos = 0; - bool found = false; - bool collecting_blocks = false; - size_t offset = -1; - size_t total_length = 0; - BlockHeaderPtr header; - char *ptr = dsm_segment_address(segment); - - arr->entry_size = entry_size; - - for (i = dsm_cfg->first_free; i < dsm_cfg->blocks_count; ) - { - header = (BlockHeaderPtr) &ptr[i * dsm_cfg->block_size]; - if (is_free(header)) - { - if (!collecting_blocks) - { - offset = i * dsm_cfg->block_size; - total_length = dsm_cfg->block_size - sizeof(BlockHeader); - min_pos = i; - collecting_blocks = true; - } - else - { - total_length += dsm_cfg->block_size; - } - i++; - } - else - { - collecting_blocks = false; - offset = 0; - total_length = 0; - i += get_length(header); - } - - if (total_length >= size_requested) - { - max_pos = i-1; - found = true; - break; - } - } - - /* - * If dsm segment size is not enough then resize it (or allocate bigger - * for segment SysV and Windows, not implemented yet) - */ - if (!found) - { - size_t new_blocks_count = dsm_cfg->blocks_count * 2; - - dsm_resize(segment, new_blocks_count * dsm_cfg->block_size); - init_dsm_table(dsm_cfg->block_size, dsm_cfg->blocks_count, new_blocks_count); - dsm_cfg->blocks_count = new_blocks_count; - - /* try again */ - return 
alloc_dsm_array(arr, entry_size, elem_count); - } - - /* look up for first free block */ - if (dsm_cfg->first_free == min_pos) - { - for (; iblocks_count; ) - { - header = (BlockHeaderPtr) &ptr[i * dsm_cfg->block_size]; - if (is_free(header)) - { - dsm_cfg->first_free = i; - break; - } - else - { - i += get_length(header); - } - } - } - - /* if we found enough of space */ - if (total_length >= size_requested) - { - header = (BlockHeaderPtr) &ptr[min_pos * dsm_cfg->block_size]; - *header = set_used(header); - *header = set_length(header, max_pos - min_pos + 1); - - arr->offset = offset; - arr->elem_count = elem_count; - } -} - -void -free_dsm_array(DsmArray *arr) -{ - size_t i = 0, - start = arr->offset / dsm_cfg->block_size; - char *ptr = dsm_segment_address(segment); - BlockHeaderPtr header = (BlockHeaderPtr) &ptr[start * dsm_cfg->block_size]; - size_t blocks_count = get_length(header); - - /* set blocks free */ - for(; i < blocks_count; i++) - { - header = (BlockHeaderPtr) &ptr[(start + i) * dsm_cfg->block_size]; - *header = set_free(header); - *header = set_length(header, 1); - } - - if (start < dsm_cfg->first_free) - dsm_cfg->first_free = start; - - arr->offset = 0; - arr->elem_count = 0; -} - -void -resize_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count) -{ - void *array_data; - size_t array_data_size; - void *buffer; - - /* Copy data from array to temporary buffer */ - array_data = dsm_array_get_pointer(arr, false); - array_data_size = arr->elem_count * entry_size; - buffer = palloc(array_data_size); - memcpy(buffer, array_data, array_data_size); - - /* Free array */ - free_dsm_array(arr); - - /* Allocate new array */ - alloc_dsm_array(arr, entry_size, elem_count); - - /* Copy data to new array */ - array_data = dsm_array_get_pointer(arr, false); - memcpy(array_data, buffer, array_data_size); - - pfree(buffer); -} - -void * -dsm_array_get_pointer(const DsmArray *arr, bool copy) -{ - uint8 *segment_address, - *dsm_array, - *result; - size_t size; 
- - segment_address = (uint8 *) dsm_segment_address(segment); - dsm_array = segment_address + arr->offset + sizeof(BlockHeader); - - if (copy) - { - size = arr->elem_count * arr->entry_size; - result = palloc(size); - memcpy((void *) result, (void *) dsm_array, size); - } - else - result = dsm_array; - - return result; -} diff --git a/src/dsm_array.h b/src/dsm_array.h deleted file mode 100644 index 2b7184d829..0000000000 --- a/src/dsm_array.h +++ /dev/null @@ -1,47 +0,0 @@ -/* ------------------------------------------------------------------------ - * - * dsm_array.h - * Allocate data in shared memory - * - * Copyright (c) 2016, Postgres Professional - * - * ------------------------------------------------------------------------ - */ - -#ifndef DSM_ARRAY_H -#define DSM_ARRAY_H - -#include "postgres.h" -#include "storage/dsm.h" - - -/* - * Dynamic shared memory array - */ -typedef struct -{ - dsm_handle segment; - size_t offset; - size_t elem_count; - size_t entry_size; -} DsmArray; - - -#define InvalidDsmArray { 0, 0, 0, 0 } - -#define INITIAL_BLOCKS_COUNT 8192 - - -/* Dynamic shared memory functions */ -Size estimate_dsm_config_size(void); -void init_dsm_config(void); -bool init_dsm_segment(size_t blocks_count, size_t block_size); -void init_dsm_table(size_t block_size, size_t start, size_t end); -void alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count); -void free_dsm_array(DsmArray *arr); -void resize_dsm_array(DsmArray *arr, size_t entry_size, size_t elem_count); -void *dsm_array_get_pointer(const DsmArray *arr, bool copy); -dsm_handle get_dsm_array_segment(void); -void attach_dsm_array_segment(void); - -#endif diff --git a/src/hooks.c b/src/hooks.c index 78d1c83cbd..389e3e40ea 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -530,7 +530,6 @@ pathman_shmem_startup_hook(void) /* Allocate shared memory objects */ LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - init_dsm_config(); init_shmem_config(); LWLockRelease(AddinShmemInitLock); } diff 
--git a/src/init.c b/src/init.c index 2657ede25d..6169bb936f 100644 --- a/src/init.c +++ b/src/init.c @@ -210,8 +210,7 @@ unload_config(void) Size estimate_pathman_shmem_size(void) { - return estimate_dsm_config_size() + - estimate_concurrent_part_task_slots_size() + + return estimate_concurrent_part_task_slots_size() + MAXALIGN(sizeof(PathmanState)); } diff --git a/src/relation_info.h b/src/relation_info.h index 6eba1900a7..fc405f9340 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -11,8 +11,6 @@ #ifndef RELATION_INFO_H #define RELATION_INFO_H -#include "dsm_array.h" - #include "postgres.h" #include "access/attnum.h" #include "port/atomics.h" From da8aba63fa219ded1b161481a669e10560f88ca1 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 3 Oct 2016 19:43:49 +0300 Subject: [PATCH 163/184] introduce view 'pathman_partition_list', pl/pgSQL refactoring, extract pl_range_funcs.c & pl_hash_funcs.c from pl_funcs.c --- Makefile | 9 +- init.sql | 110 ++++---- src/pathman.h | 15 +- src/pathman_workers.h | 2 +- src/pl_funcs.c | 565 +++++++++++++++++++----------------------- src/pl_hash_funcs.c | 46 ++++ src/pl_range_funcs.c | 288 +++++++++++++++++++++ src/relation_info.c | 16 ++ src/relation_info.h | 1 + 9 files changed, 691 insertions(+), 361 deletions(-) create mode 100644 src/pl_hash_funcs.c create mode 100644 src/pl_range_funcs.c diff --git a/Makefile b/Makefile index 4eb347ea0f..31abb4e7d8 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,11 @@ # contrib/pg_pathman/Makefile MODULE_big = pg_pathman -OBJS = src/init.o src/relation_info.o src/utils.o src/partition_filter.o src/runtimeappend.o \ - src/runtime_merge_append.o src/pg_pathman.o src/rangeset.o src/pl_funcs.o \ - src/pathman_workers.o src/hooks.o src/nodes_common.o src/xact_handling.o \ - src/copy_stmt_hooking.o src/pg_compat.o $(WIN32RES) +OBJS = src/init.o src/relation_info.o src/utils.o src/partition_filter.o \ + src/runtimeappend.o src/runtime_merge_append.o src/pg_pathman.o src/rangeset.o \ 
+ src/pl_funcs.o src/pl_range_funcs.o src/pl_hash_funcs.o src/pathman_workers.o \ + src/hooks.o src/nodes_common.o src/xact_handling.o src/copy_stmt_hooking.o \ + src/pg_compat.o $(WIN32RES) EXTENSION = pg_pathman EXTVERSION = 1.0 diff --git a/init.sql b/init.sql index 42ff2ea2b8..1e0dce73f7 100644 --- a/init.sql +++ b/init.sql @@ -77,9 +77,6 @@ SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config', ''); SELECT pg_catalog.pg_extension_config_dump('@extschema@.pathman_config_params', ''); -CREATE OR REPLACE FUNCTION @extschema@.invalidate_relcache(relid OID) -RETURNS VOID AS 'pg_pathman' LANGUAGE C STRICT; - CREATE OR REPLACE FUNCTION @extschema@.partitions_count(relation REGCLASS) RETURNS INT AS $$ @@ -150,6 +147,25 @@ END $$ LANGUAGE plpgsql; +/* + * Show all existing parents and partitions. + */ +CREATE OR REPLACE FUNCTION @extschema@.show_partition_list() +RETURNS TABLE ( + parent REGCLASS, + partition REGCLASS, + parttype INT4, + partattr TEXT, + range_min TEXT, + range_max TEXT) +AS 'pg_pathman', 'show_partition_list_internal' LANGUAGE C STRICT; + +/* + * View for show_partition_list(). + */ +CREATE OR REPLACE VIEW @extschema@.pathman_partition_list +AS SELECT * FROM @extschema@.show_partition_list(); + /* * Show all existing concurrent partitioning tasks. */ @@ -160,8 +176,8 @@ RETURNS TABLE ( dbid OID, relid REGCLASS, processed INT, - status TEXT -) AS 'pg_pathman', 'show_concurrent_part_tasks_internal' LANGUAGE C STRICT; + status TEXT) +AS 'pg_pathman', 'show_concurrent_part_tasks_internal' LANGUAGE C STRICT; /* * View for show_concurrent_part_tasks(). @@ -348,7 +364,7 @@ $$ LANGUAGE plpgsql; /* - * Returns relname without quotes or something + * Returns relname without quotes or something. */ CREATE OR REPLACE FUNCTION @extschema@.get_plain_schema_and_relname( cls REGCLASS, @@ -366,7 +382,7 @@ $$ LANGUAGE plpgsql STRICT; /* - * Returns schema-qualified name for table + * Returns the schema-qualified name of table. 
*/ CREATE OR REPLACE FUNCTION @extschema@.get_schema_qualified_name( cls REGCLASS, @@ -385,7 +401,7 @@ $$ LANGUAGE plpgsql STRICT; /* - * Validates relation name. It must be schema qualified + * Validates relation name. It must be schema qualified. */ CREATE OR REPLACE FUNCTION @extschema@.validate_relname( cls REGCLASS) @@ -407,7 +423,7 @@ $$ LANGUAGE plpgsql; /* - * Check if two relations have equal structures + * Check if two relations have equal structures. */ CREATE OR REPLACE FUNCTION @extschema@.validate_relations_equality( relation1 OID, relation2 OID) @@ -439,7 +455,7 @@ $$ LANGUAGE plpgsql; /* - * DDL trigger that deletes entry from pathman_config table + * DDL trigger that deletes entry from pathman_config table. */ CREATE OR REPLACE FUNCTION @extschema@.pathman_ddl_trigger_func() RETURNS event_trigger AS @@ -472,7 +488,7 @@ $$ LANGUAGE plpgsql; /* - * Drop trigger + * Drop triggers. */ CREATE OR REPLACE FUNCTION @extschema@.drop_triggers( parent_relid REGCLASS) @@ -485,8 +501,8 @@ END $$ LANGUAGE plpgsql STRICT; /* - * Drop partitions - * If delete_data set to TRUE then partitions will be dropped with all the data + * Drop partitions. If delete_data set to TRUE, partitions + * will be dropped with all the data. */ CREATE OR REPLACE FUNCTION @extschema@.drop_partitions( parent_relid REGCLASS, @@ -578,16 +594,6 @@ ON sql_drop EXECUTE PROCEDURE @extschema@.pathman_ddl_trigger_func(); -/* - * Attach a previously partitioned table - */ -CREATE OR REPLACE FUNCTION @extschema@.add_to_pathman_config( - parent_relid REGCLASS, - attname TEXT, - range_interval TEXT DEFAULT NULL) -RETURNS BOOLEAN AS 'pg_pathman', 'add_to_pathman_config' -LANGUAGE C; - CREATE OR REPLACE FUNCTION @extschema@.on_create_partitions( relid REGCLASS) @@ -619,40 +625,41 @@ CREATE OR REPLACE FUNCTION @extschema@.get_base_type(REGTYPE) RETURNS REGTYPE AS 'pg_pathman', 'get_base_type_pl' LANGUAGE C STRICT; - /* - * Checks if attribute is nullable + * Returns attribute type name for relation. 
*/ -CREATE OR REPLACE FUNCTION @extschema@.is_attribute_nullable( +CREATE OR REPLACE FUNCTION @extschema@.get_attribute_type( REGCLASS, TEXT) -RETURNS BOOLEAN AS 'pg_pathman', 'is_attribute_nullable' +RETURNS REGTYPE AS 'pg_pathman', 'get_attribute_type_pl' LANGUAGE C STRICT; /* - * Check if regclass is date or timestamp + * Return tablespace name for specified relation. */ -CREATE OR REPLACE FUNCTION @extschema@.is_date_type( - typid REGTYPE) -RETURNS BOOLEAN AS 'pg_pathman', 'is_date_type' +CREATE OR REPLACE FUNCTION @extschema@.get_rel_tablespace_name(relation REGCLASS) +RETURNS TEXT AS 'pg_pathman', 'get_rel_tablespace_name' LANGUAGE C STRICT; + /* - * Returns attribute type name for relation + * Checks if attribute is nullable */ -CREATE OR REPLACE FUNCTION @extschema@.get_attribute_type( +CREATE OR REPLACE FUNCTION @extschema@.is_attribute_nullable( REGCLASS, TEXT) -RETURNS REGTYPE AS 'pg_pathman', 'get_attribute_type_pl' +RETURNS BOOLEAN AS 'pg_pathman', 'is_attribute_nullable' LANGUAGE C STRICT; /* - * Get parent of pg_pathman's partition. + * Check if regclass is date or timestamp. */ -CREATE OR REPLACE FUNCTION @extschema@.get_parent_of_partition(REGCLASS) -RETURNS REGCLASS AS 'pg_pathman', 'get_parent_of_partition_pl' +CREATE OR REPLACE FUNCTION @extschema@.is_date_type( + typid REGTYPE) +RETURNS BOOLEAN AS 'pg_pathman', 'is_date_type' LANGUAGE C STRICT; + /* - * Build check constraint name for a specified relation's column + * Build check constraint name for a specified relation's column. */ CREATE OR REPLACE FUNCTION @extschema@.build_check_constraint_name( REGCLASS, INT2) @@ -679,7 +686,22 @@ LANGUAGE C STRICT; /* - * Lock partitioned relation to restrict concurrent modification of partitioning scheme. + * Attach a previously partitioned table. 
+ */ +CREATE OR REPLACE FUNCTION @extschema@.add_to_pathman_config( + parent_relid REGCLASS, + attname TEXT, + range_interval TEXT DEFAULT NULL) +RETURNS BOOLEAN AS 'pg_pathman', 'add_to_pathman_config' +LANGUAGE C; + +CREATE OR REPLACE FUNCTION @extschema@.invalidate_relcache(relid OID) +RETURNS VOID AS 'pg_pathman' LANGUAGE C STRICT; + + +/* + * Lock partitioned relation to restrict concurrent + * modification of partitioning scheme. */ CREATE OR REPLACE FUNCTION @extschema@.lock_partitioned_relation( REGCLASS) @@ -702,18 +724,12 @@ CREATE OR REPLACE FUNCTION @extschema@.debug_capture() RETURNS VOID AS 'pg_pathman', 'debug_capture' LANGUAGE C STRICT; -/* - * Return tablespace name for specified relation. - */ -CREATE OR REPLACE FUNCTION @extschema@.get_rel_tablespace_name(relation REGCLASS) -RETURNS TEXT AS 'pg_pathman', 'get_rel_tablespace_name' -LANGUAGE C STRICT; - /* * Checks that callback function meets specific requirements. Particularly it * must have the only JSONB argument and VOID return type. */ -CREATE OR REPLACE FUNCTION @extschema@.validate_on_partition_created_callback(callback REGPROC) +CREATE OR REPLACE FUNCTION @extschema@.validate_on_partition_created_callback( + callback REGPROC) RETURNS VOID AS 'pg_pathman', 'validate_on_part_init_callback_pl' LANGUAGE C STRICT; diff --git a/src/pathman.h b/src/pathman.h index 5c3db46232..c078ed1045 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -52,7 +52,7 @@ #define PATHMAN_CONFIG_interval_typmod -1 /* - * Definitions for the "pathman_config_params" table + * Definitions for the "pathman_config_params" table. */ #define PATHMAN_CONFIG_PARAMS "pathman_config_params" #define Natts_pathman_config_params 4 @@ -61,6 +61,19 @@ #define Anum_pathman_config_params_auto 3 /* auto partitions creation */ #define Anum_pathman_config_params_init_callback 4 /* partition action callback */ +/* + * Definitions for the "pathman_partition_list" view. 
+ */ +#define PATHMAN_PARTITION_LIST "pathman_partition_list" +#define Natts_pathman_partition_list 6 +#define Anum_pathman_pl_parent 1 +#define Anum_pathman_pl_partition 2 +#define Anum_pathman_pl_parttype 3 +#define Anum_pathman_pl_partattr 4 +#define Anum_pathman_pl_range_min 5 +#define Anum_pathman_pl_range_max 6 + + /* * Cache current PATHMAN_CONFIG relid (set during load_config()). */ diff --git a/src/pathman_workers.h b/src/pathman_workers.h index 3ea664d57c..dfa14d53fb 100644 --- a/src/pathman_workers.h +++ b/src/pathman_workers.h @@ -111,7 +111,7 @@ cps_set_status(ConcurrentPartSlot *slot, ConcurrentPartSlotStatus status) /* - * Definitions for the "pathman_concurrent_part_tasks" view + * Definitions for the "pathman_concurrent_part_tasks" view. */ #define PATHMAN_CONCURRENT_PART_TASKS "pathman_concurrent_part_tasks" #define Natts_pathman_cp_tasks 6 diff --git a/src/pl_funcs.c b/src/pl_funcs.c index d58ae96dc3..ca02a3ebbd 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -11,60 +11,66 @@ #include "init.h" #include "pathman.h" #include "relation_info.h" -#include "utils.h" #include "xact_handling.h" -#include "fmgr.h" #include "access/htup_details.h" #include "access/nbtree.h" -#include "access/xact.h" #include "catalog/indexing.h" #include "catalog/pg_type.h" -#include "catalog/pg_proc.h" -#include "commands/sequence.h" #include "commands/tablespace.h" +#include "funcapi.h" #include "miscadmin.h" -#include "utils/array.h" #include "utils/builtins.h" -#include +#include "utils/inval.h" #include "utils/jsonb.h" -#include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/syscache.h" -#include "utils/typcache.h" -/* declarations */ +/* Function declarations */ + PG_FUNCTION_INFO_V1( on_partitions_created ); PG_FUNCTION_INFO_V1( on_partitions_updated ); PG_FUNCTION_INFO_V1( on_partitions_removed ); + PG_FUNCTION_INFO_V1( get_parent_of_partition_pl ); PG_FUNCTION_INFO_V1( get_base_type_pl ); PG_FUNCTION_INFO_V1( get_attribute_type_pl ); 
-PG_FUNCTION_INFO_V1( find_or_create_range_partition); -PG_FUNCTION_INFO_V1( get_range_by_idx ); -PG_FUNCTION_INFO_V1( get_range_by_part_oid ); -PG_FUNCTION_INFO_V1( get_min_range_value ); -PG_FUNCTION_INFO_V1( get_max_range_value ); -PG_FUNCTION_INFO_V1( get_type_hash_func ); -PG_FUNCTION_INFO_V1( get_hash_part_idx ); -PG_FUNCTION_INFO_V1( check_overlap ); -PG_FUNCTION_INFO_V1( build_range_condition ); -PG_FUNCTION_INFO_V1( build_check_constraint_name_attnum ); -PG_FUNCTION_INFO_V1( build_check_constraint_name_attname ); +PG_FUNCTION_INFO_V1( get_rel_tablespace_name ); + +PG_FUNCTION_INFO_V1( show_partition_list_internal ); + PG_FUNCTION_INFO_V1( build_update_trigger_func_name ); PG_FUNCTION_INFO_V1( build_update_trigger_name ); +PG_FUNCTION_INFO_V1( build_check_constraint_name_attnum ); +PG_FUNCTION_INFO_V1( build_check_constraint_name_attname ); + PG_FUNCTION_INFO_V1( is_date_type ); PG_FUNCTION_INFO_V1( is_attribute_nullable ); + PG_FUNCTION_INFO_V1( add_to_pathman_config ); PG_FUNCTION_INFO_V1( invalidate_relcache ); + PG_FUNCTION_INFO_V1( lock_partitioned_relation ); PG_FUNCTION_INFO_V1( prevent_relation_modification ); -PG_FUNCTION_INFO_V1( debug_capture ); -PG_FUNCTION_INFO_V1( get_rel_tablespace_name ); + PG_FUNCTION_INFO_V1( validate_on_part_init_callback_pl ); PG_FUNCTION_INFO_V1( invoke_on_partition_created_callback ); +PG_FUNCTION_INFO_V1( debug_capture ); + + +typedef struct +{ + Relation pathman_config; + HeapScanDesc pathman_config_scan; + Snapshot snapshot; + + const PartRelationInfo *current_prel; + + uint32 child_number; +} show_partition_list_cxt; + static void on_partitions_created_internal(Oid partitioned_table, bool add_callbacks); static void on_partitions_updated_internal(Oid partitioned_table, bool add_callbacks); @@ -82,7 +88,9 @@ check_relation_exists(Oid relid) /* - * Callbacks. 
+ * ---------------------------- + * Partition events callbacks + * ---------------------------- */ static void @@ -113,9 +121,6 @@ on_partitions_removed_internal(Oid partitioned_table, bool add_callbacks) (add_callbacks ? "true" : "false"), partitioned_table); } -/* - * Thin layer between pure C and pl/PgSQL. - */ Datum on_partitions_created(PG_FUNCTION_ARGS) @@ -139,6 +144,12 @@ on_partitions_removed(PG_FUNCTION_ARGS) } +/* + * ------------------------ + * Various useful getters + * ------------------------ + */ + /* * Get parent of a specified partition. */ @@ -176,7 +187,6 @@ get_base_type_pl(PG_FUNCTION_ARGS) PG_RETURN_OID(getBaseType(PG_GETARG_OID(0))); } - /* * Get type (as REGTYPE) of a given attribute. */ @@ -207,246 +217,212 @@ get_attribute_type_pl(PG_FUNCTION_ARGS) } /* - * Returns partition oid for specified parent relid and value. - * In case when partition doesn't exist try to create one. + * Return tablespace name for specified relation */ Datum -find_or_create_range_partition(PG_FUNCTION_ARGS) +get_rel_tablespace_name(PG_FUNCTION_ARGS) { - Oid parent_oid = PG_GETARG_OID(0); - Datum value = PG_GETARG_DATUM(1); - Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); - const PartRelationInfo *prel; - FmgrInfo cmp_func; - RangeEntry found_rentry; - search_rangerel_result search_state; - - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); - - fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); + Oid relid = PG_GETARG_OID(0); + Oid tablespace_id; + char *result; - /* Use available PartRelationInfo to find partition */ - search_state = search_range_partition_eq(value, &cmp_func, prel, - &found_rentry); + tablespace_id = get_rel_tablespace(relid); - /* - * If found then just return oid, else create new partitions - */ - if (search_state == SEARCH_RANGEREL_FOUND) - PG_RETURN_OID(found_rentry.child_oid); - /* - * If not found and value is between first and last partitions - */ - else if 
(search_state == SEARCH_RANGEREL_GAP) - PG_RETURN_NULL(); - else + /* If tablespace id is InvalidOid then use the default tablespace */ + if (!OidIsValid(tablespace_id)) { - Oid child_oid = create_partitions(parent_oid, value, value_type); - - /* get_pathman_relation_info() will refresh this entry */ - invalidate_pathman_relation_info(parent_oid, NULL); + tablespace_id = GetDefaultTablespace(get_rel_persistence(relid)); - PG_RETURN_OID(child_oid); + /* If tablespace is still invalid then use database's default */ + if (!OidIsValid(tablespace_id)) + tablespace_id = MyDatabaseTableSpace; } + + result = get_tablespace_name(tablespace_id); + PG_RETURN_TEXT_P(cstring_to_text(result)); } + /* - * Returns range entry (min, max) (in form of array). - * - * arg #1 is the parent's Oid. - * arg #2 is the partition's Oid. + * ---------------------- + * Common purpose VIEWs + * ---------------------- */ -Datum -get_range_by_part_oid(PG_FUNCTION_ARGS) -{ - Oid parent_oid = PG_GETARG_OID(0); - Oid child_oid = PG_GETARG_OID(1); - uint32 i; - RangeEntry *ranges; - const PartRelationInfo *prel; - - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); - - ranges = PrelGetRangesArray(prel); - - /* Look for the specified partition */ - for (i = 0; i < PrelChildrenCount(prel); i++) - if (ranges[i].child_oid == child_oid) - { - ArrayType *arr; - Datum elems[2] = { ranges[i].min, ranges[i].max }; - - arr = construct_array(elems, 2, prel->atttype, - prel->attlen, prel->attbyval, - prel->attalign); - - PG_RETURN_ARRAYTYPE_P(arr); - } - - /* No partition found, report error */ - elog(ERROR, "Relation \"%s\" has no partition \"%s\"", - get_rel_name_or_relid(parent_oid), - get_rel_name_or_relid(child_oid)); - - PG_RETURN_NULL(); /* keep compiler happy */ -} /* - * Returns N-th range entry (min, max) (in form of array). - * - * arg #1 is the parent's Oid. 
- * arg #2 is the index of the range - * (if it is negative then the last range will be returned). + * List all existing partitions and their parents. */ Datum -get_range_by_idx(PG_FUNCTION_ARGS) +show_partition_list_internal(PG_FUNCTION_ARGS) { - Oid parent_oid = PG_GETARG_OID(0); - int idx = PG_GETARG_INT32(1); - Datum elems[2]; - RangeEntry *ranges; - const PartRelationInfo *prel; - - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + show_partition_list_cxt *usercxt; + FuncCallContext *funccxt; - /* Now we have to deal with 'idx' */ - if (idx < -1) - { - elog(ERROR, "Negative indices other than -1 (last partition) are not allowed"); - } - else if (idx == -1) - { - idx = PrelLastChild(prel); - } - else if (((uint32) abs(idx)) >= PrelChildrenCount(prel)) + /* + * Initialize tuple descriptor & function call context. + */ + if (SRF_IS_FIRSTCALL()) { - elog(ERROR, "Partition #%d does not exist (total amount is %u)", - idx, PrelChildrenCount(prel)); - } + TupleDesc tupdesc; + MemoryContext old_mcxt; - ranges = PrelGetRangesArray(prel); + funccxt = SRF_FIRSTCALL_INIT(); - elems[0] = ranges[idx].min; - elems[1] = ranges[idx].max; - - PG_RETURN_ARRAYTYPE_P(construct_array(elems, 2, - prel->atttype, - prel->attlen, - prel->attbyval, - prel->attalign)); -} + old_mcxt = MemoryContextSwitchTo(funccxt->multi_call_memory_ctx); -/* - * Returns min value of the first range for relation. 
- */ -Datum -get_min_range_value(PG_FUNCTION_ARGS) -{ - Oid parent_oid = PG_GETARG_OID(0); - RangeEntry *ranges; - const PartRelationInfo *prel; + usercxt = (show_partition_list_cxt *) palloc(sizeof(show_partition_list_cxt)); - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + /* Open PATHMAN_CONFIG with latest snapshot available */ + usercxt->pathman_config = heap_open(get_pathman_config_relid(), + AccessShareLock); + usercxt->snapshot = RegisterSnapshot(GetLatestSnapshot()); + usercxt->pathman_config_scan = heap_beginscan(usercxt->pathman_config, + usercxt->snapshot, 0, NULL); - ranges = PrelGetRangesArray(prel); + usercxt->current_prel = NULL; - PG_RETURN_DATUM(ranges[0].min); -} + /* Create tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(Natts_pathman_partition_list, false); -/* - * Returns max value of the last range for relation. - */ -Datum -get_max_range_value(PG_FUNCTION_ARGS) -{ - Oid parent_oid = PG_GETARG_OID(0); - RangeEntry *ranges; - const PartRelationInfo *prel; + TupleDescInitEntry(tupdesc, Anum_pathman_pl_parent, + "parent", REGCLASSOID, -1, 0); + TupleDescInitEntry(tupdesc, Anum_pathman_pl_partition, + "partition", REGCLASSOID, -1, 0); + TupleDescInitEntry(tupdesc, Anum_pathman_pl_parttype, + "parttype", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, Anum_pathman_pl_partattr, + "partattr", TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, Anum_pathman_pl_range_min, + "range_min", TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, Anum_pathman_pl_range_max, + "range_max", TEXTOID, -1, 0); - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + funccxt->tuple_desc = BlessTupleDesc(tupdesc); + funccxt->user_fctx = (void *) usercxt; - ranges = PrelGetRangesArray(prel); + MemoryContextSwitchTo(old_mcxt); + } - PG_RETURN_DATUM(ranges[PrelLastChild(prel)].max); -} + funccxt = SRF_PERCALL_SETUP(); + usercxt = (show_partition_list_cxt *) 
funccxt->user_fctx; -/* - * Checks if range overlaps with existing partitions. - * Returns TRUE if overlaps and FALSE otherwise. - */ -Datum -check_overlap(PG_FUNCTION_ARGS) -{ - Oid parent_oid = PG_GETARG_OID(0); + /* Iterate through pathman cache */ + for(;;) + { + const PartRelationInfo *prel; + HeapTuple htup; + Datum values[Natts_pathman_partition_list]; + bool isnull[Natts_pathman_partition_list] = { 0 }; + char *partattr_cstr; + + /* Fetch next PartRelationInfo if needed */ + if (usercxt->current_prel == NULL) + { + HeapTuple pathman_config_htup; + Datum parent_table; + bool parent_table_isnull; + Oid parent_table_oid; - Datum p1 = PG_GETARG_DATUM(1), - p2 = PG_GETARG_DATUM(2); + pathman_config_htup = heap_getnext(usercxt->pathman_config_scan, + ForwardScanDirection); + if (!HeapTupleIsValid(pathman_config_htup)) + break; - Oid p1_type = get_fn_expr_argtype(fcinfo->flinfo, 1), - p2_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + parent_table = heap_getattr(pathman_config_htup, + Anum_pathman_config_partrel, + RelationGetDescr(usercxt->pathman_config), + &parent_table_isnull); - FmgrInfo cmp_func_1, - cmp_func_2; + Assert(parent_table_isnull == false); + parent_table_oid = DatumGetObjectId(parent_table); - uint32 i; - RangeEntry *ranges; - const PartRelationInfo *prel; + usercxt->current_prel = get_pathman_relation_info(parent_table_oid); + if (usercxt->current_prel == NULL) + continue; - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + usercxt->child_number = 0; + } - /* comparison functions */ - fill_type_cmp_fmgr_info(&cmp_func_1, p1_type, prel->atttype); - fill_type_cmp_fmgr_info(&cmp_func_2, p2_type, prel->atttype); + /* Alias to 'usercxt->current_prel' */ + prel = usercxt->current_prel; - ranges = PrelGetRangesArray(prel); - for (i = 0; i < PrelChildrenCount(prel); i++) - { - int c1 = FunctionCall2(&cmp_func_1, p1, ranges[i].max); - int c2 = FunctionCall2(&cmp_func_2, p2, ranges[i].min); + if 
(usercxt->child_number >= PrelChildrenCount(prel)) + { + usercxt->current_prel = NULL; + usercxt->child_number = 0; - if (c1 < 0 && c2 > 0) - PG_RETURN_BOOL(true); - } + continue; + } - PG_RETURN_BOOL(false); -} + partattr_cstr = get_attname(PrelParentRelid(prel), prel->attnum); + if (!partattr_cstr) + { + usercxt->current_prel = NULL; + continue; + } + values[Anum_pathman_pl_parent - 1] = PrelParentRelid(prel); + values[Anum_pathman_pl_parttype - 1] = prel->parttype; + values[Anum_pathman_pl_partattr - 1] = CStringGetTextDatum(partattr_cstr); -/* - * HASH-related stuff. - */ + switch (prel->parttype) + { + case PT_HASH: + { + Oid *children = PrelGetChildrenArray(prel), + child_oid = children[usercxt->child_number]; + + values[Anum_pathman_pl_partition - 1] = child_oid; + isnull[Anum_pathman_pl_range_min - 1] = true; + isnull[Anum_pathman_pl_range_max - 1] = true; + } + break; + + case PT_RANGE: + { + RangeEntry *re; + Datum rmin, + rmax; + + re = &PrelGetRangesArray(prel)[usercxt->child_number]; + + rmin = CStringGetTextDatum(datum_to_cstring(re->min, + prel->atttype)); + rmax = CStringGetTextDatum(datum_to_cstring(re->max, + prel->atttype)); + + values[Anum_pathman_pl_partition - 1] = re->child_oid; + values[Anum_pathman_pl_range_min - 1] = rmin; + values[Anum_pathman_pl_range_max - 1] = rmax; + } + break; + + default: + elog(ERROR, "Unknown partitioning type %u", prel->parttype); + } -/* Returns hash function's OID for a specified type. 
*/ -Datum -get_type_hash_func(PG_FUNCTION_ARGS) -{ - TypeCacheEntry *tce; - Oid type_oid = PG_GETARG_OID(0); + /* Switch to the next child */ + usercxt->child_number++; - tce = lookup_type_cache(type_oid, TYPECACHE_HASH_PROC); + /* Form output tuple */ + htup = heap_form_tuple(funccxt->tuple_desc, values, isnull); - PG_RETURN_OID(tce->hash_proc); -} + SRF_RETURN_NEXT(funccxt, HeapTupleGetDatum(htup)); + } -/* Wrapper for hash_to_part_index() */ -Datum -get_hash_part_idx(PG_FUNCTION_ARGS) -{ - uint32 value = PG_GETARG_UINT32(0), - part_count = PG_GETARG_UINT32(1); + /* Clean resources */ + heap_endscan(usercxt->pathman_config_scan); + UnregisterSnapshot(usercxt->snapshot); + heap_close(usercxt->pathman_config, AccessShareLock); - PG_RETURN_UINT32(hash_to_part_index(value, part_count)); + SRF_RETURN_DONE(funccxt); } /* - * Traits. + * -------- + * Traits + * -------- */ Datum @@ -480,33 +456,42 @@ is_attribute_nullable(PG_FUNCTION_ARGS) /* - * Useful string builders. + * ------------------------ + * Useful string builders + * ------------------------ */ -/* Build range condition for a CHECK CONSTRAINT. 
*/ Datum -build_range_condition(PG_FUNCTION_ARGS) +build_update_trigger_func_name(PG_FUNCTION_ARGS) { - text *attname = PG_GETARG_TEXT_P(0); + Oid relid = PG_GETARG_OID(0), + nspid; + const char *result; - Datum min_bound = PG_GETARG_DATUM(1), - max_bound = PG_GETARG_DATUM(2); + /* Check that relation exists */ + if (!check_relation_exists(relid)) + elog(ERROR, "Invalid relation %u", relid); - Oid min_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 1), - max_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + nspid = get_rel_namespace(relid); + result = psprintf("%s.%s", + quote_identifier(get_namespace_name(nspid)), + quote_identifier(psprintf("%s_upd_trig_func", + get_rel_name(relid)))); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} - char *result; +Datum +build_update_trigger_name(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + const char *result; /* trigger's name can't be qualified */ - /* This is not going to trigger (not now, at least), just for the safety */ - if (min_bound_type != max_bound_type) - elog(ERROR, "Cannot build range condition: " - "boundaries should be of the same type"); + /* Check that relation exists */ + if (!check_relation_exists(relid)) + elog(ERROR, "Invalid relation %u", relid); - /* Create range condition CSTRING */ - result = psprintf("%1$s >= '%2$s' AND %1$s < '%3$s'", - text_to_cstring(attname), - datum_to_cstring(min_bound, min_bound_type), - datum_to_cstring(max_bound, max_bound_type)); + result = quote_identifier(psprintf("%s_upd_trig", get_rel_name(relid))); PG_RETURN_TEXT_P(cstring_to_text(result)); } @@ -551,41 +536,12 @@ build_check_constraint_name_attname(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(cstring_to_text(quote_identifier(result))); } -Datum -build_update_trigger_func_name(PG_FUNCTION_ARGS) -{ - Oid relid = PG_GETARG_OID(0), - nspid; - const char *result; - - /* Check that relation exists */ - if (!check_relation_exists(relid)) - elog(ERROR, "Invalid relation %u", relid); - - nspid = 
get_rel_namespace(relid); - result = psprintf("%s.%s", - quote_identifier(get_namespace_name(nspid)), - quote_identifier(psprintf("%s_upd_trig_func", - get_rel_name(relid)))); - - PG_RETURN_TEXT_P(cstring_to_text(result)); -} - -Datum -build_update_trigger_name(PG_FUNCTION_ARGS) -{ - Oid relid = PG_GETARG_OID(0); - const char *result; /* trigger's name can't be qualified */ - - /* Check that relation exists */ - if (!check_relation_exists(relid)) - elog(ERROR, "Invalid relation %u", relid); - - result = quote_identifier(psprintf("%s_upd_trig", get_rel_name(relid))); - - PG_RETURN_TEXT_P(cstring_to_text(result)); -} +/* + * ------------------------ + * Cache & config updates + * ------------------------ + */ /* * Try to add previously partitioned table to PATHMAN_CONFIG. @@ -697,6 +653,12 @@ invalidate_relcache(PG_FUNCTION_ARGS) } +/* + * -------------------------- + * Special locking routines + * -------------------------- + */ + /* * Acquire appropriate lock on a partitioned relation. */ @@ -744,45 +706,10 @@ prevent_relation_modification(PG_FUNCTION_ARGS) /* - * NOTE: used for DEBUG, set breakpoint here. 
+ * ------------------------------------------- + * User-defined partition creation callbacks + * ------------------------------------------- */ -Datum -debug_capture(PG_FUNCTION_ARGS) -{ - static float8 sleep_time = 0; - DirectFunctionCall1(pg_sleep, Float8GetDatum(sleep_time)); - - /* Write something (doesn't really matter) */ - elog(WARNING, "debug_capture [%u]", MyProcPid); - - PG_RETURN_VOID(); -} - -/* - * Return tablespace name for specified relation - */ -Datum -get_rel_tablespace_name(PG_FUNCTION_ARGS) -{ - Oid relid = PG_GETARG_OID(0); - Oid tablespace_id; - char *result; - - tablespace_id = get_rel_tablespace(relid); - - /* If tablespace id is InvalidOid then use the default tablespace */ - if (!OidIsValid(tablespace_id)) - { - tablespace_id = GetDefaultTablespace(get_rel_persistence(relid)); - - /* If tablespace is still invalid then use database's default */ - if (!OidIsValid(tablespace_id)) - tablespace_id = MyDatabaseTableSpace; - } - - result = get_tablespace_name(tablespace_id); - PG_RETURN_TEXT_P(cstring_to_text(result)); -} /* * Checks that callback function meets specific requirements. @@ -915,3 +842,25 @@ invoke_on_partition_created_callback(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + + +/* + * ------- + * DEBUG + * ------- + */ + +/* + * NOTE: used for DEBUG, set breakpoint here. 
+ */ +Datum +debug_capture(PG_FUNCTION_ARGS) +{ + static float8 sleep_time = 0; + DirectFunctionCall1(pg_sleep, Float8GetDatum(sleep_time)); + + /* Write something (doesn't really matter) */ + elog(WARNING, "debug_capture [%u]", MyProcPid); + + PG_RETURN_VOID(); +} diff --git a/src/pl_hash_funcs.c b/src/pl_hash_funcs.c new file mode 100644 index 0000000000..864a4c1313 --- /dev/null +++ b/src/pl_hash_funcs.c @@ -0,0 +1,46 @@ +/* ------------------------------------------------------------------------ + * + * pl_hash_funcs.c + * Utility C functions for stored HASH procedures + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + +#include "pathman.h" + +#include "utils/typcache.h" + + +/* Function declarations */ + +PG_FUNCTION_INFO_V1( get_type_hash_func ); +PG_FUNCTION_INFO_V1( get_hash_part_idx ); + + +/* + * Returns hash function's OID for a specified type. + */ +Datum +get_type_hash_func(PG_FUNCTION_ARGS) +{ + TypeCacheEntry *tce; + Oid type_oid = PG_GETARG_OID(0); + + tce = lookup_type_cache(type_oid, TYPECACHE_HASH_PROC); + + PG_RETURN_OID(tce->hash_proc); +} + +/* + * Wrapper for hash_to_part_index() + */ +Datum +get_hash_part_idx(PG_FUNCTION_ARGS) +{ + uint32 value = PG_GETARG_UINT32(0), + part_count = PG_GETARG_UINT32(1); + + PG_RETURN_UINT32(hash_to_part_index(value, part_count)); +} diff --git a/src/pl_range_funcs.c b/src/pl_range_funcs.c new file mode 100644 index 0000000000..17a3869d74 --- /dev/null +++ b/src/pl_range_funcs.c @@ -0,0 +1,288 @@ +/* ------------------------------------------------------------------------ + * + * pl_range_funcs.c + * Utility C functions for stored RANGE procedures + * + * Copyright (c) 2016, Postgres Professional + * + * ------------------------------------------------------------------------ + */ + +#include "pathman.h" +#include "relation_info.h" + +#include "utils/array.h" +#include "utils/builtins.h" + + +/* Function declarations */ + 
+PG_FUNCTION_INFO_V1( find_or_create_range_partition); +PG_FUNCTION_INFO_V1( check_overlap ); + +PG_FUNCTION_INFO_V1( get_range_by_part_oid ); +PG_FUNCTION_INFO_V1( get_range_by_idx ); +PG_FUNCTION_INFO_V1( get_min_range_value ); +PG_FUNCTION_INFO_V1( get_max_range_value ); + +PG_FUNCTION_INFO_V1( build_range_condition ); + + +/* + * ----------------------------- + * Partition creation & checks + * ----------------------------- + */ + +/* + * Returns partition oid for specified parent relid and value. + * In case when partition doesn't exist try to create one. + */ +Datum +find_or_create_range_partition(PG_FUNCTION_ARGS) +{ + Oid parent_oid = PG_GETARG_OID(0); + Datum value = PG_GETARG_DATUM(1); + Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); + const PartRelationInfo *prel; + FmgrInfo cmp_func; + RangeEntry found_rentry; + search_rangerel_result search_state; + + prel = get_pathman_relation_info(parent_oid); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + + fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); + + /* Use available PartRelationInfo to find partition */ + search_state = search_range_partition_eq(value, &cmp_func, prel, + &found_rentry); + + /* + * If found then just return oid, else create new partitions + */ + if (search_state == SEARCH_RANGEREL_FOUND) + PG_RETURN_OID(found_rentry.child_oid); + /* + * If not found and value is between first and last partitions + */ + else if (search_state == SEARCH_RANGEREL_GAP) + PG_RETURN_NULL(); + else + { + Oid child_oid = create_partitions(parent_oid, value, value_type); + + /* get_pathman_relation_info() will refresh this entry */ + invalidate_pathman_relation_info(parent_oid, NULL); + + PG_RETURN_OID(child_oid); + } +} + +/* + * Checks if range overlaps with existing partitions. + * Returns TRUE if overlaps and FALSE otherwise. 
+ */ +Datum +check_overlap(PG_FUNCTION_ARGS) +{ + Oid parent_oid = PG_GETARG_OID(0); + + Datum p1 = PG_GETARG_DATUM(1), + p2 = PG_GETARG_DATUM(2); + + Oid p1_type = get_fn_expr_argtype(fcinfo->flinfo, 1), + p2_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + + FmgrInfo cmp_func_1, + cmp_func_2; + + uint32 i; + RangeEntry *ranges; + const PartRelationInfo *prel; + + prel = get_pathman_relation_info(parent_oid); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + + /* comparison functions */ + fill_type_cmp_fmgr_info(&cmp_func_1, p1_type, prel->atttype); + fill_type_cmp_fmgr_info(&cmp_func_2, p2_type, prel->atttype); + + ranges = PrelGetRangesArray(prel); + for (i = 0; i < PrelChildrenCount(prel); i++) + { + int c1 = FunctionCall2(&cmp_func_1, p1, ranges[i].max); + int c2 = FunctionCall2(&cmp_func_2, p2, ranges[i].min); + + if (c1 < 0 && c2 > 0) + PG_RETURN_BOOL(true); + } + + PG_RETURN_BOOL(false); +} + + +/* + * ------------------------ + * Various useful getters + * ------------------------ + */ + +/* + * Returns range entry (min, max) (in form of array). + * + * arg #1 is the parent's Oid. + * arg #2 is the partition's Oid. 
+ */ +Datum +get_range_by_part_oid(PG_FUNCTION_ARGS) +{ + Oid parent_oid = PG_GETARG_OID(0); + Oid child_oid = PG_GETARG_OID(1); + uint32 i; + RangeEntry *ranges; + const PartRelationInfo *prel; + + prel = get_pathman_relation_info(parent_oid); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + + ranges = PrelGetRangesArray(prel); + + /* Look for the specified partition */ + for (i = 0; i < PrelChildrenCount(prel); i++) + if (ranges[i].child_oid == child_oid) + { + ArrayType *arr; + Datum elems[2] = { ranges[i].min, ranges[i].max }; + + arr = construct_array(elems, 2, prel->atttype, + prel->attlen, prel->attbyval, + prel->attalign); + + PG_RETURN_ARRAYTYPE_P(arr); + } + + /* No partition found, report error */ + elog(ERROR, "Relation \"%s\" has no partition \"%s\"", + get_rel_name_or_relid(parent_oid), + get_rel_name_or_relid(child_oid)); + + PG_RETURN_NULL(); /* keep compiler happy */ +} + +/* + * Returns N-th range entry (min, max) (in form of array). + * + * arg #1 is the parent's Oid. + * arg #2 is the index of the range + * (if it is negative then the last range will be returned). 
+ */ +Datum +get_range_by_idx(PG_FUNCTION_ARGS) +{ + Oid parent_oid = PG_GETARG_OID(0); + int idx = PG_GETARG_INT32(1); + Datum elems[2]; + RangeEntry *ranges; + const PartRelationInfo *prel; + + prel = get_pathman_relation_info(parent_oid); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + + /* Now we have to deal with 'idx' */ + if (idx < -1) + { + elog(ERROR, "Negative indices other than -1 (last partition) are not allowed"); + } + else if (idx == -1) + { + idx = PrelLastChild(prel); + } + else if (((uint32) abs(idx)) >= PrelChildrenCount(prel)) + { + elog(ERROR, "Partition #%d does not exist (total amount is %u)", + idx, PrelChildrenCount(prel)); + } + + ranges = PrelGetRangesArray(prel); + + elems[0] = ranges[idx].min; + elems[1] = ranges[idx].max; + + PG_RETURN_ARRAYTYPE_P(construct_array(elems, 2, + prel->atttype, + prel->attlen, + prel->attbyval, + prel->attalign)); +} + +/* + * Returns min value of the first range for relation. + */ +Datum +get_min_range_value(PG_FUNCTION_ARGS) +{ + Oid parent_oid = PG_GETARG_OID(0); + RangeEntry *ranges; + const PartRelationInfo *prel; + + prel = get_pathman_relation_info(parent_oid); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + + ranges = PrelGetRangesArray(prel); + + PG_RETURN_DATUM(ranges[0].min); +} + +/* + * Returns max value of the last range for relation. + */ +Datum +get_max_range_value(PG_FUNCTION_ARGS) +{ + Oid parent_oid = PG_GETARG_OID(0); + RangeEntry *ranges; + const PartRelationInfo *prel; + + prel = get_pathman_relation_info(parent_oid); + shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + + ranges = PrelGetRangesArray(prel); + + PG_RETURN_DATUM(ranges[PrelLastChild(prel)].max); +} + + +/* + * ------------------------ + * Useful string builders + * ------------------------ + */ + +/* Build range condition for a CHECK CONSTRAINT. 
*/ +Datum +build_range_condition(PG_FUNCTION_ARGS) +{ + text *attname = PG_GETARG_TEXT_P(0); + + Datum min_bound = PG_GETARG_DATUM(1), + max_bound = PG_GETARG_DATUM(2); + + Oid min_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 1), + max_bound_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + + char *result; + + /* This is not going to trigger (not now, at least), just for the safety */ + if (min_bound_type != max_bound_type) + elog(ERROR, "Cannot build range condition: " + "boundaries should be of the same type"); + + /* Create range condition CSTRING */ + result = psprintf("%1$s >= '%2$s' AND %1$s < '%3$s'", + text_to_cstring(attname), + datum_to_cstring(min_bound, min_bound_type), + datum_to_cstring(max_bound, max_bound_type)); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} diff --git a/src/relation_info.c b/src/relation_info.c index 8a298a94d6..af6dc02400 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -627,6 +627,22 @@ DatumGetPartType(Datum datum) return (PartType) val; } +Datum +PartTypeGetTextDatum(PartType parttype) +{ + switch(parttype) + { + case PT_HASH: + return CStringGetTextDatum("HASH"); + + case PT_RANGE: + return CStringGetTextDatum("RANGE"); + + default: + elog(ERROR, "Unknown partitioning type %u", parttype); + } +} + /* * Common PartRelationInfo checks. Emit ERROR if anything is wrong. 
*/ diff --git a/src/relation_info.h b/src/relation_info.h index fc405f9340..215e1257fc 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -137,6 +137,7 @@ Oid forget_parent_of_partition(Oid partition, PartParentSearch *status); Oid get_parent_of_partition(Oid partition, PartParentSearch *status); PartType DatumGetPartType(Datum datum); +Datum PartTypeGetTextDatum(PartType parttype); void shout_if_prel_is_invalid(Oid parent_oid, const PartRelationInfo *prel, From d377d2b8eeea62585f6f13870503f7c3f95dab84 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 3 Oct 2016 20:03:12 +0300 Subject: [PATCH 164/184] add missing includes to pl_funcs.c --- src/pl_funcs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pl_funcs.c b/src/pl_funcs.c index ca02a3ebbd..cd3f63e965 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -9,6 +9,7 @@ */ #include "init.h" +#include "utils.h" #include "pathman.h" #include "relation_info.h" #include "xact_handling.h" @@ -23,6 +24,7 @@ #include "utils/builtins.h" #include "utils/inval.h" #include "utils/jsonb.h" +#include "utils/snapmgr.h" #include "utils/lsyscache.h" #include "utils/syscache.h" From ad7f1597930c6f675c950ffe7b842f2ba0b43a8a Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 3 Oct 2016 20:10:17 +0300 Subject: [PATCH 165/184] add missing includes to pl_range_funcs.c --- src/pl_range_funcs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pl_range_funcs.c b/src/pl_range_funcs.c index 17a3869d74..b62958cbaa 100644 --- a/src/pl_range_funcs.c +++ b/src/pl_range_funcs.c @@ -10,6 +10,7 @@ #include "pathman.h" #include "relation_info.h" +#include "utils.h" #include "utils/array.h" #include "utils/builtins.h" From 25bb11fe0dcdca6ad602262156aba9689d906351 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 4 Oct 2016 16:52:01 +0300 Subject: [PATCH 166/184] domain-related fixes (base type), remove functions get_min_range_value() & get_max_range_value(), overloaded function get_part_range() (written in 
C), fix calls of get_part_range() - pass dummy NULL cast to partitioned column's type --- expected/pg_pathman.out | 22 ++--- init.sql | 14 ++-- range.sql | 178 +++++++++++++++++++--------------------- src/pg_pathman.c | 4 +- src/pl_hash_funcs.c | 2 +- src/pl_range_funcs.c | 127 +++++++++++++--------------- src/relation_info.c | 4 +- src/utils.c | 8 +- 8 files changed, 173 insertions(+), 186 deletions(-) diff --git a/expected/pg_pathman.out b/expected/pg_pathman.out index 9c507d3c88..a06e4ab927 100644 --- a/expected/pg_pathman.out +++ b/expected/pg_pathman.out @@ -9,7 +9,7 @@ INSERT INTO test.hash_rel VALUES (1, 1); INSERT INTO test.hash_rel VALUES (2, 2); INSERT INTO test.hash_rel VALUES (3, 3); SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); -ERROR: Partitioning key 'value' must be NOT NULL +ERROR: partitioning key 'value' must be NOT NULL ALTER TABLE test.hash_rel ALTER COLUMN value SET NOT NULL; SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3, partition_data:=false); create_hash_partitions @@ -130,10 +130,10 @@ CREATE INDEX ON test.range_rel (dt); INSERT INTO test.range_rel (dt, txt) SELECT g, md5(g::TEXT) FROM generate_series('2015-01-01', '2015-04-30', '1 day'::interval) as g; SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DATE, '1 month'::INTERVAL, 2); -ERROR: Partitioning key 'dt' must be NOT NULL +ERROR: partitioning key 'dt' must be NOT NULL ALTER TABLE test.range_rel ALTER COLUMN dt SET NOT NULL; SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DATE, '1 month'::INTERVAL, 2); -ERROR: Not enough partitions to fit all values of 'dt' +ERROR: not enough partitions to fit all values of 'dt' SELECT pathman.create_range_partitions('test.range_rel', 'DT', '2015-01-01'::DATE, '1 month'::INTERVAL); NOTICE: sequence "range_rel_seq" does not exist, skipping create_range_partitions @@ -1045,7 +1045,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN 
'2014-12-15' A (3 rows) SELECT pathman.add_range_partition('test.range_rel', '2014-12-01'::DATE, '2015-01-02'::DATE); -ERROR: Specified range overlaps with existing partitions +ERROR: specified range overlaps with existing partitions SELECT pathman.add_range_partition('test.range_rel', '2014-12-01'::DATE, '2015-01-01'::DATE); add_range_partition --------------------- @@ -1064,7 +1064,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-12-15' A CREATE TABLE test.range_rel_archive (LIKE test.range_rel INCLUDING ALL); SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_archive', '2014-01-01'::DATE, '2015-01-01'::DATE); -ERROR: Specified range overlaps with existing partitions +ERROR: specified range overlaps with existing partitions SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_archive', '2014-01-01'::DATE, '2014-12-01'::DATE); attach_range_partition ------------------------ @@ -1103,12 +1103,12 @@ CREATE TABLE test.range_rel_test1 ( txt TEXT, abc INTEGER); SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test1', '2013-01-01'::DATE, '2014-01-01'::DATE); -ERROR: Partition must have the exact same structure as parent +ERROR: partition must have the exact same structure as parent CREATE TABLE test.range_rel_test2 ( id SERIAL PRIMARY KEY, dt TIMESTAMP); SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test2', '2013-01-01'::DATE, '2014-01-01'::DATE); -ERROR: Partition must have the exact same structure as parent +ERROR: partition must have the exact same structure as parent /* * Zero partitions count and adding partitions with specified name */ @@ -1124,9 +1124,9 @@ NOTICE: sequence "zero_seq" does not exist, skipping (1 row) SELECT pathman.append_range_partition('test.zero', 'test.zero_0'); -ERROR: Cannot append to empty partitions set +ERROR: cannot append to empty partitions set SELECT pathman.prepend_range_partition('test.zero', 'test.zero_1'); -ERROR: 
Cannot prepend to empty partitions set +ERROR: cannot prepend to empty partitions set SELECT pathman.add_range_partition('test.zero', 50, 70, 'test.zero_50'); add_range_partition --------------------- @@ -1689,8 +1689,8 @@ CREATE TABLE replies(id SERIAL PRIMARY KEY, message_id INTEGER REFERENCES messag INSERT INTO messages SELECT g, md5(g::text) FROM generate_series(1, 10) as g; INSERT INTO replies SELECT g, g, md5(g::text) FROM generate_series(1, 10) as g; SELECT create_range_partitions('messages', 'id', 1, 100, 2); -WARNING: Foreign key 'replies_message_id_fkey' references to the relation 'messages' -ERROR: Relation "messages" is referenced from other relations +WARNING: foreign key 'replies_message_id_fkey' references relation 'messages' +ERROR: relation "messages" is referenced from other relations ALTER TABLE replies DROP CONSTRAINT replies_message_id_fkey; SELECT create_range_partitions('messages', 'id', 1, 100, 2); NOTICE: sequence "messages_seq" does not exist, skipping diff --git a/init.sql b/init.sql index 1e0dce73f7..4f04699bb6 100644 --- a/init.sql +++ b/init.sql @@ -332,17 +332,17 @@ BEGIN WHERE oid = p_relation INTO rel_persistence; IF rel_persistence = 't'::CHAR THEN - RAISE EXCEPTION 'Temporary table "%" cannot be partitioned', + RAISE EXCEPTION 'temporary table "%" cannot be partitioned', p_relation::TEXT; END IF; IF EXISTS (SELECT * FROM @extschema@.pathman_config WHERE partrel = p_relation) THEN - RAISE EXCEPTION 'Relation "%" has already been partitioned', p_relation; + RAISE EXCEPTION 'relation "%" has already been partitioned', p_relation; END IF; IF @extschema@.is_attribute_nullable(p_relation, p_attribute) THEN - RAISE EXCEPTION 'Partitioning key ''%'' must be NOT NULL', p_attribute; + RAISE EXCEPTION 'partitioning key ''%'' must be NOT NULL', p_attribute; END IF; /* Check if there are foreign keys that reference the relation */ @@ -350,12 +350,12 @@ BEGIN FROM pg_constraint WHERE confrelid = p_relation::regclass::oid) LOOP is_referenced := 
TRUE; - RAISE WARNING 'Foreign key ''%'' references to the relation ''%''', + RAISE WARNING 'foreign key ''%'' references relation ''%''', v_rec.conname, p_relation; END LOOP; IF is_referenced THEN - RAISE EXCEPTION 'Relation "%" is referenced from other relations', p_relation; + RAISE EXCEPTION 'relation "%" is referenced from other relations', p_relation; END IF; RETURN TRUE; @@ -414,7 +414,7 @@ BEGIN relname = @extschema@.get_schema_qualified_name(cls); IF relname IS NULL THEN - RAISE EXCEPTION 'Relation %s does not exist', cls; + RAISE EXCEPTION 'relation %s does not exist', cls; END IF; RETURN relname; @@ -529,7 +529,7 @@ BEGIN DELETE FROM @extschema@.pathman_config_params WHERE partrel = parent_relid; IF conf_num_del = 0 THEN - RAISE EXCEPTION 'Relation "%" has no partitions', parent_relid::text; + RAISE EXCEPTION 'relation "%" has no partitions', parent_relid::text; END IF; FOR v_rec IN (SELECT inhrelid::regclass::text AS tbl diff --git a/range.sql b/range.sql index 55c04db583..9541f7ccff 100644 --- a/range.sql +++ b/range.sql @@ -63,13 +63,13 @@ BEGIN /* Check lower boundary */ IF p_start_value > v_min THEN - RAISE EXCEPTION 'Start value is less than minimum value of ''%''', + RAISE EXCEPTION 'start value is less than minimum value of ''%''', p_attribute; END IF; /* Check upper boundary */ IF p_end_value <= v_max THEN - RAISE EXCEPTION 'Not enough partitions to fit all values of ''%''', + RAISE EXCEPTION 'not enough partitions to fit all values of ''%''', p_attribute; END IF; END @@ -108,7 +108,7 @@ BEGIN PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); IF p_count < 0 THEN - RAISE EXCEPTION 'Partitions count must not be less than zero'; + RAISE EXCEPTION '''p_count'' must not be less than 0'; END IF; /* Try to determine partitions count if not set */ @@ -117,7 +117,7 @@ BEGIN INTO v_rows_count, v_max; IF v_rows_count = 0 THEN - RAISE EXCEPTION 'Cannot determine partitions count for empty table'; + RAISE EXCEPTION 'cannot determine 
partitions count for empty table'; END IF; p_count := 0; @@ -220,7 +220,7 @@ BEGIN PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); IF p_count < 0 THEN - RAISE EXCEPTION 'Partitions count must not be less than zero'; + RAISE EXCEPTION 'partitions count must not be less than zero'; END IF; /* Try to determine partitions count if not set */ @@ -229,7 +229,7 @@ BEGIN INTO v_rows_count, v_max; IF v_rows_count = 0 THEN - RAISE EXCEPTION 'Cannot determine partitions count for empty table'; + RAISE EXCEPTION 'cannot determine partitions count for empty table'; END IF; IF v_max IS NULL THEN @@ -327,7 +327,7 @@ BEGIN PERFORM @extschema@.common_relation_checks(parent_relid, p_attribute); IF p_interval <= 0 THEN - RAISE EXCEPTION 'Interval must be positive'; + RAISE EXCEPTION 'interval must be positive'; END IF; /* Check boundaries */ @@ -471,7 +471,7 @@ BEGIN WHERE partrel = parent_relid; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; + RAISE EXCEPTION 'table "%" is not partitioned', parent_relid::TEXT; END IF; SELECT * INTO v_plain_schema, v_plain_relname @@ -550,53 +550,57 @@ CREATE OR REPLACE FUNCTION @extschema@.split_range_partition( RETURNS ANYARRAY AS $$ DECLARE - v_parent_relid REGCLASS; - v_attname TEXT; - v_cond TEXT; - v_new_partition TEXT; - v_part_type INTEGER; - v_part_relname TEXT; - v_check_name TEXT; + v_parent REGCLASS; + v_attname TEXT; + v_cond TEXT; + v_new_partition TEXT; + v_part_type INTEGER; + v_part_relname TEXT; + v_check_name TEXT; BEGIN v_part_relname := @extschema@.validate_relname(p_partition); - v_parent_relid = @extschema@.get_parent_of_partition(p_partition); + v_parent = @extschema@.get_parent_of_partition(p_partition); /* Acquire lock on parent */ - PERFORM @extschema@.lock_partitioned_relation(v_parent_relid); + PERFORM @extschema@.lock_partitioned_relation(v_parent); /* Acquire data modification lock (prevent further modifications) */ PERFORM 
@extschema@.prevent_relation_modification(p_partition); SELECT attname, parttype FROM @extschema@.pathman_config - WHERE partrel = v_parent_relid + WHERE partrel = v_parent INTO v_attname, v_part_type; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table "%" is not partitioned', v_parent_relid::TEXT; + RAISE EXCEPTION 'table "%" is not partitioned', v_parent::TEXT; END IF; /* Check if this is a RANGE partition */ IF v_part_type != 2 THEN - RAISE EXCEPTION 'Specified partition isn''t RANGE partition'; + RAISE EXCEPTION 'specified partition isn''t RANGE partition'; END IF; /* Get partition values range */ - p_range := @extschema@.get_range_by_part_oid(v_parent_relid, p_partition, 0); + EXECUTE format('SELECT @extschema@.get_part_range($1, NULL::%s)', + @extschema@.get_attribute_type(v_parent, v_attname)::TEXT) + USING p_partition + INTO p_range; + IF p_range IS NULL THEN - RAISE EXCEPTION 'Could not find specified partition'; + RAISE EXCEPTION 'could not find specified partition'; END IF; /* Check if value fit into the range */ IF p_range[1] > p_value OR p_range[2] <= p_value THEN - RAISE EXCEPTION 'Specified value does not fit into the range [%, %)', + RAISE EXCEPTION 'specified value does not fit into the range [%, %)', p_range[1], p_range[2]; END IF; /* Create new partition */ - v_new_partition := @extschema@.create_single_range_partition(v_parent_relid, + v_new_partition := @extschema@.create_single_range_partition(v_parent, p_value, p_range[2], partition_name); @@ -623,7 +627,7 @@ BEGIN v_cond); /* Tell backend to reload configuration */ - PERFORM @extschema@.on_update_partitions(v_parent_relid); + PERFORM @extschema@.on_update_partitions(v_parent); END $$ LANGUAGE plpgsql; @@ -638,53 +642,53 @@ CREATE OR REPLACE FUNCTION @extschema@.merge_range_partitions( RETURNS VOID AS $$ DECLARE - v_parent_relid1 REGCLASS; - v_parent_relid2 REGCLASS; - v_attname TEXT; - v_part_type INTEGER; - v_atttype REGTYPE; + v_parent1 REGCLASS; + v_parent2 REGCLASS; + v_attname TEXT; + 
v_part_type INTEGER; + v_atttype REGTYPE; BEGIN IF partition1 = partition2 THEN - RAISE EXCEPTION 'Cannot merge partition with itself'; + RAISE EXCEPTION 'cannot merge partition with itself'; END IF; - v_parent_relid1 := @extschema@.get_parent_of_partition(partition1); - v_parent_relid2 := @extschema@.get_parent_of_partition(partition2); + v_parent1 := @extschema@.get_parent_of_partition(partition1); + v_parent2 := @extschema@.get_parent_of_partition(partition2); /* Acquire data modification locks (prevent further modifications) */ PERFORM @extschema@.prevent_relation_modification(partition1); PERFORM @extschema@.prevent_relation_modification(partition2); - IF v_parent_relid1 != v_parent_relid2 THEN - RAISE EXCEPTION 'Cannot merge partitions with different parents'; + IF v_parent1 != v_parent2 THEN + RAISE EXCEPTION 'cannot merge partitions with different parents'; END IF; /* Acquire lock on parent */ - PERFORM @extschema@.lock_partitioned_relation(v_parent_relid1); + PERFORM @extschema@.lock_partitioned_relation(v_parent1); SELECT attname, parttype FROM @extschema@.pathman_config - WHERE partrel = v_parent_relid1 + WHERE partrel = v_parent1 INTO v_attname, v_part_type; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table "%" is not partitioned', v_parent_relid1::TEXT; + RAISE EXCEPTION 'table "%" is not partitioned', v_parent1::TEXT; END IF; /* Check if this is a RANGE partition */ IF v_part_type != 2 THEN - RAISE EXCEPTION 'Specified partitions aren''t RANGE partitions'; + RAISE EXCEPTION 'specified partitions aren''t RANGE partitions'; END IF; v_atttype := @extschema@.get_attribute_type(partition1, v_attname); EXECUTE format('SELECT @extschema@.merge_range_partitions_internal($1, $2, $3, NULL::%s)', @extschema@.get_base_type(v_atttype)::TEXT) - USING v_parent_relid1, partition1, partition2; + USING v_parent1, partition1, partition2; /* Tell backend to reload configuration */ - PERFORM @extschema@.on_update_partitions(v_parent_relid1); + PERFORM 
@extschema@.on_update_partitions(v_parent1); END $$ LANGUAGE plpgsql; @@ -715,20 +719,19 @@ BEGIN INTO v_attname; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; + RAISE EXCEPTION 'table "%" is not partitioned', parent_relid::TEXT; END IF; - /* - * Get ranges - * first and second elements of array are MIN and MAX of partition1 - * third and forth elements are MIN and MAX of partition2 - */ - p_range := @extschema@.get_range_by_part_oid(parent_relid, partition1, 0) || - @extschema@.get_range_by_part_oid(parent_relid, partition2, 0); + /* We have to pass fake NULL casted to column's type */ + EXECUTE format('SELECT @extschema@.get_part_range($1, NULL::%1$s) || + @extschema@.get_part_range($2, NULL::%1$s)', + @extschema@.get_attribute_type(parent_relid, v_attname)::TEXT) + USING partition1, partition2 + INTO p_range; /* Check if ranges are adjacent */ IF p_range[1] != p_range[4] AND p_range[2] != p_range[3] THEN - RAISE EXCEPTION 'Merge failed. Partitions must be adjacent'; + RAISE EXCEPTION 'merge failed, partitions must be adjacent'; END IF; /* Drop constraint on first partition... 
*/ @@ -782,7 +785,7 @@ BEGIN INTO v_attname, v_interval; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; + RAISE EXCEPTION 'table "%" is not partitioned', parent_relid::TEXT; END IF; v_atttype := @extschema@.get_attribute_type(parent_relid, v_attname); @@ -826,10 +829,14 @@ DECLARE BEGIN IF @extschema@.partitions_count(parent_relid) = 0 THEN - RAISE EXCEPTION 'Cannot append to empty partitions set'; + RAISE EXCEPTION 'cannot append to empty partitions set'; END IF; - p_range := @extschema@.get_range_by_idx(parent_relid, -1, 0); + /* We have to pass fake NULL casted to column's type */ + EXECUTE format('SELECT @extschema@.get_part_range($1, -1, NULL::%s)', + p_atttype::TEXT) + USING parent_relid + INTO p_range; IF @extschema@.is_date_type(p_atttype) THEN v_part_name := @extschema@.create_single_range_partition( @@ -880,7 +887,7 @@ BEGIN INTO v_attname, v_interval; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; + RAISE EXCEPTION 'table "%" is not partitioned', parent_relid::TEXT; END IF; v_atttype := @extschema@.get_attribute_type(parent_relid, v_attname); @@ -924,10 +931,14 @@ DECLARE BEGIN IF @extschema@.partitions_count(parent_relid) = 0 THEN - RAISE EXCEPTION 'Cannot prepend to empty partitions set'; + RAISE EXCEPTION 'cannot prepend to empty partitions set'; END IF; - p_range := @extschema@.get_range_by_idx(parent_relid, 0, 0); + /* We have to pass fake NULL casted to column's type */ + EXECUTE format('SELECT @extschema@.get_part_range($1, 0, NULL::%s)', + p_atttype::TEXT) + USING parent_relid + INTO p_range; IF @extschema@.is_date_type(p_atttype) THEN v_part_name := @extschema@.create_single_range_partition( @@ -975,13 +986,13 @@ BEGIN PERFORM @extschema@.lock_partitioned_relation(parent_relid); IF p_start_value >= p_end_value THEN - RAISE EXCEPTION 'Failed to create partition: p_start_value is greater than p_end_value'; + RAISE EXCEPTION 'failed to create 
partition: p_start_value is greater than p_end_value'; END IF; /* check range overlap */ IF @extschema@.partitions_count(parent_relid) > 0 AND @extschema@.check_overlap(parent_relid, p_start_value, p_end_value) THEN - RAISE EXCEPTION 'Specified range overlaps with existing partitions'; + RAISE EXCEPTION 'specified range overlaps with existing partitions'; END IF; /* Create new partition */ @@ -1051,16 +1062,16 @@ BEGIN WHERE oid = p_partition INTO rel_persistence; IF rel_persistence = 't'::CHAR THEN - RAISE EXCEPTION 'Temporary table "%" cannot be used as a partition', + RAISE EXCEPTION 'temporary table "%" cannot be used as a partition', p_partition::TEXT; END IF; IF @extschema@.check_overlap(parent_relid, p_start_value, p_end_value) THEN - RAISE EXCEPTION 'Specified range overlaps with existing partitions'; + RAISE EXCEPTION 'specified range overlaps with existing partitions'; END IF; IF NOT @extschema@.validate_relations_equality(parent_relid, p_partition) THEN - RAISE EXCEPTION 'Partition must have the exact same structure as parent'; + RAISE EXCEPTION 'partition must have the exact same structure as parent'; END IF; /* Set inheritance */ @@ -1069,7 +1080,7 @@ BEGIN v_attname := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; + RAISE EXCEPTION 'table "%" is not partitioned', parent_relid::TEXT; END IF; /* Set check constraint */ @@ -1111,7 +1122,7 @@ BEGIN WHERE partrel = parent_relid; IF v_attname IS NULL THEN - RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; + RAISE EXCEPTION 'table "%" is not partitioned', parent_relid::TEXT; END IF; /* Remove inheritance */ @@ -1187,7 +1198,7 @@ BEGIN attr := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; IF attr IS NULL THEN - RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; + RAISE EXCEPTION 'table "%" is not partitioned', parent_relid::TEXT; 
END IF; SELECT string_agg(attname, ', '), @@ -1237,45 +1248,26 @@ CREATE OR REPLACE FUNCTION @extschema@.build_range_condition( p_start_value ANYELEMENT, p_end_value ANYELEMENT) RETURNS TEXT AS 'pg_pathman', 'build_range_condition' -LANGUAGE C STRICT; +LANGUAGE C; /* * Returns N-th range (as an array of two elements). */ -CREATE OR REPLACE FUNCTION @extschema@.get_range_by_idx( +CREATE OR REPLACE FUNCTION @extschema@.get_part_range( parent_relid REGCLASS, - idx INTEGER, + partition_idx INTEGER, dummy ANYELEMENT) -RETURNS ANYARRAY AS 'pg_pathman', 'get_range_by_idx' -LANGUAGE C STRICT; +RETURNS ANYARRAY AS 'pg_pathman', 'get_part_range_by_idx' +LANGUAGE C; /* * Returns min and max values for specified RANGE partition. */ -CREATE OR REPLACE FUNCTION @extschema@.get_range_by_part_oid( - parent_relid REGCLASS, +CREATE OR REPLACE FUNCTION @extschema@.get_part_range( partition_relid REGCLASS, dummy ANYELEMENT) -RETURNS ANYARRAY AS 'pg_pathman', 'get_range_by_part_oid' -LANGUAGE C STRICT; - -/* - * Returns min value of the first partition's RangeEntry. - */ -CREATE OR REPLACE FUNCTION @extschema@.get_min_range_value( - parent_relid REGCLASS, - dummy ANYELEMENT) -RETURNS ANYELEMENT AS 'pg_pathman', 'get_min_range_value' -LANGUAGE C STRICT; - -/* - * Returns max value of the last partition's RangeEntry. - */ -CREATE OR REPLACE FUNCTION @extschema@.get_max_range_value( - parent_relid REGCLASS, - dummy ANYELEMENT) -RETURNS ANYELEMENT AS 'pg_pathman', 'get_max_range_value' -LANGUAGE C STRICT; +RETURNS ANYARRAY AS 'pg_pathman', 'get_part_range_by_oid' +LANGUAGE C; /* * Checks if range overlaps with existing partitions. @@ -1286,7 +1278,7 @@ CREATE OR REPLACE FUNCTION @extschema@.check_overlap( range_min ANYELEMENT, range_max ANYELEMENT) RETURNS BOOLEAN AS 'pg_pathman', 'check_overlap' -LANGUAGE C STRICT; +LANGUAGE C; /* * Needed for an UPDATE trigger. 
@@ -1295,4 +1287,4 @@ CREATE OR REPLACE FUNCTION @extschema@.find_or_create_range_partition( parent_relid REGCLASS, value ANYELEMENT) RETURNS REGCLASS AS 'pg_pathman', 'find_or_create_range_partition' -LANGUAGE C STRICT; +LANGUAGE C; diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 8d2b7b674e..0b0e48e618 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -1172,7 +1172,9 @@ handle_binary_opexpr(WalkerContext *context, WrapperNode *result, tce = lookup_type_cache(vartype, TYPECACHE_BTREE_OPFAMILY); strategy = get_op_opfamily_strategy(expr->opno, tce->btree_opf); - fill_type_cmp_fmgr_info(&cmp_func, c->consttype, prel->atttype); + fill_type_cmp_fmgr_info(&cmp_func, + getBaseType(c->consttype), + getBaseType(prel->atttype)); switch (prel->parttype) { diff --git a/src/pl_hash_funcs.c b/src/pl_hash_funcs.c index 864a4c1313..6dc0916fbb 100644 --- a/src/pl_hash_funcs.c +++ b/src/pl_hash_funcs.c @@ -34,7 +34,7 @@ get_type_hash_func(PG_FUNCTION_ARGS) } /* - * Wrapper for hash_to_part_index() + * Wrapper for hash_to_part_index(). 
*/ Datum get_hash_part_idx(PG_FUNCTION_ARGS) diff --git a/src/pl_range_funcs.c b/src/pl_range_funcs.c index b62958cbaa..bd71ce0979 100644 --- a/src/pl_range_funcs.c +++ b/src/pl_range_funcs.c @@ -14,6 +14,7 @@ #include "utils/array.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" /* Function declarations */ @@ -21,10 +22,8 @@ PG_FUNCTION_INFO_V1( find_or_create_range_partition); PG_FUNCTION_INFO_V1( check_overlap ); -PG_FUNCTION_INFO_V1( get_range_by_part_oid ); -PG_FUNCTION_INFO_V1( get_range_by_idx ); -PG_FUNCTION_INFO_V1( get_min_range_value ); -PG_FUNCTION_INFO_V1( get_max_range_value ); +PG_FUNCTION_INFO_V1( get_part_range_by_oid ); +PG_FUNCTION_INFO_V1( get_part_range_by_idx ); PG_FUNCTION_INFO_V1( build_range_condition ); @@ -53,7 +52,9 @@ find_or_create_range_partition(PG_FUNCTION_ARGS) prel = get_pathman_relation_info(parent_oid); shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); - fill_type_cmp_fmgr_info(&cmp_func, value_type, prel->atttype); + fill_type_cmp_fmgr_info(&cmp_func, + getBaseType(value_type), + getBaseType(prel->atttype)); /* Use available PartRelationInfo to find partition */ search_state = search_range_partition_eq(value, &cmp_func, prel, @@ -93,7 +94,8 @@ check_overlap(PG_FUNCTION_ARGS) p2 = PG_GETARG_DATUM(2); Oid p1_type = get_fn_expr_argtype(fcinfo->flinfo, 1), - p2_type = get_fn_expr_argtype(fcinfo->flinfo, 2); + p2_type = get_fn_expr_argtype(fcinfo->flinfo, 2), + part_type; FmgrInfo cmp_func_1, cmp_func_2; @@ -105,9 +107,11 @@ check_overlap(PG_FUNCTION_ARGS) prel = get_pathman_relation_info(parent_oid); shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); - /* comparison functions */ - fill_type_cmp_fmgr_info(&cmp_func_1, p1_type, prel->atttype); - fill_type_cmp_fmgr_info(&cmp_func_2, p2_type, prel->atttype); + part_type = getBaseType(prel->atttype); + + /* Fetch comparison functions */ + fill_type_cmp_fmgr_info(&cmp_func_1, getBaseType(p1_type), part_type); + fill_type_cmp_fmgr_info(&cmp_func_2, getBaseType(p2_type), 
part_type); ranges = PrelGetRangesArray(prel); for (i = 0; i < PrelChildrenCount(prel); i++) @@ -136,22 +140,33 @@ check_overlap(PG_FUNCTION_ARGS) * arg #2 is the partition's Oid. */ Datum -get_range_by_part_oid(PG_FUNCTION_ARGS) +get_part_range_by_oid(PG_FUNCTION_ARGS) { - Oid parent_oid = PG_GETARG_OID(0); - Oid child_oid = PG_GETARG_OID(1); + Oid partition_relid = InvalidOid, + parent_relid; + PartParentSearch parent_search; uint32 i; RangeEntry *ranges; const PartRelationInfo *prel; - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + if (PG_ARGISNULL(0)) + elog(ERROR, "'partition_relid' should not be NULL"); + else + partition_relid = PG_GETARG_OID(0); + + parent_relid = get_parent_of_partition(partition_relid, &parent_search); + if (parent_search != PPS_ENTRY_PART_PARENT) + elog(ERROR, "relation \"%s\" is not a partition", + get_rel_name_or_relid(partition_relid)); + + prel = get_pathman_relation_info(parent_relid); + shout_if_prel_is_invalid(parent_relid, prel, PT_RANGE); ranges = PrelGetRangesArray(prel); /* Look for the specified partition */ for (i = 0; i < PrelChildrenCount(prel); i++) - if (ranges[i].child_oid == child_oid) + if (ranges[i].child_oid == partition_relid) { ArrayType *arr; Datum elems[2] = { ranges[i].min, ranges[i].max }; @@ -164,9 +179,9 @@ get_range_by_part_oid(PG_FUNCTION_ARGS) } /* No partition found, report error */ - elog(ERROR, "Relation \"%s\" has no partition \"%s\"", - get_rel_name_or_relid(parent_oid), - get_rel_name_or_relid(child_oid)); + elog(ERROR, "relation \"%s\" has no partition \"%s\"", + get_rel_name_or_relid(parent_relid), + get_rel_name_or_relid(partition_relid)); PG_RETURN_NULL(); /* keep compiler happy */ } @@ -179,36 +194,46 @@ get_range_by_part_oid(PG_FUNCTION_ARGS) * (if it is negative then the last range will be returned). 
*/ Datum -get_range_by_idx(PG_FUNCTION_ARGS) +get_part_range_by_idx(PG_FUNCTION_ARGS) { - Oid parent_oid = PG_GETARG_OID(0); - int idx = PG_GETARG_INT32(1); + Oid parent_relid = InvalidOid; + int partition_idx = 0; Datum elems[2]; RangeEntry *ranges; const PartRelationInfo *prel; - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); + if (PG_ARGISNULL(0)) + elog(ERROR, "'parent_relid' should not be NULL"); + else + parent_relid = PG_GETARG_OID(0); + + if (PG_ARGISNULL(1)) + elog(ERROR, "'partition_idx' should not be NULL"); + else + partition_idx = PG_GETARG_INT32(1); + + prel = get_pathman_relation_info(parent_relid); + shout_if_prel_is_invalid(parent_relid, prel, PT_RANGE); /* Now we have to deal with 'idx' */ - if (idx < -1) + if (partition_idx < -1) { - elog(ERROR, "Negative indices other than -1 (last partition) are not allowed"); + elog(ERROR, "negative indices other than -1 (last partition) are not allowed"); } - else if (idx == -1) + else if (partition_idx == -1) { - idx = PrelLastChild(prel); + partition_idx = PrelLastChild(prel); } - else if (((uint32) abs(idx)) >= PrelChildrenCount(prel)) + else if (((uint32) abs(partition_idx)) >= PrelChildrenCount(prel)) { - elog(ERROR, "Partition #%d does not exist (total amount is %u)", - idx, PrelChildrenCount(prel)); + elog(ERROR, "partition #%d does not exist (total amount is %u)", + partition_idx, PrelChildrenCount(prel)); } ranges = PrelGetRangesArray(prel); - elems[0] = ranges[idx].min; - elems[1] = ranges[idx].max; + elems[0] = ranges[partition_idx].min; + elems[1] = ranges[partition_idx].max; PG_RETURN_ARRAYTYPE_P(construct_array(elems, 2, prel->atttype, @@ -217,42 +242,6 @@ get_range_by_idx(PG_FUNCTION_ARGS) prel->attalign)); } -/* - * Returns min value of the first range for relation. 
- */ -Datum -get_min_range_value(PG_FUNCTION_ARGS) -{ - Oid parent_oid = PG_GETARG_OID(0); - RangeEntry *ranges; - const PartRelationInfo *prel; - - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); - - ranges = PrelGetRangesArray(prel); - - PG_RETURN_DATUM(ranges[0].min); -} - -/* - * Returns max value of the last range for relation. - */ -Datum -get_max_range_value(PG_FUNCTION_ARGS) -{ - Oid parent_oid = PG_GETARG_OID(0); - RangeEntry *ranges; - const PartRelationInfo *prel; - - prel = get_pathman_relation_info(parent_oid); - shout_if_prel_is_invalid(parent_oid, prel, PT_RANGE); - - ranges = PrelGetRangesArray(prel); - - PG_RETURN_DATUM(ranges[PrelLastChild(prel)].max); -} - /* * ------------------------ @@ -276,7 +265,7 @@ build_range_condition(PG_FUNCTION_ARGS) /* This is not going to trigger (not now, at least), just for the safety */ if (min_bound_type != max_bound_type) - elog(ERROR, "Cannot build range condition: " + elog(ERROR, "cannot build range condition: " "boundaries should be of the same type"); /* Create range condition CSTRING */ diff --git a/src/relation_info.c b/src/relation_info.c index af6dc02400..fc7de1c285 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -652,7 +652,7 @@ shout_if_prel_is_invalid(Oid parent_oid, PartType expected_part_type) { if (!prel) - elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", + elog(ERROR, "relation \"%s\" is not partitioned by pg_pathman", get_rel_name_or_relid(parent_oid)); if (!PrelIsValid(prel)) @@ -683,7 +683,7 @@ shout_if_prel_is_invalid(Oid parent_oid, expected_part_type); } - elog(ERROR, "Relation \"%s\" is not partitioned by %s", + elog(ERROR, "relation \"%s\" is not partitioned by %s", get_rel_name_or_relid(parent_oid), expected_str); } diff --git a/src/utils.c b/src/utils.c index b64fbc49fd..f972b9f131 100644 --- a/src/utils.c +++ b/src/utils.c @@ -641,8 +641,12 @@ datum_to_cstring(Datum datum, Oid typid) char * 
get_rel_name_or_relid(Oid relid) { - return DatumGetCString(DirectFunctionCall1(regclassout, - ObjectIdGetDatum(relid))); + char *relname = get_rel_name(relid); + + if (!relname) + return DatumGetCString(DirectFunctionCall1(oidout, + ObjectIdGetDatum(relid))); + return relname; } From ace63869fd270748a6cc674cee305fa9cd9fb44e Mon Sep 17 00:00:00 2001 From: Dmitry Maslyuk Date: Tue, 4 Oct 2016 18:19:32 +0300 Subject: [PATCH 167/184] Update problems.sgml --- doc/src/sgml/problems.sgml | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/problems.sgml b/doc/src/sgml/problems.sgml index 02d509956b..60b41f3ff2 100644 --- a/doc/src/sgml/problems.sgml +++ b/doc/src/sgml/problems.sgml @@ -295,15 +295,24 @@ Where to Report Bugs - In general, send bug reports to the bug report our support email - address at + In general, send bug reports to our support email address at bugs@postgrespro.ru. You are requested to use a descriptive subject for your email message, perhaps parts of the error message. - Do not send bug reports to any of the user mailing lists, such as + Do not send bug reports specific to Postgres Pro + to the PostgreSQL support email address, + as Postgres Pro is not supported by + the PostgreSQL community. + But you can send reports to bugs@postgresql.org + for any bugs related to PostgreSQL. + + + + Even if your bug is not specific to Postgres Pro, + do not send bug reports to any of the user mailing lists, such as pgsql-sql@postgresql.org or pgsql-general@postgresql.org. These mailing lists are for answering @@ -316,9 +325,10 @@ the developers' mailing list pgsql-hackers@postgresql.org. This list is for discussing the development of PostgreSQL, and it would be nice - if we could keep the bug reports separate. We might choose to take up a + if the community could keep the bug reports separate. 
+ The community might choose to take up a discussion about your bug report on pgsql-hackers, - if the problem needs more review. + if the PostgreSQL-related problem needs more review. From ba2f892a4a319974d08c0d68eb5cdf7762029d5e Mon Sep 17 00:00:00 2001 From: Dmitry Maslyuk Date: Tue, 4 Oct 2016 18:40:05 +0300 Subject: [PATCH 168/184] Update pgpathman and sr_plan sgml --- doc/src/sgml/pgpathman.sgml | 4 ++-- doc/src/sgml/sr_plan.sgml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/pgpathman.sgml b/doc/src/sgml/pgpathman.sgml index 36f93c9a6d..78f4c689c8 100644 --- a/doc/src/sgml/pgpathman.sgml +++ b/doc/src/sgml/pgpathman.sgml @@ -94,12 +94,12 @@ WHERE id = 150 - LIST-patitioning; + LIST-partitioning; - HASH-patitioning by non integer attribtes. + HASH-partitioning by non-integer attributes. diff --git a/doc/src/sgml/sr_plan.sgml b/doc/src/sgml/sr_plan.sgml index 34136d18b2..7437da5d4d 100644 --- a/doc/src/sgml/sr_plan.sgml +++ b/doc/src/sgml/sr_plan.sgml @@ -10,7 +10,7 @@ sr_plan is an extension which allows to save query execution plans and use these plans for all repetitions of same query, instead of - optimizing identical query again and again/ + optimizing identical query again and again. sr_plan looks like Oracle Outline system. 
It can be used to lock From 3de19cab1c1411de746ddd22f1a3cd660cab594a Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 4 Oct 2016 18:47:33 +0300 Subject: [PATCH 169/184] drop foreign tables in drop_partitions() func; test FDW features --- init.sql | 16 +++++- tests/README.md | 35 ++++++++++++ tests/partitioning_test.py | 110 +++++++++++++++++++++++++++++++------ 3 files changed, 143 insertions(+), 18 deletions(-) create mode 100644 tests/README.md diff --git a/init.sql b/init.sql index 42ff2ea2b8..b2e0e5d6cc 100644 --- a/init.sql +++ b/init.sql @@ -498,6 +498,7 @@ DECLARE v_rows INTEGER; v_part_count INTEGER := 0; conf_num_del INTEGER; + v_relkind CHAR; BEGIN PERFORM @extschema@.validate_relname(parent_relid); @@ -531,7 +532,20 @@ BEGIN RAISE NOTICE '% rows copied from %', v_rows, v_rec.tbl; END IF; - EXECUTE format('DROP TABLE %s', v_rec.tbl); + /* + * Determine the kind of child relation. It can be either regular + * table (r) or foreign table (f). Depending on relkind we use + * DROP TABLE or DROP FOREIGN TABLE + */ + EXECUTE format('SELECT relkind FROM pg_class WHERE oid = ''%s''::regclass', v_rec.tbl) + INTO v_relkind; + + IF v_relkind = 'f' THEN + EXECUTE format('DROP FOREIGN TABLE %s', v_rec.tbl); + ELSE + EXECUTE format('DROP TABLE %s', v_rec.tbl); + END IF; + v_part_count := v_part_count + 1; END LOOP; diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000000..d7f647b2cd --- /dev/null +++ b/tests/README.md @@ -0,0 +1,35 @@ +# Tests + +This directory contains script to tests some features which cannot be tested +with only regression tests + +## Running + +First of all you need to install `testgres` python module which contains useful +functions to start postgres clusters and make queries: + +``` +pip install testgres +``` + +To run tests execute: + +``` +python -m unittest partitioning_test +``` + +from current directory. 
If you want to run a specific postgres build then +you should specify the path to your pg_config executable by setting PG_CONFIG +environment variable: + +``` +export PG_CONFIG=/path/to/pg_config +``` + +Tests concerning FDW features are disabled by default. To test FDW features +you need to install postgres_fdw contrib module first and then set the TEST_FDW +environment variable: + +``` +export TEST_FDW=1 +``` diff --git a/tests/partitioning_test.py b/tests/partitioning_test.py index f1c03706cb..aacc6a415b 100644 --- a/tests/partitioning_test.py +++ b/tests/partitioning_test.py @@ -12,11 +12,21 @@ import os +def test_fdw(func): + """To run tests with FDW support set environment variable TEST_FDW=1""" + def wrapper(*args, **kwargs): + if os.environ.get('TEST_FDW'): + func(*args, **kwargs) + else: + print('Warning: FDW features tests are disabled, skipping...') + return wrapper + + class PartitioningTests(unittest.TestCase): def setUp(self): self.setup_cmd = [ - 'create extension pg_pathman', + # 'create extension pg_pathman', 'create table abc(id serial, t text)', 'insert into abc select generate_series(1, 300000)', 'select create_hash_partitions(\'abc\', \'id\', 3, partition_data := false)', @@ -26,6 +36,16 @@ def tearDown(self): stop_all() # clean_all() + def start_new_pathman_cluster(self, name='test', allows_streaming=False): + node = get_new_node(name) + node.init(allows_streaming=allows_streaming) + node.append_conf( + 'postgresql.conf', + 'shared_preload_libraries=\'pg_pathman\'\n') + node.start() + node.psql('postgres', 'create extension pg_pathman') + return node + def init_test_data(self, node): """Initialize pg_pathman extension and test data""" for cmd in self.setup_cmd: @@ -42,17 +62,12 @@ def catchup_replica(self, master, replica): def printlog(self, logfile): with open(logfile, 'r') as log: for line in log.readlines(): - print line + print(line) def test_concurrent(self): """Tests concurrent partitioning""" - node = get_new_node('test') try: - 
node.init() - node.append_conf( - 'postgresql.conf', - 'shared_preload_libraries=\'pg_pathman\'\n') - node.start() + node = self.start_new_pathman_cluster() self.init_test_data(node) node.psql( @@ -95,11 +110,7 @@ def test_replication(self): try: # initialize master server - node.init(allows_streaming=True) - node.append_conf( - 'postgresql.conf', - 'shared_preload_libraries=\'pg_pathman\'\n') - node.start() + node = self.start_new_pathman_cluster(allows_streaming=True) node.backup('my_backup') # initialize replica from backup @@ -238,8 +249,8 @@ def add_partition(node, flag, query): con.commit() # Now wait until each thread finishes - for i in range(3): - threads[i].join() + for thread in threads: + thread.join() # Check flags, it should be true which means that threads are # finished @@ -277,11 +288,11 @@ def check_tablespace(node, tablename, tablespace): 'postgresql.conf', 'shared_preload_libraries=\'pg_pathman\'\n') node.start() - path = os.path.join(node.data_dir, 'test_space_location') - os.mkdir(path) node.psql('postgres', 'create extension pg_pathman') # create tablespace + path = os.path.join(node.data_dir, 'test_space_location') + os.mkdir(path) node.psql( 'postgres', 'create tablespace test_space location \'{}\''.format(path)) @@ -330,6 +341,71 @@ def check_tablespace(node, tablename, tablespace): self.assertTrue(check_tablespace(node, 'abc_prepended_2', 'pg_default')) self.assertTrue(check_tablespace(node, 'abc_added_2', 'pg_default')) + @test_fdw + def test_foreign_table(self): + """Test foreign tables""" + + # Start master server + master = get_new_node('test') + master.init() + master.append_conf( + 'postgresql.conf', + 'shared_preload_libraries=\'pg_pathman, postgres_fdw\'\n') + master.start() + master.psql('postgres', 'create extension pg_pathman') + master.psql('postgres', 'create extension postgres_fdw') + master.psql( + 'postgres', + '''create table abc(id serial, name text); + select create_range_partitions('abc', 'id', 0, 10, 2)''') + + # 
Current user name (needed for user mapping) + username = master.execute('postgres', 'select current_user')[0][0] + + # Start foreign server + fserv = get_new_node('fserv') + fserv.init().start() + fserv.safe_psql('postgres', 'create table ftable(id serial, name text)') + fserv.safe_psql('postgres', 'insert into ftable values (25, \'foreign\')') + + # Create foreign table and attach it to partitioned table + master.safe_psql( + 'postgres', + '''create server fserv + foreign data wrapper postgres_fdw + options (dbname 'postgres', host '127.0.0.1', port '{}')'''.format(fserv.port) + ) + master.safe_psql( + 'postgres', + '''create user mapping for {0} + server fserv + options (user '{0}')'''.format(username) + ) + master.safe_psql( + 'postgres', + '''import foreign schema public limit to (ftable) + from server fserv into public''' + ) + master.safe_psql( + 'postgres', + 'select attach_range_partition(\'abc\', \'ftable\', 20, 30)') + + # Check that table attached to partitioned table + self.assertEqual( + master.safe_psql('postgres', 'select * from ftable'), + '25|foreign\n' + ) + + # Check that we can successfully insert new data into foreign partition + master.safe_psql('postgres', 'insert into abc values (26, \'part\')') + self.assertEqual( + master.safe_psql('postgres', 'select * from ftable order by id'), + '25|foreign\n26|part\n' + ) + + # Testing drop partitions (including foreign partitions) + master.safe_psql('postgres', 'select drop_partitions(\'abc\')') + if __name__ == "__main__": unittest.main() From c67196df7ec291b4971ff80d30ca8f6c09a3d17a Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 4 Oct 2016 19:31:10 +0300 Subject: [PATCH 170/184] regression test refactoring (split into 5 tests), fixes for domain partitioning (wrong types) --- Makefile | 6 +- .../{pg_pathman.out => pathman_basic.out} | 401 +---- expected/pathman_callbacks.out | 112 ++ expected/pathman_domains.out | 108 ++ expected/pathman_foreign_keys.out | 71 + 
expected/pathman_runtime_nodes.out | 320 ++++ hash.sql | 2 +- init.sql | 2 +- range.sql | 17 +- results/pathman_basic.out | 1434 +++++++++++++++++ results/pathman_callbacks.out | 112 ++ results/pathman_domains.out | 108 ++ results/pathman_foreign_keys.out | 71 + results/pathman_runtime_nodes.out | 320 ++++ sql/{pg_pathman.sql => pathman_basic.sql} | 307 +--- sql/pathman_callbacks.sql | 55 + sql/pathman_domains.sql | 35 + sql/pathman_foreign_keys.sql | 27 + sql/pathman_runtime_nodes.sql | 270 ++++ src/pg_pathman.c | 6 +- src/utils.c | 18 +- src/utils.h | 2 + 22 files changed, 3091 insertions(+), 713 deletions(-) rename expected/{pg_pathman.out => pathman_basic.out} (78%) create mode 100644 expected/pathman_callbacks.out create mode 100644 expected/pathman_domains.out create mode 100644 expected/pathman_foreign_keys.out create mode 100644 expected/pathman_runtime_nodes.out create mode 100644 results/pathman_basic.out create mode 100644 results/pathman_callbacks.out create mode 100644 results/pathman_domains.out create mode 100644 results/pathman_foreign_keys.out create mode 100644 results/pathman_runtime_nodes.out rename sql/{pg_pathman.sql => pathman_basic.sql} (63%) create mode 100644 sql/pathman_callbacks.sql create mode 100644 sql/pathman_domains.sql create mode 100644 sql/pathman_foreign_keys.sql create mode 100644 sql/pathman_runtime_nodes.sql diff --git a/Makefile b/Makefile index 31abb4e7d8..f055ee12e4 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,11 @@ EXTVERSION = 1.0 DATA_built = $(EXTENSION)--$(EXTVERSION).sql PGFILEDESC = "pg_pathman - partitioning tool" -REGRESS = pg_pathman +REGRESS = pathman_basic \ + pathman_runtime_nodes \ + pathman_callbacks \ + pathman_domains \ + pathman_foreign_keys EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add EXTRA_CLEAN = $(EXTENSION)--$(EXTVERSION).sql ./isolation_output diff --git a/expected/pg_pathman.out b/expected/pathman_basic.out similarity index 78% rename from expected/pg_pathman.out rename to 
expected/pathman_basic.out index a06e4ab927..86ef91aef3 100644 --- a/expected/pg_pathman.out +++ b/expected/pathman_basic.out @@ -648,286 +648,6 @@ SELECT * FROM ttt; Filter: (value = 2) (5 rows) -/* - * Test RuntimeAppend - */ -create or replace function test.pathman_assert(smt bool, error_msg text) returns text as $$ -begin - if not smt then - raise exception '%', error_msg; - end if; - - return 'ok'; -end; -$$ language plpgsql; -create or replace function test.pathman_equal(a text, b text, error_msg text) returns text as $$ -begin - if a != b then - raise exception '''%'' is not equal to ''%'', %', a, b, error_msg; - end if; - - return 'equal'; -end; -$$ language plpgsql; -create or replace function test.pathman_test(query text) returns jsonb as $$ -declare - plan jsonb; -begin - execute 'explain (analyze, format json)' || query into plan; - - return plan; -end; -$$ language plpgsql; -create or replace function test.pathman_test_1() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.runtime_test_1 where id = (select * from test.run_values limit 1)'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Custom Plan Provider')::text, - '"RuntimeAppend"', - 'wrong plan provider'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Relation Name')::text, - format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(1), 6)), - 'wrong partition'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans') into num; - perform test.pathman_equal(num::text, '2', 'expected 2 child plans for custom scan'); - - return 'ok'; -end; -$$ language plpgsql; -create or replace function test.pathman_test_2() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.runtime_test_1 where id = any (select * from test.run_values limit 4)'); - - 
perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Nested Loop"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, - '"RuntimeAppend"', - 'wrong plan provider'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; - perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); - - for i in 0..3 loop - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->i->'Relation Name')::text, - format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), - 'wrong partition'); - - num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; - perform test.pathman_equal(num::text, '1', 'expected 1 loop'); - end loop; - - return 'ok'; -end; -$$ language plpgsql; -create or replace function test.pathman_test_3() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.runtime_test_1 a join test.run_values b on a.id = b.val'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Nested Loop"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, - '"RuntimeAppend"', - 'wrong plan provider'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; - perform test.pathman_equal(num::text, '6', 'expected 6 child plans for custom scan'); - - for i in 0..5 loop - num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; - perform test.pathman_assert(num > 0 and num <= 1718, 'expected no more than 1718 loops'); - end loop; - - return 'ok'; -end; -$$ language plpgsql; -create or replace 
function test.pathman_test_4() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.category c, lateral' || - '(select * from test.runtime_test_2 g where g.category_id = c.id order by rating limit 4) as tg'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Nested Loop"', - 'wrong plan type'); - - /* Limit -> Custom Scan */ - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Custom Plan Provider')::text, - '"RuntimeMergeAppend"', - 'wrong plan provider'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans') into num; - perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); - - for i in 0..3 loop - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Relation Name')::text, - format('"runtime_test_2_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), - 'wrong partition'); - - num = plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Actual Loops'; - perform test.pathman_assert(num = 1, 'expected no more than 1 loops'); - end loop; - - return 'ok'; -end; -$$ language plpgsql; -create or replace function test.pathman_test_5() returns text as $$ -declare - res record; -begin - select - from test.runtime_test_3 - where id = (select * from test.vals order by val limit 1) - limit 1 - into res; /* test empty tlist */ - - - select id, generate_series(1, 2) gen, val - from test.runtime_test_3 - where id = any (select * from test.vals order by val limit 5) - order by id, gen, val - offset 1 limit 1 - into res; /* without IndexOnlyScan */ - - perform test.pathman_equal(res.id::text, '1', 'id is incorrect (t2)'); - perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t2)'); - perform test.pathman_equal(res.val::text, 'k = 1', 'val is incorrect 
(t2)'); - - - select id - from test.runtime_test_3 - where id = any (select * from test.vals order by val limit 5) - order by id - offset 3 limit 1 - into res; /* with IndexOnlyScan */ - - perform test.pathman_equal(res.id::text, '4', 'id is incorrect (t3)'); - - - select v.val v1, generate_series(2, 2) gen, t.val v2 - from test.runtime_test_3 t join test.vals v on id = v.val - order by v1, gen, v2 - limit 1 - into res; - - perform test.pathman_equal(res.v1::text, '1', 'v1 is incorrect (t4)'); - perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t4)'); - perform test.pathman_equal(res.v2::text, 'k = 1', 'v2 is incorrect (t4)'); - - return 'ok'; -end; -$$ language plpgsql -set pg_pathman.enable = true -set enable_hashjoin = off -set enable_mergejoin = off; -NOTICE: RuntimeAppend, RuntimeMergeAppend and PartitionFilter nodes and some other options have been enabled -create table test.run_values as select generate_series(1, 10000) val; -create table test.runtime_test_1(id serial primary key, val real); -insert into test.runtime_test_1 select generate_series(1, 10000), random(); -select pathman.create_hash_partitions('test.runtime_test_1', 'id', 6); - create_hash_partitions ------------------------- - 6 -(1 row) - -create table test.category as (select id, 'cat' || id::text as name from generate_series(1, 4) id); -create table test.runtime_test_2 (id serial, category_id int not null, name text, rating real); -insert into test.runtime_test_2 (select id, (id % 6) + 1 as category_id, 'good' || id::text as name, random() as rating from generate_series(1, 100000) id); -create index on test.runtime_test_2 (category_id, rating); -select pathman.create_hash_partitions('test.runtime_test_2', 'category_id', 6); - create_hash_partitions ------------------------- - 6 -(1 row) - -create table test.vals as (select generate_series(1, 10000) as val); -create table test.runtime_test_3(val text, id serial not null); -insert into test.runtime_test_3(id, val) select * from 
generate_series(1, 10000) k, format('k = %s', k); -select pathman.create_hash_partitions('test.runtime_test_3', 'id', 4); - create_hash_partitions ------------------------- - 4 -(1 row) - -create index on test.runtime_test_3 (id); -create index on test.runtime_test_3_0 (id); -analyze test.run_values; -analyze test.runtime_test_1; -analyze test.runtime_test_2; -analyze test.runtime_test_3; -analyze test.runtime_test_3_0; -set enable_mergejoin = off; -set enable_hashjoin = off; -set pg_pathman.enable_runtimeappend = on; -set pg_pathman.enable_runtimemergeappend = on; -select test.pathman_test_1(); /* RuntimeAppend (select ... where id = (subquery)) */ - pathman_test_1 ----------------- - ok -(1 row) - -select test.pathman_test_2(); /* RuntimeAppend (select ... where id = any(subquery)) */ - pathman_test_2 ----------------- - ok -(1 row) - -select test.pathman_test_3(); /* RuntimeAppend (a join b on a.id = b.val) */ - pathman_test_3 ----------------- - ok -(1 row) - -select test.pathman_test_4(); /* RuntimeMergeAppend (lateral) */ - pathman_test_4 ----------------- - ok -(1 row) - -select test.pathman_test_5(); /* projection tests for RuntimeXXX nodes */ - pathman_test_5 ----------------- - ok -(1 row) - -set pg_pathman.enable_runtimeappend = off; -set pg_pathman.enable_runtimemergeappend = off; -set enable_mergejoin = on; -set enable_hashjoin = on; -drop table test.run_values, test.runtime_test_1, test.runtime_test_2, test.runtime_test_3, test.vals cascade; -NOTICE: drop cascades to 16 other objects /* * Test split and merge */ @@ -1707,119 +1427,8 @@ EXPLAIN (COSTS OFF) SELECT * FROM messages; -> Seq Scan on messages_2 (3 rows) -/* Check primary keys generation */ -CREATE TABLE test_ref(comment TEXT UNIQUE); -INSERT INTO test_ref VALUES('test'); -CREATE TABLE test_fkey( - id INT NOT NULL, - comment TEXT, - FOREIGN KEY (comment) REFERENCES test_ref(comment)); -INSERT INTO test_fkey SELECT generate_series(1, 1000), 'test'; -SELECT create_range_partitions('test_fkey', 
'id', 1, 100); -NOTICE: sequence "test_fkey_seq" does not exist, skipping - create_range_partitions -------------------------- - 10 -(1 row) - -INSERT INTO test_fkey VALUES(1, 'wrong'); -ERROR: insert or update on table "test_fkey_1" violates foreign key constraint "test_fkey_1_comment_fkey" -INSERT INTO test_fkey VALUES(1, 'test'); -SELECT drop_partitions('test_fkey'); -NOTICE: function public.test_fkey_upd_trig_func() does not exist, skipping -NOTICE: 100 rows copied from test_fkey_10 -NOTICE: 100 rows copied from test_fkey_9 -NOTICE: 100 rows copied from test_fkey_8 -NOTICE: 100 rows copied from test_fkey_7 -NOTICE: 100 rows copied from test_fkey_6 -NOTICE: 100 rows copied from test_fkey_5 -NOTICE: 100 rows copied from test_fkey_4 -NOTICE: 100 rows copied from test_fkey_3 -NOTICE: 100 rows copied from test_fkey_2 -NOTICE: 101 rows copied from test_fkey_1 - drop_partitions ------------------ - 10 -(1 row) - -SELECT create_hash_partitions('test_fkey', 'id', 10); - create_hash_partitions ------------------------- - 10 -(1 row) - -INSERT INTO test_fkey VALUES(1, 'wrong'); -ERROR: insert or update on table "test_fkey_0" violates foreign key constraint "test_fkey_0_comment_fkey" -INSERT INTO test_fkey VALUES(1, 'test'); -SELECT drop_partitions('test_fkey'); -NOTICE: function public.test_fkey_upd_trig_func() does not exist, skipping -NOTICE: 94 rows copied from test_fkey_9 -NOTICE: 108 rows copied from test_fkey_8 -NOTICE: 118 rows copied from test_fkey_7 -NOTICE: 95 rows copied from test_fkey_6 -NOTICE: 90 rows copied from test_fkey_5 -NOTICE: 101 rows copied from test_fkey_4 -NOTICE: 116 rows copied from test_fkey_3 -NOTICE: 90 rows copied from test_fkey_2 -NOTICE: 90 rows copied from test_fkey_1 -NOTICE: 100 rows copied from test_fkey_0 - drop_partitions ------------------ - 10 -(1 row) - -/* Check callbacks */ -CREATE TABLE log(id serial, message text); -CREATE OR REPLACE FUNCTION abc_on_partition_created_callback(args JSONB) -RETURNS VOID AS $$ -DECLARE - 
start_value TEXT := args->>'start'; - end_value TEXT := args->'end'; -BEGIN - INSERT INTO log(message) - VALUES (start_value || '-' || end_value); -END -$$ language plpgsql; -CREATE TABLE abc(a serial, b int); -SELECT create_range_partitions('abc', 'a', 1, 100, 2); -NOTICE: sequence "abc_seq" does not exist, skipping - create_range_partitions -------------------------- - 2 -(1 row) - -SELECT set_part_init_callback('abc', 'abc_on_partition_created_callback'); - set_part_init_callback ------------------------- - -(1 row) - -INSERT INTO abc VALUES (123, 1); -INSERT INTO abc VALUES (223, 1); -SELECT append_range_partition('abc'); - append_range_partition ------------------------- - public.abc_4 -(1 row) - -SELECT prepend_range_partition('abc'); - prepend_range_partition -------------------------- - public.abc_5 -(1 row) - -SELECT add_range_partition('abc', 401, 501); - add_range_partition ---------------------- - public.abc_6 -(1 row) - -SELECT message FROM log ORDER BY id; - message ------------ - 201-"301" - 301-"401" - -99-"1" - 401-"501" -(4 rows) - +DROP SCHEMA test CASCADE; +NOTICE: drop cascades to 13 other objects +DROP EXTENSION pg_pathman CASCADE; +NOTICE: drop cascades to 3 other objects +DROP SCHEMA pathman CASCADE; diff --git a/expected/pathman_callbacks.out b/expected/pathman_callbacks.out new file mode 100644 index 0000000000..d168729cee --- /dev/null +++ b/expected/pathman_callbacks.out @@ -0,0 +1,112 @@ +CREATE EXTENSION pg_pathman; +CREATE SCHEMA callbacks; +/* Check callbacks */ +CREATE TABLE callbacks.log(id serial, message text); +CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_range_callback( + args JSONB) +RETURNS VOID AS $$ +DECLARE + start_value TEXT := args->>'start'; + end_value TEXT := args->'end'; +BEGIN + INSERT INTO callbacks.log(message) + VALUES (start_value || '-' || end_value); +END +$$ language plpgsql; +CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_hash_callback( + args JSONB) +RETURNS VOID AS $$ +BEGIN + RAISE 
WARNING 'callback: partition %', args->'partition'; +END +$$ language plpgsql; +/* set callback to be called on RANGE partitions */ +CREATE TABLE callbacks.abc(a serial, b int); +SELECT create_range_partitions('callbacks.abc', 'a', 1, 100, 2); +NOTICE: sequence "abc_seq" does not exist, skipping + create_range_partitions +------------------------- + 2 +(1 row) + +SELECT set_part_init_callback('callbacks.abc', + 'callbacks.abc_on_part_created_range_callback'); + set_part_init_callback +------------------------ + +(1 row) + +INSERT INTO callbacks.abc VALUES (123, 1); +INSERT INTO callbacks.abc VALUES (223, 1); +SELECT append_range_partition('callbacks.abc'); + append_range_partition +------------------------ + callbacks.abc_4 +(1 row) + +SELECT prepend_range_partition('callbacks.abc'); + prepend_range_partition +------------------------- + callbacks.abc_5 +(1 row) + +SELECT add_range_partition('callbacks.abc', 401, 502); + add_range_partition +--------------------- + callbacks.abc_6 +(1 row) + +SELECT message FROM callbacks.log ORDER BY id; + message +----------- + 201-"301" + 301-"401" + -99-"1" + 401-"502" +(4 rows) + +SELECT drop_partitions('callbacks.abc'); +NOTICE: function callbacks.abc_upd_trig_func() does not exist, skipping +NOTICE: 0 rows copied from callbacks.abc_1 +NOTICE: 1 rows copied from callbacks.abc_2 +NOTICE: 1 rows copied from callbacks.abc_3 +NOTICE: 0 rows copied from callbacks.abc_4 +NOTICE: 0 rows copied from callbacks.abc_5 +NOTICE: 0 rows copied from callbacks.abc_6 + drop_partitions +----------------- + 6 +(1 row) + +/* set callback to be called on HASH partitions */ +SELECT set_part_init_callback('callbacks.abc', + 'callbacks.abc_on_part_created_hash_callback'); + set_part_init_callback +------------------------ + +(1 row) + +SELECT create_hash_partitions('callbacks.abc', 'a', 5); +WARNING: callback: partition "abc_0" +WARNING: callback: partition "abc_1" +WARNING: callback: partition "abc_2" +WARNING: callback: partition "abc_3" +WARNING: 
callback: partition "abc_4" + create_hash_partitions +------------------------ + 5 +(1 row) + +DROP SCHEMA callbacks CASCADE; +NOTICE: drop cascades to 10 other objects +DETAIL: drop cascades to table callbacks.log +drop cascades to function callbacks.abc_on_part_created_range_callback(jsonb) +drop cascades to function callbacks.abc_on_part_created_hash_callback(jsonb) +drop cascades to table callbacks.abc +drop cascades to sequence callbacks.abc_seq +drop cascades to table callbacks.abc_0 +drop cascades to table callbacks.abc_1 +drop cascades to table callbacks.abc_2 +drop cascades to table callbacks.abc_3 +drop cascades to table callbacks.abc_4 +DROP EXTENSION pg_pathman CASCADE; diff --git a/expected/pathman_domains.out b/expected/pathman_domains.out new file mode 100644 index 0000000000..590531c400 --- /dev/null +++ b/expected/pathman_domains.out @@ -0,0 +1,108 @@ +CREATE EXTENSION pg_pathman; +CREATE SCHEMA domains; +CREATE DOMAIN domains.dom_test AS numeric CHECK (value < 1200); +CREATE TABLE domains.dom_table(val domains.dom_test NOT NULL); +INSERT INTO domains.dom_table SELECT generate_series(1, 999); +SELECT create_range_partitions('domains.dom_table', 'val', 1, 100); +NOTICE: sequence "dom_table_seq" does not exist, skipping + create_range_partitions +------------------------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM domains.dom_table +WHERE val < 250; + QUERY PLAN +--------------------------------------------------- + Append + -> Seq Scan on dom_table_1 + -> Seq Scan on dom_table_2 + -> Seq Scan on dom_table_3 + Filter: ((val)::numeric < '250'::numeric) +(5 rows) + +INSERT INTO domains.dom_table VALUES(1500); +ERROR: value for domain domains.dom_test violates check constraint "dom_test_check" +INSERT INTO domains.dom_table VALUES(-10); +SELECT append_range_partition('domains.dom_table'); + append_range_partition +------------------------ + domains.dom_table_12 +(1 row) + +SELECT prepend_range_partition('domains.dom_table'); + 
prepend_range_partition +------------------------- + domains.dom_table_13 +(1 row) + +SELECT merge_range_partitions('domains.dom_table_1', 'domains.dom_table_2'); + merge_range_partitions +------------------------ + +(1 row) + +SELECT split_range_partition('domains.dom_table_1', 50); + split_range_partition +----------------------- + {1,201} +(1 row) + +INSERT INTO domains.dom_table VALUES(1101); +EXPLAIN (COSTS OFF) +SELECT * FROM domains.dom_table +WHERE val < 450; + QUERY PLAN +--------------------------------------------------- + Append + -> Seq Scan on dom_table_13 + -> Seq Scan on dom_table_11 + -> Seq Scan on dom_table_1 + -> Seq Scan on dom_table_14 + -> Seq Scan on dom_table_3 + -> Seq Scan on dom_table_4 + -> Seq Scan on dom_table_5 + Filter: ((val)::numeric < '450'::numeric) +(9 rows) + +SELECT * FROM pathman_partition_list +ORDER BY range_min::INT, range_max::INT; + parent | partition | parttype | partattr | range_min | range_max +-------------------+----------------------+----------+----------+-----------+----------- + domains.dom_table | domains.dom_table_13 | 2 | val | -199 | -99 + domains.dom_table | domains.dom_table_11 | 2 | val | -99 | 1 + domains.dom_table | domains.dom_table_1 | 2 | val | 1 | 50 + domains.dom_table | domains.dom_table_14 | 2 | val | 50 | 201 + domains.dom_table | domains.dom_table_3 | 2 | val | 201 | 301 + domains.dom_table | domains.dom_table_4 | 2 | val | 301 | 401 + domains.dom_table | domains.dom_table_5 | 2 | val | 401 | 501 + domains.dom_table | domains.dom_table_6 | 2 | val | 501 | 601 + domains.dom_table | domains.dom_table_7 | 2 | val | 601 | 701 + domains.dom_table | domains.dom_table_8 | 2 | val | 701 | 801 + domains.dom_table | domains.dom_table_9 | 2 | val | 801 | 901 + domains.dom_table | domains.dom_table_10 | 2 | val | 901 | 1001 + domains.dom_table | domains.dom_table_12 | 2 | val | 1001 | 1101 + domains.dom_table | domains.dom_table_15 | 2 | val | 1101 | 1201 +(14 rows) + +DROP SCHEMA domains CASCADE; +NOTICE: 
drop cascades to 17 other objects +DETAIL: drop cascades to type domains.dom_test +drop cascades to table domains.dom_table +drop cascades to sequence domains.dom_table_seq +drop cascades to table domains.dom_table_1 +drop cascades to table domains.dom_table_3 +drop cascades to table domains.dom_table_4 +drop cascades to table domains.dom_table_5 +drop cascades to table domains.dom_table_6 +drop cascades to table domains.dom_table_7 +drop cascades to table domains.dom_table_8 +drop cascades to table domains.dom_table_9 +drop cascades to table domains.dom_table_10 +drop cascades to table domains.dom_table_11 +drop cascades to table domains.dom_table_12 +drop cascades to table domains.dom_table_13 +drop cascades to table domains.dom_table_14 +drop cascades to table domains.dom_table_15 +DROP EXTENSION pg_pathman CASCADE; diff --git a/expected/pathman_foreign_keys.out b/expected/pathman_foreign_keys.out new file mode 100644 index 0000000000..dee09d99b8 --- /dev/null +++ b/expected/pathman_foreign_keys.out @@ -0,0 +1,71 @@ +CREATE EXTENSION pg_pathman; +CREATE SCHEMA fkeys; +/* Check primary keys generation */ +CREATE TABLE fkeys.test_ref(comment TEXT UNIQUE); +INSERT INTO fkeys.test_ref VALUES('test'); +CREATE TABLE fkeys.test_fkey( + id INT NOT NULL, + comment TEXT, + FOREIGN KEY (comment) REFERENCES fkeys.test_ref(comment)); +INSERT INTO fkeys.test_fkey SELECT generate_series(1, 1000), 'test'; +SELECT create_range_partitions('fkeys.test_fkey', 'id', 1, 100); +NOTICE: sequence "test_fkey_seq" does not exist, skipping + create_range_partitions +------------------------- + 10 +(1 row) + +INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); +ERROR: insert or update on table "test_fkey_1" violates foreign key constraint "test_fkey_1_comment_fkey" +DETAIL: Key (comment)=(wrong) is not present in table "test_ref". 
+INSERT INTO fkeys.test_fkey VALUES(1, 'test'); +SELECT drop_partitions('fkeys.test_fkey'); +NOTICE: function fkeys.test_fkey_upd_trig_func() does not exist, skipping +NOTICE: 101 rows copied from fkeys.test_fkey_1 +NOTICE: 100 rows copied from fkeys.test_fkey_2 +NOTICE: 100 rows copied from fkeys.test_fkey_3 +NOTICE: 100 rows copied from fkeys.test_fkey_4 +NOTICE: 100 rows copied from fkeys.test_fkey_5 +NOTICE: 100 rows copied from fkeys.test_fkey_6 +NOTICE: 100 rows copied from fkeys.test_fkey_7 +NOTICE: 100 rows copied from fkeys.test_fkey_8 +NOTICE: 100 rows copied from fkeys.test_fkey_9 +NOTICE: 100 rows copied from fkeys.test_fkey_10 + drop_partitions +----------------- + 10 +(1 row) + +SELECT create_hash_partitions('fkeys.test_fkey', 'id', 10); + create_hash_partitions +------------------------ + 10 +(1 row) + +INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); +ERROR: insert or update on table "test_fkey_0" violates foreign key constraint "test_fkey_0_comment_fkey" +DETAIL: Key (comment)=(wrong) is not present in table "test_ref". 
+INSERT INTO fkeys.test_fkey VALUES(1, 'test'); +SELECT drop_partitions('fkeys.test_fkey'); +NOTICE: function fkeys.test_fkey_upd_trig_func() does not exist, skipping +NOTICE: 100 rows copied from fkeys.test_fkey_0 +NOTICE: 90 rows copied from fkeys.test_fkey_1 +NOTICE: 90 rows copied from fkeys.test_fkey_2 +NOTICE: 116 rows copied from fkeys.test_fkey_3 +NOTICE: 101 rows copied from fkeys.test_fkey_4 +NOTICE: 90 rows copied from fkeys.test_fkey_5 +NOTICE: 95 rows copied from fkeys.test_fkey_6 +NOTICE: 118 rows copied from fkeys.test_fkey_7 +NOTICE: 108 rows copied from fkeys.test_fkey_8 +NOTICE: 94 rows copied from fkeys.test_fkey_9 + drop_partitions +----------------- + 10 +(1 row) + +DROP SCHEMA fkeys CASCADE; +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table fkeys.test_ref +drop cascades to table fkeys.test_fkey +drop cascades to sequence fkeys.test_fkey_seq +DROP EXTENSION pg_pathman CASCADE; diff --git a/expected/pathman_runtime_nodes.out b/expected/pathman_runtime_nodes.out new file mode 100644 index 0000000000..cf75cfa049 --- /dev/null +++ b/expected/pathman_runtime_nodes.out @@ -0,0 +1,320 @@ +CREATE SCHEMA pathman; +CREATE EXTENSION pg_pathman SCHEMA pathman; +CREATE SCHEMA test; +/* + * Test RuntimeAppend + */ +create or replace function test.pathman_assert(smt bool, error_msg text) returns text as $$ +begin + if not smt then + raise exception '%', error_msg; + end if; + + return 'ok'; +end; +$$ language plpgsql; +create or replace function test.pathman_equal(a text, b text, error_msg text) returns text as $$ +begin + if a != b then + raise exception '''%'' is not equal to ''%'', %', a, b, error_msg; + end if; + + return 'equal'; +end; +$$ language plpgsql; +create or replace function test.pathman_test(query text) returns jsonb as $$ +declare + plan jsonb; +begin + execute 'explain (analyze, format json)' || query into plan; + + return plan; +end; +$$ language plpgsql; +create or replace function test.pathman_test_1() returns text 
as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.runtime_test_1 where id = (select * from test.run_values limit 1)'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Custom Plan Provider')::text, + '"RuntimeAppend"', + 'wrong plan provider'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Relation Name')::text, + format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(1), 6)), + 'wrong partition'); + + select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans') into num; + perform test.pathman_equal(num::text, '2', 'expected 2 child plans for custom scan'); + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; +create or replace function test.pathman_test_2() returns text as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.runtime_test_1 where id = any (select * from test.run_values limit 4)'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Nested Loop"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, + '"RuntimeAppend"', + 'wrong plan provider'); + + select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; + perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); + + for i in 0..3 loop + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->i->'Relation Name')::text, + format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), + 'wrong partition'); + + num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; + perform 
test.pathman_equal(num::text, '1', 'expected 1 loop'); + end loop; + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; +create or replace function test.pathman_test_3() returns text as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.runtime_test_1 a join test.run_values b on a.id = b.val'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Nested Loop"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, + '"RuntimeAppend"', + 'wrong plan provider'); + + select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; + perform test.pathman_equal(num::text, '6', 'expected 6 child plans for custom scan'); + + for i in 0..5 loop + num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; + perform test.pathman_assert(num > 0 and num <= 1718, 'expected no more than 1718 loops'); + end loop; + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; +create or replace function test.pathman_test_4() returns text as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.category c, lateral' || + '(select * from test.runtime_test_2 g where g.category_id = c.id order by rating limit 4) as tg'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Nested Loop"', + 'wrong plan type'); + + /* Limit -> Custom Scan */ + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Custom Plan Provider')::text, + '"RuntimeMergeAppend"', + 'wrong plan provider'); + + select 
count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans') into num; + perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); + + for i in 0..3 loop + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Relation Name')::text, + format('"runtime_test_2_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), + 'wrong partition'); + + num = plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Actual Loops'; + perform test.pathman_assert(num = 1, 'expected no more than 1 loops'); + end loop; + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; +create or replace function test.pathman_test_5() returns text as $$ +declare + res record; +begin + select + from test.runtime_test_3 + where id = (select * from test.vals order by val limit 1) + limit 1 + into res; /* test empty tlist */ + + + select id, generate_series(1, 2) gen, val + from test.runtime_test_3 + where id = any (select * from test.vals order by val limit 5) + order by id, gen, val + offset 1 limit 1 + into res; /* without IndexOnlyScan */ + + perform test.pathman_equal(res.id::text, '1', 'id is incorrect (t2)'); + perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t2)'); + perform test.pathman_equal(res.val::text, 'k = 1', 'val is incorrect (t2)'); + + + select id + from test.runtime_test_3 + where id = any (select * from test.vals order by val limit 5) + order by id + offset 3 limit 1 + into res; /* with IndexOnlyScan */ + + perform test.pathman_equal(res.id::text, '4', 'id is incorrect (t3)'); + + + select v.val v1, generate_series(2, 2) gen, t.val v2 + from test.runtime_test_3 t join test.vals v on id = v.val + order by v1, gen, v2 + limit 1 + into res; + + perform test.pathman_equal(res.v1::text, '1', 'v1 is incorrect (t4)'); + perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t4)'); + perform 
test.pathman_equal(res.v2::text, 'k = 1', 'v2 is incorrect (t4)'); + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_hashjoin = off +set enable_mergejoin = off; +create table test.run_values as select generate_series(1, 10000) val; +create table test.runtime_test_1(id serial primary key, val real); +insert into test.runtime_test_1 select generate_series(1, 10000), random(); +select pathman.create_hash_partitions('test.runtime_test_1', 'id', 6); + create_hash_partitions +------------------------ + 6 +(1 row) + +create table test.category as (select id, 'cat' || id::text as name from generate_series(1, 4) id); +create table test.runtime_test_2 (id serial, category_id int not null, name text, rating real); +insert into test.runtime_test_2 (select id, (id % 6) + 1 as category_id, 'good' || id::text as name, random() as rating from generate_series(1, 100000) id); +create index on test.runtime_test_2 (category_id, rating); +select pathman.create_hash_partitions('test.runtime_test_2', 'category_id', 6); + create_hash_partitions +------------------------ + 6 +(1 row) + +create table test.vals as (select generate_series(1, 10000) as val); +create table test.runtime_test_3(val text, id serial not null); +insert into test.runtime_test_3(id, val) select * from generate_series(1, 10000) k, format('k = %s', k); +select pathman.create_hash_partitions('test.runtime_test_3', 'id', 4); + create_hash_partitions +------------------------ + 4 +(1 row) + +create index on test.runtime_test_3 (id); +create index on test.runtime_test_3_0 (id); +analyze test.run_values; +analyze test.runtime_test_1; +analyze test.runtime_test_2; +analyze test.runtime_test_3; +analyze test.runtime_test_3_0; +set pg_pathman.enable_runtimeappend = on; +set pg_pathman.enable_runtimemergeappend = on; +select test.pathman_test_1(); /* RuntimeAppend (select ... 
where id = (subquery)) */ + pathman_test_1 +---------------- + ok +(1 row) + +select test.pathman_test_2(); /* RuntimeAppend (select ... where id = any(subquery)) */ + pathman_test_2 +---------------- + ok +(1 row) + +select test.pathman_test_3(); /* RuntimeAppend (a join b on a.id = b.val) */ + pathman_test_3 +---------------- + ok +(1 row) + +select test.pathman_test_4(); /* RuntimeMergeAppend (lateral) */ + pathman_test_4 +---------------- + ok +(1 row) + +select test.pathman_test_5(); /* projection tests for RuntimeXXX nodes */ + pathman_test_5 +---------------- + ok +(1 row) + +DROP SCHEMA test CASCADE; +NOTICE: drop cascades to 30 other objects +DETAIL: drop cascades to function test.pathman_assert(boolean,text) +drop cascades to function test.pathman_equal(text,text,text) +drop cascades to function test.pathman_test(text) +drop cascades to function test.pathman_test_1() +drop cascades to function test.pathman_test_2() +drop cascades to function test.pathman_test_3() +drop cascades to function test.pathman_test_4() +drop cascades to function test.pathman_test_5() +drop cascades to table test.run_values +drop cascades to table test.runtime_test_1 +drop cascades to table test.runtime_test_1_0 +drop cascades to table test.runtime_test_1_1 +drop cascades to table test.runtime_test_1_2 +drop cascades to table test.runtime_test_1_3 +drop cascades to table test.runtime_test_1_4 +drop cascades to table test.runtime_test_1_5 +drop cascades to table test.category +drop cascades to table test.runtime_test_2 +drop cascades to table test.runtime_test_2_0 +drop cascades to table test.runtime_test_2_1 +drop cascades to table test.runtime_test_2_2 +drop cascades to table test.runtime_test_2_3 +drop cascades to table test.runtime_test_2_4 +drop cascades to table test.runtime_test_2_5 +drop cascades to table test.vals +drop cascades to table test.runtime_test_3 +drop cascades to table test.runtime_test_3_0 +drop cascades to table test.runtime_test_3_1 +drop cascades to table 
test.runtime_test_3_2 +drop cascades to table test.runtime_test_3_3 +DROP EXTENSION pg_pathman CASCADE; +DROP SCHEMA pathman CASCADE; diff --git a/hash.sql b/hash.sql index c69920aee1..af0bf673be 100644 --- a/hash.sql +++ b/hash.sql @@ -159,7 +159,7 @@ BEGIN attr := attname FROM @extschema@.pathman_config WHERE partrel = parent_relid; IF attr IS NULL THEN - RAISE EXCEPTION 'Table "%" is not partitioned', parent_relid::TEXT; + RAISE EXCEPTION 'table "%" is not partitioned', parent_relid::TEXT; END IF; SELECT string_agg(attname, ', '), diff --git a/init.sql b/init.sql index 4f04699bb6..f2837822de 100644 --- a/init.sql +++ b/init.sql @@ -636,7 +636,7 @@ LANGUAGE C STRICT; /* * Return tablespace name for specified relation. */ -CREATE OR REPLACE FUNCTION @extschema@.get_rel_tablespace_name(relation REGCLASS) +CREATE OR REPLACE FUNCTION @extschema@.get_rel_tablespace_name(REGCLASS) RETURNS TEXT AS 'pg_pathman', 'get_rel_tablespace_name' LANGUAGE C STRICT; diff --git a/range.sql b/range.sql index 9541f7ccff..1e0f196561 100644 --- a/range.sql +++ b/range.sql @@ -552,14 +552,14 @@ $$ DECLARE v_parent REGCLASS; v_attname TEXT; + v_atttype REGTYPE; v_cond TEXT; v_new_partition TEXT; v_part_type INTEGER; - v_part_relname TEXT; v_check_name TEXT; BEGIN - v_part_relname := @extschema@.validate_relname(p_partition); + PERFORM @extschema@.validate_relname(p_partition); v_parent = @extschema@.get_parent_of_partition(p_partition); /* Acquire lock on parent */ @@ -582,9 +582,11 @@ BEGIN RAISE EXCEPTION 'specified partition isn''t RANGE partition'; END IF; + v_atttype = @extschema@.get_attribute_type(v_parent, v_attname); + /* Get partition values range */ EXECUTE format('SELECT @extschema@.get_part_range($1, NULL::%s)', - @extschema@.get_attribute_type(v_parent, v_attname)::TEXT) + @extschema@.get_base_type(v_atttype)::TEXT) USING p_partition INTO p_range; @@ -711,6 +713,7 @@ RETURNS ANYARRAY AS $$ DECLARE v_attname TEXT; + v_atttype REGTYPE; v_check_name TEXT; BEGIN @@ -722,10 
+725,12 @@ BEGIN RAISE EXCEPTION 'table "%" is not partitioned', parent_relid::TEXT; END IF; + v_atttype = @extschema@.get_attribute_type(parent_relid, v_attname); + /* We have to pass fake NULL casted to column's type */ EXECUTE format('SELECT @extschema@.get_part_range($1, NULL::%1$s) || @extschema@.get_part_range($2, NULL::%1$s)', - @extschema@.get_attribute_type(parent_relid, v_attname)::TEXT) + @extschema@.get_base_type(v_atttype)::TEXT) USING partition1, partition2 INTO p_range; @@ -834,7 +839,7 @@ BEGIN /* We have to pass fake NULL casted to column's type */ EXECUTE format('SELECT @extschema@.get_part_range($1, -1, NULL::%s)', - p_atttype::TEXT) + @extschema@.get_base_type(p_atttype)::TEXT) USING parent_relid INTO p_range; @@ -936,7 +941,7 @@ BEGIN /* We have to pass fake NULL casted to column's type */ EXECUTE format('SELECT @extschema@.get_part_range($1, 0, NULL::%s)', - p_atttype::TEXT) + @extschema@.get_base_type(p_atttype)::TEXT) USING parent_relid INTO p_range; diff --git a/results/pathman_basic.out b/results/pathman_basic.out new file mode 100644 index 0000000000..86ef91aef3 --- /dev/null +++ b/results/pathman_basic.out @@ -0,0 +1,1434 @@ +\set VERBOSITY terse +CREATE SCHEMA pathman; +CREATE EXTENSION pg_pathman SCHEMA pathman; +CREATE SCHEMA test; +CREATE TABLE test.hash_rel ( + id SERIAL PRIMARY KEY, + value INTEGER); +INSERT INTO test.hash_rel VALUES (1, 1); +INSERT INTO test.hash_rel VALUES (2, 2); +INSERT INTO test.hash_rel VALUES (3, 3); +SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); +ERROR: partitioning key 'value' must be NOT NULL +ALTER TABLE test.hash_rel ALTER COLUMN value SET NOT NULL; +SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3, partition_data:=false); + create_hash_partitions +------------------------ + 3 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel + -> Seq Scan on hash_rel_0 + -> Seq Scan on 
hash_rel_1 + -> Seq Scan on hash_rel_2 +(5 rows) + +SELECT * FROM test.hash_rel; + id | value +----+------- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +SELECT pathman.set_enable_parent('test.hash_rel', false); + set_enable_parent +------------------- + +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel_0 + -> Seq Scan on hash_rel_1 + -> Seq Scan on hash_rel_2 +(4 rows) + +SELECT * FROM test.hash_rel; + id | value +----+------- +(0 rows) + +SELECT pathman.set_enable_parent('test.hash_rel', true); + set_enable_parent +------------------- + +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel + -> Seq Scan on hash_rel_0 + -> Seq Scan on hash_rel_1 + -> Seq Scan on hash_rel_2 +(5 rows) + +SELECT * FROM test.hash_rel; + id | value +----+------- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +SELECT pathman.drop_partitions('test.hash_rel'); +NOTICE: function test.hash_rel_upd_trig_func() does not exist, skipping +NOTICE: 0 rows copied from test.hash_rel_0 +NOTICE: 0 rows copied from test.hash_rel_1 +NOTICE: 0 rows copied from test.hash_rel_2 + drop_partitions +----------------- + 3 +(1 row) + +SELECT pathman.create_hash_partitions('test.hash_rel', 'Value', 3); + create_hash_partitions +------------------------ + 3 +(1 row) + +SELECT COUNT(*) FROM test.hash_rel; + count +------- + 3 +(1 row) + +SELECT COUNT(*) FROM ONLY test.hash_rel; + count +------- + 0 +(1 row) + +INSERT INTO test.hash_rel VALUES (4, 4); +INSERT INTO test.hash_rel VALUES (5, 5); +INSERT INTO test.hash_rel VALUES (6, 6); +SELECT COUNT(*) FROM test.hash_rel; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM ONLY test.hash_rel; + count +------- + 0 +(1 row) + +CREATE TABLE test.range_rel ( + id SERIAL PRIMARY KEY, + dt TIMESTAMP, + txt TEXT); +CREATE INDEX ON test.range_rel (dt); +INSERT INTO test.range_rel (dt, txt) +SELECT g, md5(g::TEXT) 
FROM generate_series('2015-01-01', '2015-04-30', '1 day'::interval) as g; +SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DATE, '1 month'::INTERVAL, 2); +ERROR: partitioning key 'dt' must be NOT NULL +ALTER TABLE test.range_rel ALTER COLUMN dt SET NOT NULL; +SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DATE, '1 month'::INTERVAL, 2); +ERROR: not enough partitions to fit all values of 'dt' +SELECT pathman.create_range_partitions('test.range_rel', 'DT', '2015-01-01'::DATE, '1 month'::INTERVAL); +NOTICE: sequence "range_rel_seq" does not exist, skipping + create_range_partitions +------------------------- + 4 +(1 row) + +SELECT COUNT(*) FROM test.range_rel; + count +------- + 120 +(1 row) + +SELECT COUNT(*) FROM ONLY test.range_rel; + count +------- + 0 +(1 row) + +CREATE TABLE test.num_range_rel ( + id SERIAL PRIMARY KEY, + txt TEXT); +SELECT pathman.create_range_partitions('test.num_range_rel', 'id', 0, 1000, 4); +NOTICE: sequence "num_range_rel_seq" does not exist, skipping + create_range_partitions +------------------------- + 4 +(1 row) + +SELECT COUNT(*) FROM test.num_range_rel; + count +------- + 0 +(1 row) + +SELECT COUNT(*) FROM ONLY test.num_range_rel; + count +------- + 0 +(1 row) + +INSERT INTO test.num_range_rel + SELECT g, md5(g::TEXT) FROM generate_series(1, 3000) as g; +SELECT COUNT(*) FROM test.num_range_rel; + count +------- + 3000 +(1 row) + +SELECT COUNT(*) FROM ONLY test.num_range_rel; + count +------- + 0 +(1 row) + +SELECT * FROM ONLY test.range_rel UNION SELECT * FROM test.range_rel; +ERROR: It is prohibited to query partitioned tables both with and without ONLY modifier +SET pg_pathman.enable_runtimeappend = OFF; +SET pg_pathman.enable_runtimemergeappend = OFF; +VACUUM; +/* update triggers test */ +SELECT pathman.create_hash_update_trigger('test.hash_rel'); + create_hash_update_trigger +----------------------------- + test.hash_rel_upd_trig_func +(1 row) + +UPDATE test.hash_rel SET 
value = 7 WHERE value = 6; +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value = 7; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel_1 + Filter: (value = 7) +(3 rows) + +SELECT * FROM test.hash_rel WHERE value = 7; + id | value +----+------- + 6 | 7 +(1 row) + +SELECT pathman.create_range_update_trigger('test.num_range_rel'); + create_range_update_trigger +---------------------------------- + test.num_range_rel_upd_trig_func +(1 row) + +UPDATE test.num_range_rel SET id = 3001 WHERE id = 1; +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id = 3001; + QUERY PLAN +----------------------------------- + Append + -> Seq Scan on num_range_rel_4 + Filter: (id = 3001) +(3 rows) + +SELECT * FROM test.num_range_rel WHERE id = 3001; + id | txt +------+---------------------------------- + 3001 | c4ca4238a0b923820dcc509a6f75849b +(1 row) + +SET enable_indexscan = OFF; +SET enable_bitmapscan = OFF; +SET enable_seqscan = ON; +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel_0 + -> Seq Scan on hash_rel_1 + -> Seq Scan on hash_rel_2 +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value = 2; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel_1 + Filter: (value = 2) +(3 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value = 2 OR value = 1; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel_1 + Filter: (value = 2) + -> Seq Scan on hash_rel_2 + Filter: (value = 1) +(5 rows) + +-- Temporarily commented out +-- EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value BETWEEN 1 AND 2; +-- QUERY PLAN +-- ------------------------------------------------- +-- Append +-- -> Seq Scan on hash_rel_1 +-- Filter: ((value >= 1) AND (value <= 2)) +-- -> Seq Scan on hash_rel_2 +-- Filter: ((value >= 1) AND (value <= 2)) +-- (5 rows) +EXPLAIN (COSTS OFF) SELECT * FROM 
test.num_range_rel WHERE id > 2500; + QUERY PLAN +----------------------------------- + Append + -> Seq Scan on num_range_rel_3 + Filter: (id > 2500) + -> Seq Scan on num_range_rel_4 +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 1000 AND id < 3000; + QUERY PLAN +----------------------------------- + Append + -> Seq Scan on num_range_rel_2 + -> Seq Scan on num_range_rel_3 +(3 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 1500 AND id < 2500; + QUERY PLAN +----------------------------------- + Append + -> Seq Scan on num_range_rel_2 + Filter: (id >= 1500) + -> Seq Scan on num_range_rel_3 + Filter: (id < 2500) +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE (id >= 500 AND id < 1500) OR (id > 2500); + QUERY PLAN +----------------------------------- + Append + -> Seq Scan on num_range_rel_1 + Filter: (id >= 500) + -> Seq Scan on num_range_rel_2 + Filter: (id < 1500) + -> Seq Scan on num_range_rel_3 + Filter: (id > 2500) + -> Seq Scan on num_range_rel_4 +(8 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt > '2015-02-15'; + QUERY PLAN +-------------------------------------------------------------------------------- + Append + -> Seq Scan on range_rel_2 + Filter: (dt > 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) + -> Seq Scan on range_rel_3 + -> Seq Scan on range_rel_4 +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt >= '2015-02-01' AND dt < '2015-03-01'; + QUERY PLAN +------------------------------- + Append + -> Seq Scan on range_rel_2 +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt >= '2015-02-15' AND dt < '2015-03-15'; + QUERY PLAN +--------------------------------------------------------------------------------- + Append + -> Seq Scan on range_rel_2 + Filter: (dt >= 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) + -> Seq Scan on range_rel_3 + Filter: (dt < 'Sun Mar 15 00:00:00 2015'::timestamp without 
time zone) +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE (dt >= '2015-01-15' AND dt < '2015-02-15') OR (dt > '2015-03-15'); + QUERY PLAN +--------------------------------------------------------------------------------- + Append + -> Seq Scan on range_rel_1 + Filter: (dt >= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) + -> Seq Scan on range_rel_2 + Filter: (dt < 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) + -> Seq Scan on range_rel_3 + Filter: (dt > 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) + -> Seq Scan on range_rel_4 +(8 rows) + +SET enable_indexscan = ON; +SET enable_bitmapscan = OFF; +SET enable_seqscan = OFF; +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel_0 + -> Seq Scan on hash_rel_1 + -> Seq Scan on hash_rel_2 +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value = 2; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel_1 + Filter: (value = 2) +(3 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value = 2 OR value = 1; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on hash_rel_1 + Filter: (value = 2) + -> Seq Scan on hash_rel_2 + Filter: (value = 1) +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id > 2500; + QUERY PLAN +---------------------------------------------------------------- + Append + -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 + Index Cond: (id > 2500) + -> Seq Scan on num_range_rel_4 +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 1000 AND id < 3000; + QUERY PLAN +----------------------------------- + Append + -> Seq Scan on num_range_rel_2 + -> Seq Scan on num_range_rel_3 +(3 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 1500 AND id < 2500; + QUERY PLAN +---------------------------------------------------------------- + Append 
+ -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 + Index Cond: (id >= 1500) + -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 + Index Cond: (id < 2500) +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE (id >= 500 AND id < 1500) OR (id > 2500); + QUERY PLAN +---------------------------------------------------------------- + Append + -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 + Index Cond: (id >= 500) + -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 + Index Cond: (id < 1500) + -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 + Index Cond: (id > 2500) + -> Seq Scan on num_range_rel_4 +(8 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel ORDER BY id; + QUERY PLAN +---------------------------------------------------------------- + Append + -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 + -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 + -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 + -> Index Scan using num_range_rel_4_pkey on num_range_rel_4 +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id <= 2500 ORDER BY id; + QUERY PLAN +---------------------------------------------------------------- + Append + -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 + -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 + -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 + Index Cond: (id <= 2500) +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt > '2015-02-15'; + QUERY PLAN +------------------------------------------------------------------------------------ + Append + -> Index Scan using range_rel_2_dt_idx on range_rel_2 + Index Cond: (dt > 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) + -> Seq Scan on range_rel_3 + -> Seq Scan on range_rel_4 +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt >= '2015-02-01' AND dt < '2015-03-01'; + QUERY PLAN 
+------------------------------- + Append + -> Seq Scan on range_rel_2 +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt >= '2015-02-15' AND dt < '2015-03-15'; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Index Scan using range_rel_2_dt_idx on range_rel_2 + Index Cond: (dt >= 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) + -> Index Scan using range_rel_3_dt_idx on range_rel_3 + Index Cond: (dt < 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE (dt >= '2015-01-15' AND dt < '2015-02-15') OR (dt > '2015-03-15'); + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Index Scan using range_rel_1_dt_idx on range_rel_1 + Index Cond: (dt >= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) + -> Index Scan using range_rel_2_dt_idx on range_rel_2 + Index Cond: (dt < 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) + -> Index Scan using range_rel_3_dt_idx on range_rel_3 + Index Cond: (dt > 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) + -> Seq Scan on range_rel_4 +(8 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel ORDER BY dt; + QUERY PLAN +---------------------------------------------------------- + Append + -> Index Scan using range_rel_1_dt_idx on range_rel_1 + -> Index Scan using range_rel_2_dt_idx on range_rel_2 + -> Index Scan using range_rel_3_dt_idx on range_rel_3 + -> Index Scan using range_rel_4_dt_idx on range_rel_4 +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt >= '2015-01-15' ORDER BY dt DESC; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Index Scan Backward using range_rel_4_dt_idx on range_rel_4 + -> Index Scan Backward using range_rel_3_dt_idx on range_rel_3 + -> Index Scan Backward using 
range_rel_2_dt_idx on range_rel_2 + -> Index Scan Backward using range_rel_1_dt_idx on range_rel_1 + Index Cond: (dt >= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) +(6 rows) + +/* + * Sorting + */ +SET enable_indexscan = OFF; +SET enable_seqscan = ON; +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt < '2015-03-01' ORDER BY dt; + QUERY PLAN +------------------------------------- + Sort + Sort Key: range_rel_1.dt + -> Append + -> Seq Scan on range_rel_1 + -> Seq Scan on range_rel_2 +(5 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel_1 UNION ALL SELECT * FROM test.range_rel_2 ORDER BY dt; + QUERY PLAN +------------------------------------- + Sort + Sort Key: range_rel_1.dt + -> Append + -> Seq Scan on range_rel_1 + -> Seq Scan on range_rel_2 +(5 rows) + +SET enable_indexscan = ON; +SET enable_seqscan = OFF; +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt < '2015-03-01' ORDER BY dt; + QUERY PLAN +---------------------------------------------------------- + Append + -> Index Scan using range_rel_1_dt_idx on range_rel_1 + -> Index Scan using range_rel_2_dt_idx on range_rel_2 +(3 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel_1 UNION ALL SELECT * FROM test.range_rel_2 ORDER BY dt; + QUERY PLAN +---------------------------------------------------------- + Merge Append + Sort Key: range_rel_1.dt + -> Index Scan using range_rel_1_dt_idx on range_rel_1 + -> Index Scan using range_rel_2_dt_idx on range_rel_2 +(4 rows) + +/* + * Join + */ +SET enable_hashjoin = OFF; +set enable_nestloop = OFF; +SET enable_mergejoin = ON; +EXPLAIN (COSTS OFF) +SELECT * FROM test.range_rel j1 +JOIN test.range_rel j2 on j2.id = j1.id +JOIN test.num_range_rel j3 on j3.id = j1.id +WHERE j1.dt < '2015-03-01' AND j2.dt >= '2015-02-01' ORDER BY j2.dt; + QUERY PLAN +------------------------------------------------------------------------------------------- + Sort + Sort Key: j2.dt + -> Merge Join + Merge Cond: (j3.id = j2.id) + -> Append + -> Index 
Scan using num_range_rel_1_pkey on num_range_rel_1 j3 + -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 j3_1 + -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 j3_2 + -> Index Scan using num_range_rel_4_pkey on num_range_rel_4 j3_3 + -> Materialize + -> Merge Join + Merge Cond: (j2.id = j1.id) + -> Merge Append + Sort Key: j2.id + -> Index Scan using range_rel_2_pkey on range_rel_2 j2 + -> Index Scan using range_rel_3_pkey on range_rel_3 j2_1 + -> Index Scan using range_rel_4_pkey on range_rel_4 j2_2 + -> Materialize + -> Merge Append + Sort Key: j1.id + -> Index Scan using range_rel_1_pkey on range_rel_1 j1 + -> Index Scan using range_rel_2_pkey on range_rel_2 j1_1 +(22 rows) + +SET enable_hashjoin = ON; +SET enable_mergejoin = OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM test.range_rel j1 +JOIN test.range_rel j2 on j2.id = j1.id +JOIN test.num_range_rel j3 on j3.id = j1.id +WHERE j1.dt < '2015-03-01' AND j2.dt >= '2015-02-01' ORDER BY j2.dt; + QUERY PLAN +------------------------------------------------------------------------------------------- + Sort + Sort Key: j2.dt + -> Hash Join + Hash Cond: (j3.id = j2.id) + -> Append + -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 j3 + -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 j3_1 + -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 j3_2 + -> Index Scan using num_range_rel_4_pkey on num_range_rel_4 j3_3 + -> Hash + -> Hash Join + Hash Cond: (j2.id = j1.id) + -> Append + -> Index Scan using range_rel_2_dt_idx on range_rel_2 j2 + -> Index Scan using range_rel_3_dt_idx on range_rel_3 j2_1 + -> Index Scan using range_rel_4_dt_idx on range_rel_4 j2_2 + -> Hash + -> Append + -> Index Scan using range_rel_1_pkey on range_rel_1 j1 + -> Index Scan using range_rel_2_pkey on range_rel_2 j1_1 +(20 rows) + +/* + * Test CTE query + */ +EXPLAIN (COSTS OFF) + WITH ttt AS (SELECT * FROM test.range_rel WHERE dt >= '2015-02-01' AND dt < '2015-03-15') +SELECT * FROM ttt; + QUERY 
PLAN +-------------------------------------------------------------------------------------------- + CTE Scan on ttt + CTE ttt + -> Append + -> Seq Scan on range_rel_2 + -> Index Scan using range_rel_3_dt_idx on range_rel_3 + Index Cond: (dt < 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) +(6 rows) + +EXPLAIN (COSTS OFF) + WITH ttt AS (SELECT * FROM test.hash_rel WHERE value = 2) +SELECT * FROM ttt; + QUERY PLAN +-------------------------------------- + CTE Scan on ttt + CTE ttt + -> Append + -> Seq Scan on hash_rel_1 + Filter: (value = 2) +(5 rows) + +/* + * Test split and merge + */ +/* Split first partition in half */ +SELECT pathman.split_range_partition('test.num_range_rel_1', 500); + split_range_partition +----------------------- + {0,1000} +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id BETWEEN 100 AND 700; + QUERY PLAN +---------------------------------------------------------------- + Append + -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 + Index Cond: (id >= 100) + -> Index Scan using num_range_rel_5_pkey on num_range_rel_5 + Index Cond: (id <= 700) +(5 rows) + +SELECT pathman.split_range_partition('test.range_rel_1', '2015-01-15'::DATE); + split_range_partition +------------------------- + {01-01-2015,02-01-2015} +(1 row) + +/* Merge two partitions into one */ +SELECT pathman.merge_range_partitions('test.num_range_rel_1', 'test.num_range_rel_' || currval('test.num_range_rel_seq')); + merge_range_partitions +------------------------ + +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id BETWEEN 100 AND 700; + QUERY PLAN +---------------------------------------------------------------- + Append + -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 + Index Cond: ((id >= 100) AND (id <= 700)) +(3 rows) + +SELECT pathman.merge_range_partitions('test.range_rel_1', 'test.range_rel_' || currval('test.range_rel_seq')); + merge_range_partitions +------------------------ + +(1 row) + 
+/* Append and prepend partitions */ +SELECT pathman.append_range_partition('test.num_range_rel'); + append_range_partition +------------------------ + test.num_range_rel_6 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 4000; + QUERY PLAN +----------------------------------- + Append + -> Seq Scan on num_range_rel_6 +(2 rows) + +SELECT pathman.prepend_range_partition('test.num_range_rel'); + prepend_range_partition +------------------------- + test.num_range_rel_7 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id < 0; + QUERY PLAN +----------------------------------- + Append + -> Seq Scan on num_range_rel_7 +(2 rows) + +SELECT pathman.drop_range_partition('test.num_range_rel_7'); + drop_range_partition +---------------------- + test.num_range_rel_7 +(1 row) + +SELECT pathman.append_range_partition('test.range_rel'); + append_range_partition +------------------------ + test.range_rel_6 +(1 row) + +SELECT pathman.prepend_range_partition('test.range_rel'); + prepend_range_partition +------------------------- + test.range_rel_7 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-12-15' AND '2015-01-15'; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Index Scan using range_rel_7_dt_idx on range_rel_7 + Index Cond: (dt >= 'Mon Dec 15 00:00:00 2014'::timestamp without time zone) + -> Index Scan using range_rel_1_dt_idx on range_rel_1 + Index Cond: (dt <= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) +(5 rows) + +SELECT pathman.drop_range_partition('test.range_rel_7'); + drop_range_partition +---------------------- + test.range_rel_7 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-12-15' AND '2015-01-15'; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Index Scan using range_rel_1_dt_idx on range_rel_1 + Index 
Cond: (dt <= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) +(3 rows) + +SELECT pathman.add_range_partition('test.range_rel', '2014-12-01'::DATE, '2015-01-02'::DATE); +ERROR: specified range overlaps with existing partitions +SELECT pathman.add_range_partition('test.range_rel', '2014-12-01'::DATE, '2015-01-01'::DATE); + add_range_partition +--------------------- + test.range_rel_8 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-12-15' AND '2015-01-15'; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Index Scan using range_rel_8_dt_idx on range_rel_8 + Index Cond: (dt >= 'Mon Dec 15 00:00:00 2014'::timestamp without time zone) + -> Index Scan using range_rel_1_dt_idx on range_rel_1 + Index Cond: (dt <= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) +(5 rows) + +CREATE TABLE test.range_rel_archive (LIKE test.range_rel INCLUDING ALL); +SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_archive', '2014-01-01'::DATE, '2015-01-01'::DATE); +ERROR: specified range overlaps with existing partitions +SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_archive', '2014-01-01'::DATE, '2014-12-01'::DATE); + attach_range_partition +------------------------ + test.range_rel_archive +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-11-15' AND '2015-01-15'; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Index Scan using range_rel_archive_dt_idx on range_rel_archive + Index Cond: (dt >= 'Sat Nov 15 00:00:00 2014'::timestamp without time zone) + -> Seq Scan on range_rel_8 + -> Index Scan using range_rel_1_dt_idx on range_rel_1 + Index Cond: (dt <= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) +(6 rows) + +SELECT pathman.detach_range_partition('test.range_rel_archive'); + detach_range_partition 
+------------------------ + test.range_rel_archive +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-11-15' AND '2015-01-15'; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Seq Scan on range_rel_8 + -> Index Scan using range_rel_1_dt_idx on range_rel_1 + Index Cond: (dt <= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) +(4 rows) + +CREATE TABLE test.range_rel_test1 ( + id SERIAL PRIMARY KEY, + dt TIMESTAMP, + txt TEXT, + abc INTEGER); +SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test1', '2013-01-01'::DATE, '2014-01-01'::DATE); +ERROR: partition must have the exact same structure as parent +CREATE TABLE test.range_rel_test2 ( + id SERIAL PRIMARY KEY, + dt TIMESTAMP); +SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test2', '2013-01-01'::DATE, '2014-01-01'::DATE); +ERROR: partition must have the exact same structure as parent +/* + * Zero partitions count and adding partitions with specified name + */ +CREATE TABLE test.zero( + id SERIAL PRIMARY KEY, + value INT NOT NULL); +INSERT INTO test.zero SELECT g, g FROM generate_series(1, 100) as g; +SELECT pathman.create_range_partitions('test.zero', 'value', 50, 10, 0); +NOTICE: sequence "zero_seq" does not exist, skipping + create_range_partitions +------------------------- + 0 +(1 row) + +SELECT pathman.append_range_partition('test.zero', 'test.zero_0'); +ERROR: cannot append to empty partitions set +SELECT pathman.prepend_range_partition('test.zero', 'test.zero_1'); +ERROR: cannot prepend to empty partitions set +SELECT pathman.add_range_partition('test.zero', 50, 70, 'test.zero_50'); + add_range_partition +--------------------- + test.zero_50 +(1 row) + +SELECT pathman.append_range_partition('test.zero', 'test.zero_appended'); + append_range_partition +------------------------ + test.zero_appended +(1 row) + +SELECT 
pathman.prepend_range_partition('test.zero', 'test.zero_prepended'); + prepend_range_partition +------------------------- + test.zero_prepended +(1 row) + +SELECT pathman.split_range_partition('test.zero_50', 60, 'test.zero_60'); + split_range_partition +----------------------- + {50,70} +(1 row) + +DROP TABLE test.zero CASCADE; +NOTICE: drop cascades to 4 other objects +/* + * Check that altering table columns doesn't break trigger + */ +ALTER TABLE test.hash_rel ADD COLUMN abc int; +INSERT INTO test.hash_rel (id, value, abc) VALUES (123, 456, 789); +SELECT * FROM test.hash_rel WHERE id = 123; + id | value | abc +-----+-------+----- + 123 | 456 | 789 +(1 row) + +/* + * Clean up + */ +SELECT pathman.drop_partitions('test.hash_rel'); +NOTICE: drop cascades to 3 other objects +NOTICE: 2 rows copied from test.hash_rel_2 +NOTICE: 3 rows copied from test.hash_rel_1 +NOTICE: 2 rows copied from test.hash_rel_0 + drop_partitions +----------------- + 3 +(1 row) + +SELECT COUNT(*) FROM ONLY test.hash_rel; + count +------- + 7 +(1 row) + +SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); + create_hash_partitions +------------------------ + 3 +(1 row) + +SELECT pathman.drop_partitions('test.hash_rel', TRUE); +NOTICE: function test.hash_rel_upd_trig_func() does not exist, skipping + drop_partitions +----------------- + 3 +(1 row) + +SELECT COUNT(*) FROM ONLY test.hash_rel; + count +------- + 0 +(1 row) + +DROP TABLE test.hash_rel CASCADE; +SELECT pathman.drop_partitions('test.num_range_rel'); +NOTICE: drop cascades to 4 other objects +NOTICE: 0 rows copied from test.num_range_rel_6 +NOTICE: 2 rows copied from test.num_range_rel_4 +NOTICE: 1000 rows copied from test.num_range_rel_3 +NOTICE: 1000 rows copied from test.num_range_rel_2 +NOTICE: 998 rows copied from test.num_range_rel_1 + drop_partitions +----------------- + 5 +(1 row) + +DROP TABLE test.num_range_rel CASCADE; +DROP TABLE test.range_rel CASCADE; +NOTICE: drop cascades to 7 other objects +/* Test 
automatic partition creation */ +CREATE TABLE test.range_rel ( + id SERIAL PRIMARY KEY, + dt TIMESTAMP NOT NULL); +SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DATE, '10 days'::INTERVAL, 1); + create_range_partitions +------------------------- + 1 +(1 row) + +INSERT INTO test.range_rel (dt) +SELECT generate_series('2015-01-01', '2015-04-30', '1 day'::interval); +INSERT INTO test.range_rel (dt) +SELECT generate_series('2014-12-31', '2014-12-01', '-1 day'::interval); +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt = '2014-12-15'; + QUERY PLAN +-------------------------------------------------------------------------------- + Append + -> Seq Scan on range_rel_14 + Filter: (dt = 'Mon Dec 15 00:00:00 2014'::timestamp without time zone) +(3 rows) + +SELECT * FROM test.range_rel WHERE dt = '2014-12-15'; + id | dt +-----+-------------------------- + 137 | Mon Dec 15 00:00:00 2014 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; + QUERY PLAN +-------------------------------------------------------------------------------- + Append + -> Seq Scan on range_rel_8 + Filter: (dt = 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) +(3 rows) + +SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; + id | dt +----+-------------------------- + 74 | Sun Mar 15 00:00:00 2015 +(1 row) + +SELECT pathman.set_auto('test.range_rel', false); + set_auto +---------- + +(1 row) + +INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); +ERROR: no suitable partition for key 'Mon Jun 01 00:00:00 2015' +SELECT pathman.set_auto('test.range_rel', true); + set_auto +---------- + +(1 row) + +INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); +DROP TABLE test.range_rel CASCADE; +NOTICE: drop cascades to 20 other objects +SELECT * FROM pathman.pathman_config; + partrel | attname | parttype | range_interval +---------+---------+----------+---------------- +(0 rows) + +/* Check overlaps */ +CREATE TABLE 
test.num_range_rel ( + id SERIAL PRIMARY KEY, + txt TEXT); +SELECT pathman.create_range_partitions('test.num_range_rel', 'id', 1000, 1000, 4); + create_range_partitions +------------------------- + 4 +(1 row) + +SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 4001, 5000); + check_overlap +--------------- + t +(1 row) + +SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 4000, 5000); + check_overlap +--------------- + t +(1 row) + +SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 3999, 5000); + check_overlap +--------------- + t +(1 row) + +SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 3000, 3500); + check_overlap +--------------- + t +(1 row) + +SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 0, 999); + check_overlap +--------------- + f +(1 row) + +SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 0, 1000); + check_overlap +--------------- + f +(1 row) + +SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 0, 1001); + check_overlap +--------------- + t +(1 row) + +/* CaMeL cAsE table names and attributes */ +CREATE TABLE test."TeSt" (a INT NOT NULL, b INT); +SELECT pathman.create_hash_partitions('test.TeSt', 'a', 3); +ERROR: relation "test.test" does not exist at character 39 +SELECT pathman.create_hash_partitions('test."TeSt"', 'a', 3); + create_hash_partitions +------------------------ + 3 +(1 row) + +INSERT INTO test."TeSt" VALUES (1, 1); +INSERT INTO test."TeSt" VALUES (2, 2); +INSERT INTO test."TeSt" VALUES (3, 3); +SELECT * FROM test."TeSt"; + a | b +---+--- + 3 | 3 + 2 | 2 + 1 | 1 +(3 rows) + +SELECT pathman.create_hash_update_trigger('test."TeSt"'); + create_hash_update_trigger +---------------------------- + test."TeSt_upd_trig_func" +(1 row) + +UPDATE test."TeSt" SET a = 1; +SELECT * FROM test."TeSt"; + a | b +---+--- + 1 | 3 + 1 | 2 + 1 | 1 +(3 rows) + +SELECT * FROM test."TeSt" WHERE a = 1; + a | b +---+--- + 1 | 3 + 1 | 2 + 1 | 1 +(3 
rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test."TeSt" WHERE a = 1; + QUERY PLAN +---------------------------- + Append + -> Seq Scan on "TeSt_2" + Filter: (a = 1) +(3 rows) + +SELECT pathman.drop_partitions('test."TeSt"'); +NOTICE: drop cascades to 3 other objects +NOTICE: 3 rows copied from test."TeSt_2" +NOTICE: 0 rows copied from test."TeSt_1" +NOTICE: 0 rows copied from test."TeSt_0" + drop_partitions +----------------- + 3 +(1 row) + +SELECT * FROM test."TeSt"; + a | b +---+--- + 1 | 3 + 1 | 2 + 1 | 1 +(3 rows) + +CREATE TABLE test."RangeRel" ( + id SERIAL PRIMARY KEY, + dt TIMESTAMP NOT NULL, + txt TEXT); +INSERT INTO test."RangeRel" (dt, txt) +SELECT g, md5(g::TEXT) FROM generate_series('2015-01-01', '2015-01-03', '1 day'::interval) as g; +SELECT pathman.create_range_partitions('test."RangeRel"', 'dt', '2015-01-01'::DATE, '1 day'::INTERVAL); +NOTICE: sequence "RangeRel_seq" does not exist, skipping + create_range_partitions +------------------------- + 3 +(1 row) + +SELECT pathman.append_range_partition('test."RangeRel"'); + append_range_partition +------------------------ + test."RangeRel_4" +(1 row) + +SELECT pathman.prepend_range_partition('test."RangeRel"'); + prepend_range_partition +------------------------- + test."RangeRel_5" +(1 row) + +SELECT pathman.merge_range_partitions('test."RangeRel_1"', 'test."RangeRel_' || currval('test."RangeRel_seq"') || '"'); + merge_range_partitions +------------------------ + +(1 row) + +SELECT pathman.split_range_partition('test."RangeRel_1"', '2015-01-01'::DATE); + split_range_partition +------------------------- + {12-31-2014,01-02-2015} +(1 row) + +SELECT pathman.drop_partitions('test."RangeRel"'); +NOTICE: function test.RangeRel_upd_trig_func() does not exist, skipping +NOTICE: 1 rows copied from test."RangeRel_6" +NOTICE: 0 rows copied from test."RangeRel_4" +NOTICE: 1 rows copied from test."RangeRel_3" +NOTICE: 1 rows copied from test."RangeRel_2" +NOTICE: 0 rows copied from test."RangeRel_1" + drop_partitions 
+----------------- + 5 +(1 row) + +SELECT pathman.create_partitions_from_range('test."RangeRel"', 'dt', '2015-01-01'::DATE, '2015-01-05'::DATE, '1 day'::INTERVAL); + create_partitions_from_range +------------------------------ + 5 +(1 row) + +DROP TABLE test."RangeRel" CASCADE; +NOTICE: drop cascades to 5 other objects +SELECT * FROM pathman.pathman_config; + partrel | attname | parttype | range_interval +--------------------+---------+----------+---------------- + test.num_range_rel | id | 2 | 1000 +(1 row) + +CREATE TABLE test."RangeRel" ( + id SERIAL PRIMARY KEY, + dt TIMESTAMP NOT NULL, + txt TEXT); +SELECT pathman.create_range_partitions('test."RangeRel"', 'id', 1, 100, 3); + create_range_partitions +------------------------- + 3 +(1 row) + +SELECT pathman.drop_partitions('test."RangeRel"'); +NOTICE: function test.RangeRel_upd_trig_func() does not exist, skipping +NOTICE: 0 rows copied from test."RangeRel_3" +NOTICE: 0 rows copied from test."RangeRel_2" +NOTICE: 0 rows copied from test."RangeRel_1" + drop_partitions +----------------- + 3 +(1 row) + +SELECT pathman.create_partitions_from_range('test."RangeRel"', 'id', 1, 300, 100); + create_partitions_from_range +------------------------------ + 3 +(1 row) + +DROP TABLE test."RangeRel" CASCADE; +NOTICE: drop cascades to 3 other objects +DROP EXTENSION pg_pathman; +/* Test that everithing works fine without schemas */ +CREATE EXTENSION pg_pathman; +/* Hash */ +CREATE TABLE hash_rel ( + id SERIAL PRIMARY KEY, + value INTEGER NOT NULL); +INSERT INTO hash_rel (value) SELECT g FROM generate_series(1, 10000) as g; +SELECT create_hash_partitions('hash_rel', 'value', 3); + create_hash_partitions +------------------------ + 3 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM hash_rel WHERE id = 1234; + QUERY PLAN +------------------------------------------------------ + Append + -> Index Scan using hash_rel_0_pkey on hash_rel_0 + Index Cond: (id = 1234) + -> Index Scan using hash_rel_1_pkey on hash_rel_1 + Index Cond: (id 
= 1234) + -> Index Scan using hash_rel_2_pkey on hash_rel_2 + Index Cond: (id = 1234) +(7 rows) + +/* Range */ +CREATE TABLE range_rel ( + id SERIAL PRIMARY KEY, + dt TIMESTAMP NOT NULL, + value INTEGER); +INSERT INTO range_rel (dt, value) SELECT g, extract(day from g) FROM generate_series('2010-01-01'::date, '2010-12-31'::date, '1 day') as g; +SELECT create_range_partitions('range_rel', 'dt', '2010-01-01'::date, '1 month'::interval, 12); +NOTICE: sequence "range_rel_seq" does not exist, skipping + create_range_partitions +------------------------- + 12 +(1 row) + +SELECT merge_range_partitions('range_rel_1', 'range_rel_2'); + merge_range_partitions +------------------------ + +(1 row) + +SELECT split_range_partition('range_rel_1', '2010-02-15'::date); + split_range_partition +------------------------- + {01-01-2010,03-01-2010} +(1 row) + +SELECT append_range_partition('range_rel'); + append_range_partition +------------------------ + public.range_rel_14 +(1 row) + +SELECT prepend_range_partition('range_rel'); + prepend_range_partition +------------------------- + public.range_rel_15 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM range_rel WHERE dt < '2010-03-01'; + QUERY PLAN +-------------------------------- + Append + -> Seq Scan on range_rel_15 + -> Seq Scan on range_rel_1 + -> Seq Scan on range_rel_13 +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM range_rel WHERE dt > '2010-12-15'; + QUERY PLAN +-------------------------------------------------------------------------------- + Append + -> Seq Scan on range_rel_12 + Filter: (dt > 'Wed Dec 15 00:00:00 2010'::timestamp without time zone) + -> Seq Scan on range_rel_14 +(4 rows) + +/* Temporary table for JOINs */ +CREATE TABLE tmp (id INTEGER NOT NULL, value INTEGER NOT NULL); +INSERT INTO tmp VALUES (1, 1), (2, 2); +/* Test UPDATE and DELETE */ +EXPLAIN (COSTS OFF) UPDATE range_rel SET value = 111 WHERE dt = '2010-06-15'; + QUERY PLAN +-------------------------------------------------------------------------------- 
+ Update on range_rel_6 + -> Seq Scan on range_rel_6 + Filter: (dt = 'Tue Jun 15 00:00:00 2010'::timestamp without time zone) +(3 rows) + +UPDATE range_rel SET value = 111 WHERE dt = '2010-06-15'; +SELECT * FROM range_rel WHERE dt = '2010-06-15'; + id | dt | value +-----+--------------------------+------- + 166 | Tue Jun 15 00:00:00 2010 | 111 +(1 row) + +EXPLAIN (COSTS OFF) DELETE FROM range_rel WHERE dt = '2010-06-15'; + QUERY PLAN +-------------------------------------------------------------------------------- + Delete on range_rel_6 + -> Seq Scan on range_rel_6 + Filter: (dt = 'Tue Jun 15 00:00:00 2010'::timestamp without time zone) +(3 rows) + +DELETE FROM range_rel WHERE dt = '2010-06-15'; +SELECT * FROM range_rel WHERE dt = '2010-06-15'; + id | dt | value +----+----+------- +(0 rows) + +EXPLAIN (COSTS OFF) UPDATE range_rel r SET value = t.value FROM tmp t WHERE r.dt = '2010-01-01' AND r.id = t.id; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Update on range_rel_1 r + -> Hash Join + Hash Cond: (t.id = r.id) + -> Seq Scan on tmp t + -> Hash + -> Index Scan using range_rel_1_pkey on range_rel_1 r + Filter: (dt = 'Fri Jan 01 00:00:00 2010'::timestamp without time zone) +(7 rows) + +UPDATE range_rel r SET value = t.value FROM tmp t WHERE r.dt = '2010-01-01' AND r.id = t.id; +EXPLAIN (COSTS OFF) DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02' AND r.id = t.id; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Delete on range_rel_1 r + -> Hash Join + Hash Cond: (t.id = r.id) + -> Seq Scan on tmp t + -> Hash + -> Index Scan using range_rel_1_pkey on range_rel_1 r + Filter: (dt = 'Sat Jan 02 00:00:00 2010'::timestamp without time zone) +(7 rows) + +DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02' AND r.id = t.id; +/* Create range partitions from whole range */ +SELECT drop_partitions('range_rel'); +NOTICE: function 
public.range_rel_upd_trig_func() does not exist, skipping +NOTICE: 0 rows copied from range_rel_15 +NOTICE: 0 rows copied from range_rel_14 +NOTICE: 14 rows copied from range_rel_13 +NOTICE: 31 rows copied from range_rel_12 +NOTICE: 30 rows copied from range_rel_11 +NOTICE: 31 rows copied from range_rel_10 +NOTICE: 30 rows copied from range_rel_9 +NOTICE: 31 rows copied from range_rel_8 +NOTICE: 31 rows copied from range_rel_7 +NOTICE: 29 rows copied from range_rel_6 +NOTICE: 31 rows copied from range_rel_5 +NOTICE: 30 rows copied from range_rel_4 +NOTICE: 31 rows copied from range_rel_3 +NOTICE: 44 rows copied from range_rel_1 + drop_partitions +----------------- + 14 +(1 row) + +SELECT create_partitions_from_range('range_rel', 'id', 1, 1000, 100); + create_partitions_from_range +------------------------------ + 10 +(1 row) + +SELECT drop_partitions('range_rel', TRUE); +NOTICE: function public.range_rel_upd_trig_func() does not exist, skipping + drop_partitions +----------------- + 10 +(1 row) + +SELECT create_partitions_from_range('range_rel', 'dt', '2015-01-01'::date, '2015-12-01'::date, '1 month'::interval); + create_partitions_from_range +------------------------------ + 12 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM range_rel WHERE dt = '2015-12-15'; + QUERY PLAN +-------------------------------------------------------------------------------- + Append + -> Seq Scan on range_rel_12 + Filter: (dt = 'Tue Dec 15 00:00:00 2015'::timestamp without time zone) +(3 rows) + +CREATE TABLE messages(id SERIAL PRIMARY KEY, msg TEXT); +CREATE TABLE replies(id SERIAL PRIMARY KEY, message_id INTEGER REFERENCES messages(id), msg TEXT); +INSERT INTO messages SELECT g, md5(g::text) FROM generate_series(1, 10) as g; +INSERT INTO replies SELECT g, g, md5(g::text) FROM generate_series(1, 10) as g; +SELECT create_range_partitions('messages', 'id', 1, 100, 2); +WARNING: foreign key 'replies_message_id_fkey' references relation 'messages' +ERROR: relation "messages" is referenced 
from other relations +ALTER TABLE replies DROP CONSTRAINT replies_message_id_fkey; +SELECT create_range_partitions('messages', 'id', 1, 100, 2); +NOTICE: sequence "messages_seq" does not exist, skipping + create_range_partitions +------------------------- + 2 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM messages; + QUERY PLAN +------------------------------ + Append + -> Seq Scan on messages_1 + -> Seq Scan on messages_2 +(3 rows) + +DROP SCHEMA test CASCADE; +NOTICE: drop cascades to 13 other objects +DROP EXTENSION pg_pathman CASCADE; +NOTICE: drop cascades to 3 other objects +DROP SCHEMA pathman CASCADE; diff --git a/results/pathman_callbacks.out b/results/pathman_callbacks.out new file mode 100644 index 0000000000..d168729cee --- /dev/null +++ b/results/pathman_callbacks.out @@ -0,0 +1,112 @@ +CREATE EXTENSION pg_pathman; +CREATE SCHEMA callbacks; +/* Check callbacks */ +CREATE TABLE callbacks.log(id serial, message text); +CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_range_callback( + args JSONB) +RETURNS VOID AS $$ +DECLARE + start_value TEXT := args->>'start'; + end_value TEXT := args->'end'; +BEGIN + INSERT INTO callbacks.log(message) + VALUES (start_value || '-' || end_value); +END +$$ language plpgsql; +CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_hash_callback( + args JSONB) +RETURNS VOID AS $$ +BEGIN + RAISE WARNING 'callback: partition %', args->'partition'; +END +$$ language plpgsql; +/* set callback to be called on RANGE partitions */ +CREATE TABLE callbacks.abc(a serial, b int); +SELECT create_range_partitions('callbacks.abc', 'a', 1, 100, 2); +NOTICE: sequence "abc_seq" does not exist, skipping + create_range_partitions +------------------------- + 2 +(1 row) + +SELECT set_part_init_callback('callbacks.abc', + 'callbacks.abc_on_part_created_range_callback'); + set_part_init_callback +------------------------ + +(1 row) + +INSERT INTO callbacks.abc VALUES (123, 1); +INSERT INTO callbacks.abc VALUES (223, 1); +SELECT 
append_range_partition('callbacks.abc'); + append_range_partition +------------------------ + callbacks.abc_4 +(1 row) + +SELECT prepend_range_partition('callbacks.abc'); + prepend_range_partition +------------------------- + callbacks.abc_5 +(1 row) + +SELECT add_range_partition('callbacks.abc', 401, 502); + add_range_partition +--------------------- + callbacks.abc_6 +(1 row) + +SELECT message FROM callbacks.log ORDER BY id; + message +----------- + 201-"301" + 301-"401" + -99-"1" + 401-"502" +(4 rows) + +SELECT drop_partitions('callbacks.abc'); +NOTICE: function callbacks.abc_upd_trig_func() does not exist, skipping +NOTICE: 0 rows copied from callbacks.abc_1 +NOTICE: 1 rows copied from callbacks.abc_2 +NOTICE: 1 rows copied from callbacks.abc_3 +NOTICE: 0 rows copied from callbacks.abc_4 +NOTICE: 0 rows copied from callbacks.abc_5 +NOTICE: 0 rows copied from callbacks.abc_6 + drop_partitions +----------------- + 6 +(1 row) + +/* set callback to be called on HASH partitions */ +SELECT set_part_init_callback('callbacks.abc', + 'callbacks.abc_on_part_created_hash_callback'); + set_part_init_callback +------------------------ + +(1 row) + +SELECT create_hash_partitions('callbacks.abc', 'a', 5); +WARNING: callback: partition "abc_0" +WARNING: callback: partition "abc_1" +WARNING: callback: partition "abc_2" +WARNING: callback: partition "abc_3" +WARNING: callback: partition "abc_4" + create_hash_partitions +------------------------ + 5 +(1 row) + +DROP SCHEMA callbacks CASCADE; +NOTICE: drop cascades to 10 other objects +DETAIL: drop cascades to table callbacks.log +drop cascades to function callbacks.abc_on_part_created_range_callback(jsonb) +drop cascades to function callbacks.abc_on_part_created_hash_callback(jsonb) +drop cascades to table callbacks.abc +drop cascades to sequence callbacks.abc_seq +drop cascades to table callbacks.abc_0 +drop cascades to table callbacks.abc_1 +drop cascades to table callbacks.abc_2 +drop cascades to table callbacks.abc_3 +drop 
cascades to table callbacks.abc_4 +DROP EXTENSION pg_pathman CASCADE; diff --git a/results/pathman_domains.out b/results/pathman_domains.out new file mode 100644 index 0000000000..590531c400 --- /dev/null +++ b/results/pathman_domains.out @@ -0,0 +1,108 @@ +CREATE EXTENSION pg_pathman; +CREATE SCHEMA domains; +CREATE DOMAIN domains.dom_test AS numeric CHECK (value < 1200); +CREATE TABLE domains.dom_table(val domains.dom_test NOT NULL); +INSERT INTO domains.dom_table SELECT generate_series(1, 999); +SELECT create_range_partitions('domains.dom_table', 'val', 1, 100); +NOTICE: sequence "dom_table_seq" does not exist, skipping + create_range_partitions +------------------------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM domains.dom_table +WHERE val < 250; + QUERY PLAN +--------------------------------------------------- + Append + -> Seq Scan on dom_table_1 + -> Seq Scan on dom_table_2 + -> Seq Scan on dom_table_3 + Filter: ((val)::numeric < '250'::numeric) +(5 rows) + +INSERT INTO domains.dom_table VALUES(1500); +ERROR: value for domain domains.dom_test violates check constraint "dom_test_check" +INSERT INTO domains.dom_table VALUES(-10); +SELECT append_range_partition('domains.dom_table'); + append_range_partition +------------------------ + domains.dom_table_12 +(1 row) + +SELECT prepend_range_partition('domains.dom_table'); + prepend_range_partition +------------------------- + domains.dom_table_13 +(1 row) + +SELECT merge_range_partitions('domains.dom_table_1', 'domains.dom_table_2'); + merge_range_partitions +------------------------ + +(1 row) + +SELECT split_range_partition('domains.dom_table_1', 50); + split_range_partition +----------------------- + {1,201} +(1 row) + +INSERT INTO domains.dom_table VALUES(1101); +EXPLAIN (COSTS OFF) +SELECT * FROM domains.dom_table +WHERE val < 450; + QUERY PLAN +--------------------------------------------------- + Append + -> Seq Scan on dom_table_13 + -> Seq Scan on dom_table_11 + -> Seq Scan on dom_table_1 + -> 
Seq Scan on dom_table_14 + -> Seq Scan on dom_table_3 + -> Seq Scan on dom_table_4 + -> Seq Scan on dom_table_5 + Filter: ((val)::numeric < '450'::numeric) +(9 rows) + +SELECT * FROM pathman_partition_list +ORDER BY range_min::INT, range_max::INT; + parent | partition | parttype | partattr | range_min | range_max +-------------------+----------------------+----------+----------+-----------+----------- + domains.dom_table | domains.dom_table_13 | 2 | val | -199 | -99 + domains.dom_table | domains.dom_table_11 | 2 | val | -99 | 1 + domains.dom_table | domains.dom_table_1 | 2 | val | 1 | 50 + domains.dom_table | domains.dom_table_14 | 2 | val | 50 | 201 + domains.dom_table | domains.dom_table_3 | 2 | val | 201 | 301 + domains.dom_table | domains.dom_table_4 | 2 | val | 301 | 401 + domains.dom_table | domains.dom_table_5 | 2 | val | 401 | 501 + domains.dom_table | domains.dom_table_6 | 2 | val | 501 | 601 + domains.dom_table | domains.dom_table_7 | 2 | val | 601 | 701 + domains.dom_table | domains.dom_table_8 | 2 | val | 701 | 801 + domains.dom_table | domains.dom_table_9 | 2 | val | 801 | 901 + domains.dom_table | domains.dom_table_10 | 2 | val | 901 | 1001 + domains.dom_table | domains.dom_table_12 | 2 | val | 1001 | 1101 + domains.dom_table | domains.dom_table_15 | 2 | val | 1101 | 1201 +(14 rows) + +DROP SCHEMA domains CASCADE; +NOTICE: drop cascades to 17 other objects +DETAIL: drop cascades to type domains.dom_test +drop cascades to table domains.dom_table +drop cascades to sequence domains.dom_table_seq +drop cascades to table domains.dom_table_1 +drop cascades to table domains.dom_table_3 +drop cascades to table domains.dom_table_4 +drop cascades to table domains.dom_table_5 +drop cascades to table domains.dom_table_6 +drop cascades to table domains.dom_table_7 +drop cascades to table domains.dom_table_8 +drop cascades to table domains.dom_table_9 +drop cascades to table domains.dom_table_10 +drop cascades to table domains.dom_table_11 +drop cascades to table 
domains.dom_table_12 +drop cascades to table domains.dom_table_13 +drop cascades to table domains.dom_table_14 +drop cascades to table domains.dom_table_15 +DROP EXTENSION pg_pathman CASCADE; diff --git a/results/pathman_foreign_keys.out b/results/pathman_foreign_keys.out new file mode 100644 index 0000000000..dee09d99b8 --- /dev/null +++ b/results/pathman_foreign_keys.out @@ -0,0 +1,71 @@ +CREATE EXTENSION pg_pathman; +CREATE SCHEMA fkeys; +/* Check primary keys generation */ +CREATE TABLE fkeys.test_ref(comment TEXT UNIQUE); +INSERT INTO fkeys.test_ref VALUES('test'); +CREATE TABLE fkeys.test_fkey( + id INT NOT NULL, + comment TEXT, + FOREIGN KEY (comment) REFERENCES fkeys.test_ref(comment)); +INSERT INTO fkeys.test_fkey SELECT generate_series(1, 1000), 'test'; +SELECT create_range_partitions('fkeys.test_fkey', 'id', 1, 100); +NOTICE: sequence "test_fkey_seq" does not exist, skipping + create_range_partitions +------------------------- + 10 +(1 row) + +INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); +ERROR: insert or update on table "test_fkey_1" violates foreign key constraint "test_fkey_1_comment_fkey" +DETAIL: Key (comment)=(wrong) is not present in table "test_ref". 
+INSERT INTO fkeys.test_fkey VALUES(1, 'test'); +SELECT drop_partitions('fkeys.test_fkey'); +NOTICE: function fkeys.test_fkey_upd_trig_func() does not exist, skipping +NOTICE: 101 rows copied from fkeys.test_fkey_1 +NOTICE: 100 rows copied from fkeys.test_fkey_2 +NOTICE: 100 rows copied from fkeys.test_fkey_3 +NOTICE: 100 rows copied from fkeys.test_fkey_4 +NOTICE: 100 rows copied from fkeys.test_fkey_5 +NOTICE: 100 rows copied from fkeys.test_fkey_6 +NOTICE: 100 rows copied from fkeys.test_fkey_7 +NOTICE: 100 rows copied from fkeys.test_fkey_8 +NOTICE: 100 rows copied from fkeys.test_fkey_9 +NOTICE: 100 rows copied from fkeys.test_fkey_10 + drop_partitions +----------------- + 10 +(1 row) + +SELECT create_hash_partitions('fkeys.test_fkey', 'id', 10); + create_hash_partitions +------------------------ + 10 +(1 row) + +INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); +ERROR: insert or update on table "test_fkey_0" violates foreign key constraint "test_fkey_0_comment_fkey" +DETAIL: Key (comment)=(wrong) is not present in table "test_ref". 
+INSERT INTO fkeys.test_fkey VALUES(1, 'test'); +SELECT drop_partitions('fkeys.test_fkey'); +NOTICE: function fkeys.test_fkey_upd_trig_func() does not exist, skipping +NOTICE: 100 rows copied from fkeys.test_fkey_0 +NOTICE: 90 rows copied from fkeys.test_fkey_1 +NOTICE: 90 rows copied from fkeys.test_fkey_2 +NOTICE: 116 rows copied from fkeys.test_fkey_3 +NOTICE: 101 rows copied from fkeys.test_fkey_4 +NOTICE: 90 rows copied from fkeys.test_fkey_5 +NOTICE: 95 rows copied from fkeys.test_fkey_6 +NOTICE: 118 rows copied from fkeys.test_fkey_7 +NOTICE: 108 rows copied from fkeys.test_fkey_8 +NOTICE: 94 rows copied from fkeys.test_fkey_9 + drop_partitions +----------------- + 10 +(1 row) + +DROP SCHEMA fkeys CASCADE; +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table fkeys.test_ref +drop cascades to table fkeys.test_fkey +drop cascades to sequence fkeys.test_fkey_seq +DROP EXTENSION pg_pathman CASCADE; diff --git a/results/pathman_runtime_nodes.out b/results/pathman_runtime_nodes.out new file mode 100644 index 0000000000..cf75cfa049 --- /dev/null +++ b/results/pathman_runtime_nodes.out @@ -0,0 +1,320 @@ +CREATE SCHEMA pathman; +CREATE EXTENSION pg_pathman SCHEMA pathman; +CREATE SCHEMA test; +/* + * Test RuntimeAppend + */ +create or replace function test.pathman_assert(smt bool, error_msg text) returns text as $$ +begin + if not smt then + raise exception '%', error_msg; + end if; + + return 'ok'; +end; +$$ language plpgsql; +create or replace function test.pathman_equal(a text, b text, error_msg text) returns text as $$ +begin + if a != b then + raise exception '''%'' is not equal to ''%'', %', a, b, error_msg; + end if; + + return 'equal'; +end; +$$ language plpgsql; +create or replace function test.pathman_test(query text) returns jsonb as $$ +declare + plan jsonb; +begin + execute 'explain (analyze, format json)' || query into plan; + + return plan; +end; +$$ language plpgsql; +create or replace function test.pathman_test_1() returns text as 
$$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.runtime_test_1 where id = (select * from test.run_values limit 1)'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Custom Plan Provider')::text, + '"RuntimeAppend"', + 'wrong plan provider'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Relation Name')::text, + format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(1), 6)), + 'wrong partition'); + + select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans') into num; + perform test.pathman_equal(num::text, '2', 'expected 2 child plans for custom scan'); + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; +create or replace function test.pathman_test_2() returns text as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.runtime_test_1 where id = any (select * from test.run_values limit 4)'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Nested Loop"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, + '"RuntimeAppend"', + 'wrong plan provider'); + + select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; + perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); + + for i in 0..3 loop + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->i->'Relation Name')::text, + format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), + 'wrong partition'); + + num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; + perform test.pathman_equal(num::text, 
'1', 'expected 1 loop'); + end loop; + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; +create or replace function test.pathman_test_3() returns text as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.runtime_test_1 a join test.run_values b on a.id = b.val'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Nested Loop"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, + '"RuntimeAppend"', + 'wrong plan provider'); + + select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; + perform test.pathman_equal(num::text, '6', 'expected 6 child plans for custom scan'); + + for i in 0..5 loop + num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; + perform test.pathman_assert(num > 0 and num <= 1718, 'expected no more than 1718 loops'); + end loop; + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; +create or replace function test.pathman_test_4() returns text as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.category c, lateral' || + '(select * from test.runtime_test_2 g where g.category_id = c.id order by rating limit 4) as tg'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Nested Loop"', + 'wrong plan type'); + + /* Limit -> Custom Scan */ + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Custom Plan Provider')::text, + '"RuntimeMergeAppend"', + 'wrong plan provider'); + + select count(*) from 
jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans') into num; + perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); + + for i in 0..3 loop + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Relation Name')::text, + format('"runtime_test_2_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), + 'wrong partition'); + + num = plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Actual Loops'; + perform test.pathman_assert(num = 1, 'expected no more than 1 loops'); + end loop; + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; +create or replace function test.pathman_test_5() returns text as $$ +declare + res record; +begin + select + from test.runtime_test_3 + where id = (select * from test.vals order by val limit 1) + limit 1 + into res; /* test empty tlist */ + + + select id, generate_series(1, 2) gen, val + from test.runtime_test_3 + where id = any (select * from test.vals order by val limit 5) + order by id, gen, val + offset 1 limit 1 + into res; /* without IndexOnlyScan */ + + perform test.pathman_equal(res.id::text, '1', 'id is incorrect (t2)'); + perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t2)'); + perform test.pathman_equal(res.val::text, 'k = 1', 'val is incorrect (t2)'); + + + select id + from test.runtime_test_3 + where id = any (select * from test.vals order by val limit 5) + order by id + offset 3 limit 1 + into res; /* with IndexOnlyScan */ + + perform test.pathman_equal(res.id::text, '4', 'id is incorrect (t3)'); + + + select v.val v1, generate_series(2, 2) gen, t.val v2 + from test.runtime_test_3 t join test.vals v on id = v.val + order by v1, gen, v2 + limit 1 + into res; + + perform test.pathman_equal(res.v1::text, '1', 'v1 is incorrect (t4)'); + perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t4)'); + perform test.pathman_equal(res.v2::text, 
'k = 1', 'v2 is incorrect (t4)'); + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_hashjoin = off +set enable_mergejoin = off; +create table test.run_values as select generate_series(1, 10000) val; +create table test.runtime_test_1(id serial primary key, val real); +insert into test.runtime_test_1 select generate_series(1, 10000), random(); +select pathman.create_hash_partitions('test.runtime_test_1', 'id', 6); + create_hash_partitions +------------------------ + 6 +(1 row) + +create table test.category as (select id, 'cat' || id::text as name from generate_series(1, 4) id); +create table test.runtime_test_2 (id serial, category_id int not null, name text, rating real); +insert into test.runtime_test_2 (select id, (id % 6) + 1 as category_id, 'good' || id::text as name, random() as rating from generate_series(1, 100000) id); +create index on test.runtime_test_2 (category_id, rating); +select pathman.create_hash_partitions('test.runtime_test_2', 'category_id', 6); + create_hash_partitions +------------------------ + 6 +(1 row) + +create table test.vals as (select generate_series(1, 10000) as val); +create table test.runtime_test_3(val text, id serial not null); +insert into test.runtime_test_3(id, val) select * from generate_series(1, 10000) k, format('k = %s', k); +select pathman.create_hash_partitions('test.runtime_test_3', 'id', 4); + create_hash_partitions +------------------------ + 4 +(1 row) + +create index on test.runtime_test_3 (id); +create index on test.runtime_test_3_0 (id); +analyze test.run_values; +analyze test.runtime_test_1; +analyze test.runtime_test_2; +analyze test.runtime_test_3; +analyze test.runtime_test_3_0; +set pg_pathman.enable_runtimeappend = on; +set pg_pathman.enable_runtimemergeappend = on; +select test.pathman_test_1(); /* RuntimeAppend (select ... where id = (subquery)) */ + pathman_test_1 +---------------- + ok +(1 row) + +select test.pathman_test_2(); /* RuntimeAppend (select ... 
where id = any(subquery)) */ + pathman_test_2 +---------------- + ok +(1 row) + +select test.pathman_test_3(); /* RuntimeAppend (a join b on a.id = b.val) */ + pathman_test_3 +---------------- + ok +(1 row) + +select test.pathman_test_4(); /* RuntimeMergeAppend (lateral) */ + pathman_test_4 +---------------- + ok +(1 row) + +select test.pathman_test_5(); /* projection tests for RuntimeXXX nodes */ + pathman_test_5 +---------------- + ok +(1 row) + +DROP SCHEMA test CASCADE; +NOTICE: drop cascades to 30 other objects +DETAIL: drop cascades to function test.pathman_assert(boolean,text) +drop cascades to function test.pathman_equal(text,text,text) +drop cascades to function test.pathman_test(text) +drop cascades to function test.pathman_test_1() +drop cascades to function test.pathman_test_2() +drop cascades to function test.pathman_test_3() +drop cascades to function test.pathman_test_4() +drop cascades to function test.pathman_test_5() +drop cascades to table test.run_values +drop cascades to table test.runtime_test_1 +drop cascades to table test.runtime_test_1_0 +drop cascades to table test.runtime_test_1_1 +drop cascades to table test.runtime_test_1_2 +drop cascades to table test.runtime_test_1_3 +drop cascades to table test.runtime_test_1_4 +drop cascades to table test.runtime_test_1_5 +drop cascades to table test.category +drop cascades to table test.runtime_test_2 +drop cascades to table test.runtime_test_2_0 +drop cascades to table test.runtime_test_2_1 +drop cascades to table test.runtime_test_2_2 +drop cascades to table test.runtime_test_2_3 +drop cascades to table test.runtime_test_2_4 +drop cascades to table test.runtime_test_2_5 +drop cascades to table test.vals +drop cascades to table test.runtime_test_3 +drop cascades to table test.runtime_test_3_0 +drop cascades to table test.runtime_test_3_1 +drop cascades to table test.runtime_test_3_2 +drop cascades to table test.runtime_test_3_3 +DROP EXTENSION pg_pathman CASCADE; +DROP SCHEMA pathman CASCADE; diff 
--git a/sql/pg_pathman.sql b/sql/pathman_basic.sql similarity index 63% rename from sql/pg_pathman.sql rename to sql/pathman_basic.sql index 1b6f1c164c..2faadd426d 100644 --- a/sql/pg_pathman.sql +++ b/sql/pathman_basic.sql @@ -164,265 +164,6 @@ EXPLAIN (COSTS OFF) WITH ttt AS (SELECT * FROM test.hash_rel WHERE value = 2) SELECT * FROM ttt; - -/* - * Test RuntimeAppend - */ - -create or replace function test.pathman_assert(smt bool, error_msg text) returns text as $$ -begin - if not smt then - raise exception '%', error_msg; - end if; - - return 'ok'; -end; -$$ language plpgsql; - -create or replace function test.pathman_equal(a text, b text, error_msg text) returns text as $$ -begin - if a != b then - raise exception '''%'' is not equal to ''%'', %', a, b, error_msg; - end if; - - return 'equal'; -end; -$$ language plpgsql; - -create or replace function test.pathman_test(query text) returns jsonb as $$ -declare - plan jsonb; -begin - execute 'explain (analyze, format json)' || query into plan; - - return plan; -end; -$$ language plpgsql; - -create or replace function test.pathman_test_1() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.runtime_test_1 where id = (select * from test.run_values limit 1)'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Custom Plan Provider')::text, - '"RuntimeAppend"', - 'wrong plan provider'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Relation Name')::text, - format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(1), 6)), - 'wrong partition'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans') into num; - perform test.pathman_equal(num::text, '2', 'expected 2 child plans for custom scan'); - - return 'ok'; -end; -$$ language plpgsql; - -create or replace function test.pathman_test_2() returns text as $$ 
-declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.runtime_test_1 where id = any (select * from test.run_values limit 4)'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Nested Loop"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, - '"RuntimeAppend"', - 'wrong plan provider'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; - perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); - - for i in 0..3 loop - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->i->'Relation Name')::text, - format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), - 'wrong partition'); - - num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; - perform test.pathman_equal(num::text, '1', 'expected 1 loop'); - end loop; - - return 'ok'; -end; -$$ language plpgsql; - -create or replace function test.pathman_test_3() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.runtime_test_1 a join test.run_values b on a.id = b.val'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Nested Loop"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, - '"RuntimeAppend"', - 'wrong plan provider'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; - perform test.pathman_equal(num::text, '6', 'expected 6 child plans for custom scan'); - - for i in 0..5 loop - num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; - 
perform test.pathman_assert(num > 0 and num <= 1718, 'expected no more than 1718 loops'); - end loop; - - return 'ok'; -end; -$$ language plpgsql; - -create or replace function test.pathman_test_4() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.category c, lateral' || - '(select * from test.runtime_test_2 g where g.category_id = c.id order by rating limit 4) as tg'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Nested Loop"', - 'wrong plan type'); - - /* Limit -> Custom Scan */ - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Custom Plan Provider')::text, - '"RuntimeMergeAppend"', - 'wrong plan provider'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans') into num; - perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); - - for i in 0..3 loop - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Relation Name')::text, - format('"runtime_test_2_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), - 'wrong partition'); - - num = plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Actual Loops'; - perform test.pathman_assert(num = 1, 'expected no more than 1 loops'); - end loop; - - return 'ok'; -end; -$$ language plpgsql; - -create or replace function test.pathman_test_5() returns text as $$ -declare - res record; -begin - select - from test.runtime_test_3 - where id = (select * from test.vals order by val limit 1) - limit 1 - into res; /* test empty tlist */ - - - select id, generate_series(1, 2) gen, val - from test.runtime_test_3 - where id = any (select * from test.vals order by val limit 5) - order by id, gen, val - offset 1 limit 1 - into res; /* without IndexOnlyScan */ - - perform test.pathman_equal(res.id::text, '1', 'id 
is incorrect (t2)'); - perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t2)'); - perform test.pathman_equal(res.val::text, 'k = 1', 'val is incorrect (t2)'); - - - select id - from test.runtime_test_3 - where id = any (select * from test.vals order by val limit 5) - order by id - offset 3 limit 1 - into res; /* with IndexOnlyScan */ - - perform test.pathman_equal(res.id::text, '4', 'id is incorrect (t3)'); - - - select v.val v1, generate_series(2, 2) gen, t.val v2 - from test.runtime_test_3 t join test.vals v on id = v.val - order by v1, gen, v2 - limit 1 - into res; - - perform test.pathman_equal(res.v1::text, '1', 'v1 is incorrect (t4)'); - perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t4)'); - perform test.pathman_equal(res.v2::text, 'k = 1', 'v2 is incorrect (t4)'); - - return 'ok'; -end; -$$ language plpgsql -set pg_pathman.enable = true -set enable_hashjoin = off -set enable_mergejoin = off; - - - -create table test.run_values as select generate_series(1, 10000) val; -create table test.runtime_test_1(id serial primary key, val real); -insert into test.runtime_test_1 select generate_series(1, 10000), random(); -select pathman.create_hash_partitions('test.runtime_test_1', 'id', 6); - -create table test.category as (select id, 'cat' || id::text as name from generate_series(1, 4) id); -create table test.runtime_test_2 (id serial, category_id int not null, name text, rating real); -insert into test.runtime_test_2 (select id, (id % 6) + 1 as category_id, 'good' || id::text as name, random() as rating from generate_series(1, 100000) id); -create index on test.runtime_test_2 (category_id, rating); -select pathman.create_hash_partitions('test.runtime_test_2', 'category_id', 6); - -create table test.vals as (select generate_series(1, 10000) as val); -create table test.runtime_test_3(val text, id serial not null); -insert into test.runtime_test_3(id, val) select * from generate_series(1, 10000) k, format('k = %s', k); -select 
pathman.create_hash_partitions('test.runtime_test_3', 'id', 4); -create index on test.runtime_test_3 (id); -create index on test.runtime_test_3_0 (id); - - -analyze test.run_values; -analyze test.runtime_test_1; -analyze test.runtime_test_2; -analyze test.runtime_test_3; -analyze test.runtime_test_3_0; - - -set enable_mergejoin = off; -set enable_hashjoin = off; -set pg_pathman.enable_runtimeappend = on; -set pg_pathman.enable_runtimemergeappend = on; -select test.pathman_test_1(); /* RuntimeAppend (select ... where id = (subquery)) */ -select test.pathman_test_2(); /* RuntimeAppend (select ... where id = any(subquery)) */ -select test.pathman_test_3(); /* RuntimeAppend (a join b on a.id = b.val) */ -select test.pathman_test_4(); /* RuntimeMergeAppend (lateral) */ -select test.pathman_test_5(); /* projection tests for RuntimeXXX nodes */ - -set pg_pathman.enable_runtimeappend = off; -set pg_pathman.enable_runtimemergeappend = off; -set enable_mergejoin = on; -set enable_hashjoin = on; - -drop table test.run_values, test.runtime_test_1, test.runtime_test_2, test.runtime_test_3, test.vals cascade; - /* * Test split and merge */ @@ -645,47 +386,7 @@ ALTER TABLE replies DROP CONSTRAINT replies_message_id_fkey; SELECT create_range_partitions('messages', 'id', 1, 100, 2); EXPLAIN (COSTS OFF) SELECT * FROM messages; -/* Check primary keys generation */ -CREATE TABLE test_ref(comment TEXT UNIQUE); -INSERT INTO test_ref VALUES('test'); - -CREATE TABLE test_fkey( - id INT NOT NULL, - comment TEXT, - FOREIGN KEY (comment) REFERENCES test_ref(comment)); - -INSERT INTO test_fkey SELECT generate_series(1, 1000), 'test'; - -SELECT create_range_partitions('test_fkey', 'id', 1, 100); -INSERT INTO test_fkey VALUES(1, 'wrong'); -INSERT INTO test_fkey VALUES(1, 'test'); -SELECT drop_partitions('test_fkey'); - -SELECT create_hash_partitions('test_fkey', 'id', 10); -INSERT INTO test_fkey VALUES(1, 'wrong'); -INSERT INTO test_fkey VALUES(1, 'test'); -SELECT drop_partitions('test_fkey'); 
- -/* Check callbacks */ -CREATE TABLE log(id serial, message text); - -CREATE OR REPLACE FUNCTION abc_on_partition_created_callback(args JSONB) -RETURNS VOID AS $$ -DECLARE - start_value TEXT := args->>'start'; - end_value TEXT := args->'end'; -BEGIN - INSERT INTO log(message) - VALUES (start_value || '-' || end_value); -END -$$ language plpgsql; - -CREATE TABLE abc(a serial, b int); -SELECT create_range_partitions('abc', 'a', 1, 100, 2); -SELECT set_part_init_callback('abc', 'abc_on_partition_created_callback'); -INSERT INTO abc VALUES (123, 1); -INSERT INTO abc VALUES (223, 1); -SELECT append_range_partition('abc'); -SELECT prepend_range_partition('abc'); -SELECT add_range_partition('abc', 401, 501); -SELECT message FROM log ORDER BY id; + +DROP SCHEMA test CASCADE; +DROP EXTENSION pg_pathman CASCADE; +DROP SCHEMA pathman CASCADE; diff --git a/sql/pathman_callbacks.sql b/sql/pathman_callbacks.sql new file mode 100644 index 0000000000..c8e12a824a --- /dev/null +++ b/sql/pathman_callbacks.sql @@ -0,0 +1,55 @@ +CREATE EXTENSION pg_pathman; +CREATE SCHEMA callbacks; + +/* Check callbacks */ +CREATE TABLE callbacks.log(id serial, message text); + +CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_range_callback( + args JSONB) +RETURNS VOID AS $$ +DECLARE + start_value TEXT := args->>'start'; + end_value TEXT := args->'end'; +BEGIN + INSERT INTO callbacks.log(message) + VALUES (start_value || '-' || end_value); +END +$$ language plpgsql; + + +CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_hash_callback( + args JSONB) +RETURNS VOID AS $$ +BEGIN + RAISE WARNING 'callback: partition %', args->'partition'; +END +$$ language plpgsql; + + +/* set callback to be called on RANGE partitions */ +CREATE TABLE callbacks.abc(a serial, b int); +SELECT create_range_partitions('callbacks.abc', 'a', 1, 100, 2); + +SELECT set_part_init_callback('callbacks.abc', + 'callbacks.abc_on_part_created_range_callback'); + +INSERT INTO callbacks.abc VALUES (123, 1); +INSERT INTO 
callbacks.abc VALUES (223, 1); + +SELECT append_range_partition('callbacks.abc'); +SELECT prepend_range_partition('callbacks.abc'); +SELECT add_range_partition('callbacks.abc', 401, 502); + +SELECT message FROM callbacks.log ORDER BY id; + +SELECT drop_partitions('callbacks.abc'); + + +/* set callback to be called on HASH partitions */ +SELECT set_part_init_callback('callbacks.abc', + 'callbacks.abc_on_part_created_hash_callback'); +SELECT create_hash_partitions('callbacks.abc', 'a', 5); + + +DROP SCHEMA callbacks CASCADE; +DROP EXTENSION pg_pathman CASCADE; diff --git a/sql/pathman_domains.sql b/sql/pathman_domains.sql new file mode 100644 index 0000000000..78138bcc72 --- /dev/null +++ b/sql/pathman_domains.sql @@ -0,0 +1,35 @@ +CREATE EXTENSION pg_pathman; +CREATE SCHEMA domains; + +CREATE DOMAIN domains.dom_test AS numeric CHECK (value < 1200); + +CREATE TABLE domains.dom_table(val domains.dom_test NOT NULL); +INSERT INTO domains.dom_table SELECT generate_series(1, 999); + +SELECT create_range_partitions('domains.dom_table', 'val', 1, 100); + +EXPLAIN (COSTS OFF) +SELECT * FROM domains.dom_table +WHERE val < 250; + +INSERT INTO domains.dom_table VALUES(1500); +INSERT INTO domains.dom_table VALUES(-10); + +SELECT append_range_partition('domains.dom_table'); +SELECT prepend_range_partition('domains.dom_table'); +SELECT merge_range_partitions('domains.dom_table_1', 'domains.dom_table_2'); +SELECT split_range_partition('domains.dom_table_1', 50); + +INSERT INTO domains.dom_table VALUES(1101); + +EXPLAIN (COSTS OFF) +SELECT * FROM domains.dom_table +WHERE val < 450; + + +SELECT * FROM pathman_partition_list +ORDER BY range_min::INT, range_max::INT; + + +DROP SCHEMA domains CASCADE; +DROP EXTENSION pg_pathman CASCADE; diff --git a/sql/pathman_foreign_keys.sql b/sql/pathman_foreign_keys.sql new file mode 100644 index 0000000000..9023ca5fd9 --- /dev/null +++ b/sql/pathman_foreign_keys.sql @@ -0,0 +1,27 @@ +CREATE EXTENSION pg_pathman; +CREATE SCHEMA fkeys; + +/* Check 
primary keys generation */ +CREATE TABLE fkeys.test_ref(comment TEXT UNIQUE); +INSERT INTO fkeys.test_ref VALUES('test'); + +CREATE TABLE fkeys.test_fkey( + id INT NOT NULL, + comment TEXT, + FOREIGN KEY (comment) REFERENCES fkeys.test_ref(comment)); + +INSERT INTO fkeys.test_fkey SELECT generate_series(1, 1000), 'test'; + +SELECT create_range_partitions('fkeys.test_fkey', 'id', 1, 100); +INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); +INSERT INTO fkeys.test_fkey VALUES(1, 'test'); +SELECT drop_partitions('fkeys.test_fkey'); + +SELECT create_hash_partitions('fkeys.test_fkey', 'id', 10); +INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); +INSERT INTO fkeys.test_fkey VALUES(1, 'test'); +SELECT drop_partitions('fkeys.test_fkey'); + + +DROP SCHEMA fkeys CASCADE; +DROP EXTENSION pg_pathman CASCADE; diff --git a/sql/pathman_runtime_nodes.sql b/sql/pathman_runtime_nodes.sql new file mode 100644 index 0000000000..5446afdefb --- /dev/null +++ b/sql/pathman_runtime_nodes.sql @@ -0,0 +1,270 @@ +CREATE SCHEMA pathman; +CREATE EXTENSION pg_pathman SCHEMA pathman; +CREATE SCHEMA test; + +/* + * Test RuntimeAppend + */ + +create or replace function test.pathman_assert(smt bool, error_msg text) returns text as $$ +begin + if not smt then + raise exception '%', error_msg; + end if; + + return 'ok'; +end; +$$ language plpgsql; + +create or replace function test.pathman_equal(a text, b text, error_msg text) returns text as $$ +begin + if a != b then + raise exception '''%'' is not equal to ''%'', %', a, b, error_msg; + end if; + + return 'equal'; +end; +$$ language plpgsql; + +create or replace function test.pathman_test(query text) returns jsonb as $$ +declare + plan jsonb; +begin + execute 'explain (analyze, format json)' || query into plan; + + return plan; +end; +$$ language plpgsql; + +create or replace function test.pathman_test_1() returns text as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.runtime_test_1 where id = (select * from 
test.run_values limit 1)'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Custom Plan Provider')::text, + '"RuntimeAppend"', + 'wrong plan provider'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Relation Name')::text, + format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(1), 6)), + 'wrong partition'); + + select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans') into num; + perform test.pathman_equal(num::text, '2', 'expected 2 child plans for custom scan'); + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; + +create or replace function test.pathman_test_2() returns text as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.runtime_test_1 where id = any (select * from test.run_values limit 4)'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Nested Loop"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, + '"RuntimeAppend"', + 'wrong plan provider'); + + select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; + perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); + + for i in 0..3 loop + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->i->'Relation Name')::text, + format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), + 'wrong partition'); + + num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; + perform test.pathman_equal(num::text, '1', 'expected 1 loop'); + end loop; + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set 
enable_mergejoin = off +set enable_hashjoin = off; + +create or replace function test.pathman_test_3() returns text as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.runtime_test_1 a join test.run_values b on a.id = b.val'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Nested Loop"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, + '"RuntimeAppend"', + 'wrong plan provider'); + + select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; + perform test.pathman_equal(num::text, '6', 'expected 6 child plans for custom scan'); + + for i in 0..5 loop + num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; + perform test.pathman_assert(num > 0 and num <= 1718, 'expected no more than 1718 loops'); + end loop; + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; + +create or replace function test.pathman_test_4() returns text as $$ +declare + plan jsonb; + num int; +begin + plan = test.pathman_test('select * from test.category c, lateral' || + '(select * from test.runtime_test_2 g where g.category_id = c.id order by rating limit 4) as tg'); + + perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, + '"Nested Loop"', + 'wrong plan type'); + + /* Limit -> Custom Scan */ + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Node Type')::text, + '"Custom Scan"', + 'wrong plan type'); + + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Custom Plan Provider')::text, + '"RuntimeMergeAppend"', + 'wrong plan provider'); + + select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans') into num; + perform test.pathman_equal(num::text, '4', 
'expected 4 child plans for custom scan'); + + for i in 0..3 loop + perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Relation Name')::text, + format('"runtime_test_2_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), + 'wrong partition'); + + num = plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Actual Loops'; + perform test.pathman_assert(num = 1, 'expected no more than 1 loops'); + end loop; + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_mergejoin = off +set enable_hashjoin = off; + +create or replace function test.pathman_test_5() returns text as $$ +declare + res record; +begin + select + from test.runtime_test_3 + where id = (select * from test.vals order by val limit 1) + limit 1 + into res; /* test empty tlist */ + + + select id, generate_series(1, 2) gen, val + from test.runtime_test_3 + where id = any (select * from test.vals order by val limit 5) + order by id, gen, val + offset 1 limit 1 + into res; /* without IndexOnlyScan */ + + perform test.pathman_equal(res.id::text, '1', 'id is incorrect (t2)'); + perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t2)'); + perform test.pathman_equal(res.val::text, 'k = 1', 'val is incorrect (t2)'); + + + select id + from test.runtime_test_3 + where id = any (select * from test.vals order by val limit 5) + order by id + offset 3 limit 1 + into res; /* with IndexOnlyScan */ + + perform test.pathman_equal(res.id::text, '4', 'id is incorrect (t3)'); + + + select v.val v1, generate_series(2, 2) gen, t.val v2 + from test.runtime_test_3 t join test.vals v on id = v.val + order by v1, gen, v2 + limit 1 + into res; + + perform test.pathman_equal(res.v1::text, '1', 'v1 is incorrect (t4)'); + perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t4)'); + perform test.pathman_equal(res.v2::text, 'k = 1', 'v2 is incorrect (t4)'); + + return 'ok'; +end; +$$ language plpgsql +set pg_pathman.enable = true +set enable_hashjoin 
= off +set enable_mergejoin = off; + + + +create table test.run_values as select generate_series(1, 10000) val; +create table test.runtime_test_1(id serial primary key, val real); +insert into test.runtime_test_1 select generate_series(1, 10000), random(); +select pathman.create_hash_partitions('test.runtime_test_1', 'id', 6); + +create table test.category as (select id, 'cat' || id::text as name from generate_series(1, 4) id); +create table test.runtime_test_2 (id serial, category_id int not null, name text, rating real); +insert into test.runtime_test_2 (select id, (id % 6) + 1 as category_id, 'good' || id::text as name, random() as rating from generate_series(1, 100000) id); +create index on test.runtime_test_2 (category_id, rating); +select pathman.create_hash_partitions('test.runtime_test_2', 'category_id', 6); + +create table test.vals as (select generate_series(1, 10000) as val); +create table test.runtime_test_3(val text, id serial not null); +insert into test.runtime_test_3(id, val) select * from generate_series(1, 10000) k, format('k = %s', k); +select pathman.create_hash_partitions('test.runtime_test_3', 'id', 4); +create index on test.runtime_test_3 (id); +create index on test.runtime_test_3_0 (id); + + +analyze test.run_values; +analyze test.runtime_test_1; +analyze test.runtime_test_2; +analyze test.runtime_test_3; +analyze test.runtime_test_3_0; + +set pg_pathman.enable_runtimeappend = on; +set pg_pathman.enable_runtimemergeappend = on; + +select test.pathman_test_1(); /* RuntimeAppend (select ... where id = (subquery)) */ +select test.pathman_test_2(); /* RuntimeAppend (select ... 
where id = any(subquery)) */ +select test.pathman_test_3(); /* RuntimeAppend (a join b on a.id = b.val) */ +select test.pathman_test_4(); /* RuntimeMergeAppend (lateral) */ +select test.pathman_test_5(); /* projection tests for RuntimeXXX nodes */ + + +DROP SCHEMA test CASCADE; +DROP EXTENSION pg_pathman CASCADE; +DROP SCHEMA pathman CASCADE; + diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 0b0e48e618..9f777c3aea 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -886,7 +886,7 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) fill_type_cmp_fmgr_info(&interval_type_cmp, base_value_type, base_atttype); if (SPI_connect() != SPI_OK_CONNECT) - elog(ERROR, "Could not connect using SPI"); + elog(ERROR, "could not connect using SPI"); /* while (value >= MAX) ... */ spawn_partitions(PrelParentRelid(prel), value, max_rvalue, @@ -964,12 +964,12 @@ create_partitions(Oid relid, Datum value, Oid value_type) } } else - elog(ERROR, "Relation \"%s\" is not partitioned by pg_pathman", + elog(ERROR, "relation \"%s\" is not partitioned by pg_pathman", get_rel_name_or_relid(relid)); /* Check that 'last_partition' is valid */ if (last_partition == InvalidOid) - elog(ERROR, "Could not create new partitions for relation \"%s\"", + elog(ERROR, "could not create new partitions for relation \"%s\"", get_rel_name_or_relid(relid)); return last_partition; diff --git a/src/utils.c b/src/utils.c index f972b9f131..ed57d0f62f 100644 --- a/src/utils.c +++ b/src/utils.c @@ -251,8 +251,8 @@ fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, Oid type2) BTORDER_PROC); if (cmp_proc_oid == InvalidOid) - elog(ERROR, "Missing comparison function for types %u & %u", - type1, type2); + elog(ERROR, "missing comparison function for types %s & %s", + format_type_be(type1), format_type_be(type2)); fmgr_info(cmp_proc_oid, finfo); @@ -649,6 +649,20 @@ get_rel_name_or_relid(Oid relid) return relname; } +/* + * Try to get opname or at least opid as cstring. 
+ */ +char * +get_op_name_or_opid(Oid opid) +{ + char *opname = get_opname(opid); + + if (!opname) + return DatumGetCString(DirectFunctionCall1(oidout, + ObjectIdGetDatum(opid))); + return opname; +} + #if PG_VERSION_NUM < 90600 /* diff --git a/src/utils.h b/src/utils.h index 21070c7bae..1b2af2ee56 100644 --- a/src/utils.h +++ b/src/utils.h @@ -65,6 +65,8 @@ char get_rel_persistence(Oid relid); * Handy execution-stage functions. */ char * get_rel_name_or_relid(Oid relid); +char * get_op_name_or_opid(Oid opid); + Oid get_binary_operator_oid(char *opname, Oid arg1, Oid arg2); void fill_type_cmp_fmgr_info(FmgrInfo *finfo, Oid type1, From 44d88c6a4d2c792d66940c267a7c4d916b4bfdee Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 4 Oct 2016 19:42:24 +0300 Subject: [PATCH 171/184] use \set VERBOSITY terse --- expected/pathman_callbacks.out | 11 +---------- expected/pathman_domains.out | 18 +---------------- expected/pathman_foreign_keys.out | 6 +----- expected/pathman_runtime_nodes.out | 31 +----------------------------- results/pathman_callbacks.out | 11 +---------- results/pathman_domains.out | 18 +---------------- results/pathman_foreign_keys.out | 6 +----- results/pathman_runtime_nodes.out | 31 +----------------------------- sql/pathman_callbacks.sql | 2 ++ sql/pathman_domains.sql | 2 ++ sql/pathman_foreign_keys.sql | 2 ++ sql/pathman_runtime_nodes.sql | 2 ++ 12 files changed, 16 insertions(+), 124 deletions(-) diff --git a/expected/pathman_callbacks.out b/expected/pathman_callbacks.out index d168729cee..31e405e5d7 100644 --- a/expected/pathman_callbacks.out +++ b/expected/pathman_callbacks.out @@ -1,3 +1,4 @@ +\set VERBOSITY terse CREATE EXTENSION pg_pathman; CREATE SCHEMA callbacks; /* Check callbacks */ @@ -99,14 +100,4 @@ WARNING: callback: partition "abc_4" DROP SCHEMA callbacks CASCADE; NOTICE: drop cascades to 10 other objects -DETAIL: drop cascades to table callbacks.log -drop cascades to function callbacks.abc_on_part_created_range_callback(jsonb) -drop 
cascades to function callbacks.abc_on_part_created_hash_callback(jsonb) -drop cascades to table callbacks.abc -drop cascades to sequence callbacks.abc_seq -drop cascades to table callbacks.abc_0 -drop cascades to table callbacks.abc_1 -drop cascades to table callbacks.abc_2 -drop cascades to table callbacks.abc_3 -drop cascades to table callbacks.abc_4 DROP EXTENSION pg_pathman CASCADE; diff --git a/expected/pathman_domains.out b/expected/pathman_domains.out index 590531c400..283a6d5b83 100644 --- a/expected/pathman_domains.out +++ b/expected/pathman_domains.out @@ -1,3 +1,4 @@ +\set VERBOSITY terse CREATE EXTENSION pg_pathman; CREATE SCHEMA domains; CREATE DOMAIN domains.dom_test AS numeric CHECK (value < 1200); @@ -88,21 +89,4 @@ ORDER BY range_min::INT, range_max::INT; DROP SCHEMA domains CASCADE; NOTICE: drop cascades to 17 other objects -DETAIL: drop cascades to type domains.dom_test -drop cascades to table domains.dom_table -drop cascades to sequence domains.dom_table_seq -drop cascades to table domains.dom_table_1 -drop cascades to table domains.dom_table_3 -drop cascades to table domains.dom_table_4 -drop cascades to table domains.dom_table_5 -drop cascades to table domains.dom_table_6 -drop cascades to table domains.dom_table_7 -drop cascades to table domains.dom_table_8 -drop cascades to table domains.dom_table_9 -drop cascades to table domains.dom_table_10 -drop cascades to table domains.dom_table_11 -drop cascades to table domains.dom_table_12 -drop cascades to table domains.dom_table_13 -drop cascades to table domains.dom_table_14 -drop cascades to table domains.dom_table_15 DROP EXTENSION pg_pathman CASCADE; diff --git a/expected/pathman_foreign_keys.out b/expected/pathman_foreign_keys.out index dee09d99b8..20a4da60d4 100644 --- a/expected/pathman_foreign_keys.out +++ b/expected/pathman_foreign_keys.out @@ -1,3 +1,4 @@ +\set VERBOSITY terse CREATE EXTENSION pg_pathman; CREATE SCHEMA fkeys; /* Check primary keys generation */ @@ -17,7 +18,6 @@ NOTICE: 
sequence "test_fkey_seq" does not exist, skipping INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); ERROR: insert or update on table "test_fkey_1" violates foreign key constraint "test_fkey_1_comment_fkey" -DETAIL: Key (comment)=(wrong) is not present in table "test_ref". INSERT INTO fkeys.test_fkey VALUES(1, 'test'); SELECT drop_partitions('fkeys.test_fkey'); NOTICE: function fkeys.test_fkey_upd_trig_func() does not exist, skipping @@ -44,7 +44,6 @@ SELECT create_hash_partitions('fkeys.test_fkey', 'id', 10); INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); ERROR: insert or update on table "test_fkey_0" violates foreign key constraint "test_fkey_0_comment_fkey" -DETAIL: Key (comment)=(wrong) is not present in table "test_ref". INSERT INTO fkeys.test_fkey VALUES(1, 'test'); SELECT drop_partitions('fkeys.test_fkey'); NOTICE: function fkeys.test_fkey_upd_trig_func() does not exist, skipping @@ -65,7 +64,4 @@ NOTICE: 94 rows copied from fkeys.test_fkey_9 DROP SCHEMA fkeys CASCADE; NOTICE: drop cascades to 3 other objects -DETAIL: drop cascades to table fkeys.test_ref -drop cascades to table fkeys.test_fkey -drop cascades to sequence fkeys.test_fkey_seq DROP EXTENSION pg_pathman CASCADE; diff --git a/expected/pathman_runtime_nodes.out b/expected/pathman_runtime_nodes.out index cf75cfa049..98b08710e0 100644 --- a/expected/pathman_runtime_nodes.out +++ b/expected/pathman_runtime_nodes.out @@ -1,3 +1,4 @@ +\set VERBOSITY terse CREATE SCHEMA pathman; CREATE EXTENSION pg_pathman SCHEMA pathman; CREATE SCHEMA test; @@ -286,35 +287,5 @@ select test.pathman_test_5(); /* projection tests for RuntimeXXX nodes */ DROP SCHEMA test CASCADE; NOTICE: drop cascades to 30 other objects -DETAIL: drop cascades to function test.pathman_assert(boolean,text) -drop cascades to function test.pathman_equal(text,text,text) -drop cascades to function test.pathman_test(text) -drop cascades to function test.pathman_test_1() -drop cascades to function test.pathman_test_2() -drop cascades to function 
test.pathman_test_3() -drop cascades to function test.pathman_test_4() -drop cascades to function test.pathman_test_5() -drop cascades to table test.run_values -drop cascades to table test.runtime_test_1 -drop cascades to table test.runtime_test_1_0 -drop cascades to table test.runtime_test_1_1 -drop cascades to table test.runtime_test_1_2 -drop cascades to table test.runtime_test_1_3 -drop cascades to table test.runtime_test_1_4 -drop cascades to table test.runtime_test_1_5 -drop cascades to table test.category -drop cascades to table test.runtime_test_2 -drop cascades to table test.runtime_test_2_0 -drop cascades to table test.runtime_test_2_1 -drop cascades to table test.runtime_test_2_2 -drop cascades to table test.runtime_test_2_3 -drop cascades to table test.runtime_test_2_4 -drop cascades to table test.runtime_test_2_5 -drop cascades to table test.vals -drop cascades to table test.runtime_test_3 -drop cascades to table test.runtime_test_3_0 -drop cascades to table test.runtime_test_3_1 -drop cascades to table test.runtime_test_3_2 -drop cascades to table test.runtime_test_3_3 DROP EXTENSION pg_pathman CASCADE; DROP SCHEMA pathman CASCADE; diff --git a/results/pathman_callbacks.out b/results/pathman_callbacks.out index d168729cee..31e405e5d7 100644 --- a/results/pathman_callbacks.out +++ b/results/pathman_callbacks.out @@ -1,3 +1,4 @@ +\set VERBOSITY terse CREATE EXTENSION pg_pathman; CREATE SCHEMA callbacks; /* Check callbacks */ @@ -99,14 +100,4 @@ WARNING: callback: partition "abc_4" DROP SCHEMA callbacks CASCADE; NOTICE: drop cascades to 10 other objects -DETAIL: drop cascades to table callbacks.log -drop cascades to function callbacks.abc_on_part_created_range_callback(jsonb) -drop cascades to function callbacks.abc_on_part_created_hash_callback(jsonb) -drop cascades to table callbacks.abc -drop cascades to sequence callbacks.abc_seq -drop cascades to table callbacks.abc_0 -drop cascades to table callbacks.abc_1 -drop cascades to table callbacks.abc_2 
-drop cascades to table callbacks.abc_3 -drop cascades to table callbacks.abc_4 DROP EXTENSION pg_pathman CASCADE; diff --git a/results/pathman_domains.out b/results/pathman_domains.out index 590531c400..283a6d5b83 100644 --- a/results/pathman_domains.out +++ b/results/pathman_domains.out @@ -1,3 +1,4 @@ +\set VERBOSITY terse CREATE EXTENSION pg_pathman; CREATE SCHEMA domains; CREATE DOMAIN domains.dom_test AS numeric CHECK (value < 1200); @@ -88,21 +89,4 @@ ORDER BY range_min::INT, range_max::INT; DROP SCHEMA domains CASCADE; NOTICE: drop cascades to 17 other objects -DETAIL: drop cascades to type domains.dom_test -drop cascades to table domains.dom_table -drop cascades to sequence domains.dom_table_seq -drop cascades to table domains.dom_table_1 -drop cascades to table domains.dom_table_3 -drop cascades to table domains.dom_table_4 -drop cascades to table domains.dom_table_5 -drop cascades to table domains.dom_table_6 -drop cascades to table domains.dom_table_7 -drop cascades to table domains.dom_table_8 -drop cascades to table domains.dom_table_9 -drop cascades to table domains.dom_table_10 -drop cascades to table domains.dom_table_11 -drop cascades to table domains.dom_table_12 -drop cascades to table domains.dom_table_13 -drop cascades to table domains.dom_table_14 -drop cascades to table domains.dom_table_15 DROP EXTENSION pg_pathman CASCADE; diff --git a/results/pathman_foreign_keys.out b/results/pathman_foreign_keys.out index dee09d99b8..20a4da60d4 100644 --- a/results/pathman_foreign_keys.out +++ b/results/pathman_foreign_keys.out @@ -1,3 +1,4 @@ +\set VERBOSITY terse CREATE EXTENSION pg_pathman; CREATE SCHEMA fkeys; /* Check primary keys generation */ @@ -17,7 +18,6 @@ NOTICE: sequence "test_fkey_seq" does not exist, skipping INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); ERROR: insert or update on table "test_fkey_1" violates foreign key constraint "test_fkey_1_comment_fkey" -DETAIL: Key (comment)=(wrong) is not present in table "test_ref". 
INSERT INTO fkeys.test_fkey VALUES(1, 'test'); SELECT drop_partitions('fkeys.test_fkey'); NOTICE: function fkeys.test_fkey_upd_trig_func() does not exist, skipping @@ -44,7 +44,6 @@ SELECT create_hash_partitions('fkeys.test_fkey', 'id', 10); INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); ERROR: insert or update on table "test_fkey_0" violates foreign key constraint "test_fkey_0_comment_fkey" -DETAIL: Key (comment)=(wrong) is not present in table "test_ref". INSERT INTO fkeys.test_fkey VALUES(1, 'test'); SELECT drop_partitions('fkeys.test_fkey'); NOTICE: function fkeys.test_fkey_upd_trig_func() does not exist, skipping @@ -65,7 +64,4 @@ NOTICE: 94 rows copied from fkeys.test_fkey_9 DROP SCHEMA fkeys CASCADE; NOTICE: drop cascades to 3 other objects -DETAIL: drop cascades to table fkeys.test_ref -drop cascades to table fkeys.test_fkey -drop cascades to sequence fkeys.test_fkey_seq DROP EXTENSION pg_pathman CASCADE; diff --git a/results/pathman_runtime_nodes.out b/results/pathman_runtime_nodes.out index cf75cfa049..98b08710e0 100644 --- a/results/pathman_runtime_nodes.out +++ b/results/pathman_runtime_nodes.out @@ -1,3 +1,4 @@ +\set VERBOSITY terse CREATE SCHEMA pathman; CREATE EXTENSION pg_pathman SCHEMA pathman; CREATE SCHEMA test; @@ -286,35 +287,5 @@ select test.pathman_test_5(); /* projection tests for RuntimeXXX nodes */ DROP SCHEMA test CASCADE; NOTICE: drop cascades to 30 other objects -DETAIL: drop cascades to function test.pathman_assert(boolean,text) -drop cascades to function test.pathman_equal(text,text,text) -drop cascades to function test.pathman_test(text) -drop cascades to function test.pathman_test_1() -drop cascades to function test.pathman_test_2() -drop cascades to function test.pathman_test_3() -drop cascades to function test.pathman_test_4() -drop cascades to function test.pathman_test_5() -drop cascades to table test.run_values -drop cascades to table test.runtime_test_1 -drop cascades to table test.runtime_test_1_0 -drop cascades to table 
test.runtime_test_1_1 -drop cascades to table test.runtime_test_1_2 -drop cascades to table test.runtime_test_1_3 -drop cascades to table test.runtime_test_1_4 -drop cascades to table test.runtime_test_1_5 -drop cascades to table test.category -drop cascades to table test.runtime_test_2 -drop cascades to table test.runtime_test_2_0 -drop cascades to table test.runtime_test_2_1 -drop cascades to table test.runtime_test_2_2 -drop cascades to table test.runtime_test_2_3 -drop cascades to table test.runtime_test_2_4 -drop cascades to table test.runtime_test_2_5 -drop cascades to table test.vals -drop cascades to table test.runtime_test_3 -drop cascades to table test.runtime_test_3_0 -drop cascades to table test.runtime_test_3_1 -drop cascades to table test.runtime_test_3_2 -drop cascades to table test.runtime_test_3_3 DROP EXTENSION pg_pathman CASCADE; DROP SCHEMA pathman CASCADE; diff --git a/sql/pathman_callbacks.sql b/sql/pathman_callbacks.sql index c8e12a824a..dd43a047d2 100644 --- a/sql/pathman_callbacks.sql +++ b/sql/pathman_callbacks.sql @@ -1,3 +1,5 @@ +\set VERBOSITY terse + CREATE EXTENSION pg_pathman; CREATE SCHEMA callbacks; diff --git a/sql/pathman_domains.sql b/sql/pathman_domains.sql index 78138bcc72..bc5d227e4e 100644 --- a/sql/pathman_domains.sql +++ b/sql/pathman_domains.sql @@ -1,3 +1,5 @@ +\set VERBOSITY terse + CREATE EXTENSION pg_pathman; CREATE SCHEMA domains; diff --git a/sql/pathman_foreign_keys.sql b/sql/pathman_foreign_keys.sql index 9023ca5fd9..a2032815de 100644 --- a/sql/pathman_foreign_keys.sql +++ b/sql/pathman_foreign_keys.sql @@ -1,3 +1,5 @@ +\set VERBOSITY terse + CREATE EXTENSION pg_pathman; CREATE SCHEMA fkeys; diff --git a/sql/pathman_runtime_nodes.sql b/sql/pathman_runtime_nodes.sql index 5446afdefb..517995b9be 100644 --- a/sql/pathman_runtime_nodes.sql +++ b/sql/pathman_runtime_nodes.sql @@ -1,3 +1,5 @@ +\set VERBOSITY terse + CREATE SCHEMA pathman; CREATE EXTENSION pg_pathman SCHEMA pathman; CREATE SCHEMA test; From 
bb05f9dd1b9840a75bdfa253c39fa0b2567a4719 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Tue, 4 Oct 2016 19:48:14 +0300 Subject: [PATCH 172/184] enable FDW tests by default --- tests/README.md | 7 +++---- tests/partitioning_test.py | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/README.md b/tests/README.md index d7f647b2cd..8d07cc445b 100644 --- a/tests/README.md +++ b/tests/README.md @@ -26,10 +26,9 @@ environment variable: export PG_CONFIG=/path/to/pg_config ``` -Tests concerning FDW features are disabled by default. To test FDW features -you need to install postgres_fdw contrib module first and then set the TEST_FDW -environment variable: +To test FDW features you need to install postgres_fdw contrib module first. +If you want to skip FDW tests set the FDW_DISABLED environment variable: ``` -export TEST_FDW=1 +export FDW_DISABLED=1 ``` diff --git a/tests/partitioning_test.py b/tests/partitioning_test.py index aacc6a415b..c71c9b9ba5 100644 --- a/tests/partitioning_test.py +++ b/tests/partitioning_test.py @@ -12,10 +12,10 @@ import os -def test_fdw(func): +def if_fdw_enabled(func): """To run tests with FDW support set environment variable TEST_FDW=1""" def wrapper(*args, **kwargs): - if os.environ.get('TEST_FDW'): + if os.environ.get('FDW_DISABLED') != '1': func(*args, **kwargs) else: print('Warning: FDW features tests are disabled, skipping...') @@ -341,7 +341,7 @@ def check_tablespace(node, tablename, tablespace): self.assertTrue(check_tablespace(node, 'abc_prepended_2', 'pg_default')) self.assertTrue(check_tablespace(node, 'abc_added_2', 'pg_default')) - @test_fdw + @if_fdw_enabled def test_foreign_table(self): """Test foreign tables""" From 69c1b6d6f8d7a11d1dea92707c81e0ecdfd7b18a Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 4 Oct 2016 23:06:28 +0300 Subject: [PATCH 173/184] purge regression results + update .gitignore, small improvements in range.sql --- .gitignore | 2 +- range.sql | 23 +- results/pathman_basic.out | 
1434 ----------------------------- results/pathman_callbacks.out | 103 --- results/pathman_domains.out | 92 -- results/pathman_foreign_keys.out | 67 -- results/pathman_runtime_nodes.out | 291 ------ 7 files changed, 17 insertions(+), 1995 deletions(-) delete mode 100644 results/pathman_basic.out delete mode 100644 results/pathman_callbacks.out delete mode 100644 results/pathman_domains.out delete mode 100644 results/pathman_foreign_keys.out delete mode 100644 results/pathman_runtime_nodes.out diff --git a/.gitignore b/.gitignore index 7f9490f2be..50fb51a52f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ .deps isolation_output -results/pg_pathman.out +results/* regression.diffs regression.out *.o diff --git a/range.sql b/range.sql index 1e0f196561..f32a1c7c4c 100644 --- a/range.sql +++ b/range.sql @@ -89,6 +89,7 @@ RETURNS INTEGER AS $$ DECLARE v_rows_count INTEGER; + v_atttype REGTYPE; v_max p_start_value%TYPE; v_cur_value p_start_value%TYPE := p_start_value; p_end_value p_start_value%TYPE; @@ -128,6 +129,8 @@ BEGIN END LOOP; END IF; + v_atttype := @extschema@.get_base_type(pg_typeof(p_start_value)); + /* * In case when user doesn't want to automatically create partitions * and specifies partition count as 0 then do not check boundaries @@ -146,7 +149,7 @@ BEGIN p_attribute, p_start_value, p_end_value, - @extschema@.get_base_type(pg_typeof(p_start_value))::TEXT); + v_atttype::TEXT); END IF; /* Create sequence for child partitions names */ @@ -162,7 +165,7 @@ BEGIN LOOP EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $3::%s, tablespace:=$4)', - @extschema@.get_base_type(pg_typeof(p_start_value))::TEXT) + v_atttype::TEXT) USING parent_relid, p_start_value, @@ -831,15 +834,18 @@ RETURNS TEXT AS $$ DECLARE v_part_name TEXT; + v_atttype REGTYPE; BEGIN IF @extschema@.partitions_count(parent_relid) = 0 THEN RAISE EXCEPTION 'cannot append to empty partitions set'; END IF; + v_atttype := @extschema@.get_base_type(p_atttype); + /* We have 
to pass fake NULL casted to column's type */ EXECUTE format('SELECT @extschema@.get_part_range($1, -1, NULL::%s)', - @extschema@.get_base_type(p_atttype)::TEXT) + v_atttype::TEXT) USING parent_relid INTO p_range; @@ -853,7 +859,7 @@ BEGIN ELSE EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2, $2 + $3::%s, $4, $5)', - @extschema@.get_base_type(p_atttype)::TEXT) + v_atttype::TEXT) USING parent_relid, p_range[2], @@ -933,15 +939,18 @@ RETURNS TEXT AS $$ DECLARE v_part_name TEXT; + v_atttype REGTYPE; BEGIN IF @extschema@.partitions_count(parent_relid) = 0 THEN RAISE EXCEPTION 'cannot prepend to empty partitions set'; END IF; + v_atttype := @extschema@.get_base_type(p_atttype); + /* We have to pass fake NULL casted to column's type */ EXECUTE format('SELECT @extschema@.get_part_range($1, 0, NULL::%s)', - @extschema@.get_base_type(p_atttype)::TEXT) + v_atttype::TEXT) USING parent_relid INTO p_range; @@ -955,7 +964,7 @@ BEGIN ELSE EXECUTE format('SELECT @extschema@.create_single_range_partition($1, $2 - $3::%s, $2, $4, $5)', - @extschema@.get_base_type(p_atttype)::TEXT) + v_atttype::TEXT) USING parent_relid, p_range[1], @@ -1117,7 +1126,7 @@ DECLARE parent_relid REGCLASS; BEGIN - parent_relid = @extschema@.get_parent_of_partition(p_partition); + parent_relid := @extschema@.get_parent_of_partition(p_partition); /* Acquire lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); diff --git a/results/pathman_basic.out b/results/pathman_basic.out deleted file mode 100644 index 86ef91aef3..0000000000 --- a/results/pathman_basic.out +++ /dev/null @@ -1,1434 +0,0 @@ -\set VERBOSITY terse -CREATE SCHEMA pathman; -CREATE EXTENSION pg_pathman SCHEMA pathman; -CREATE SCHEMA test; -CREATE TABLE test.hash_rel ( - id SERIAL PRIMARY KEY, - value INTEGER); -INSERT INTO test.hash_rel VALUES (1, 1); -INSERT INTO test.hash_rel VALUES (2, 2); -INSERT INTO test.hash_rel VALUES (3, 3); -SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 
3); -ERROR: partitioning key 'value' must be NOT NULL -ALTER TABLE test.hash_rel ALTER COLUMN value SET NOT NULL; -SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3, partition_data:=false); - create_hash_partitions ------------------------- - 3 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on hash_rel - -> Seq Scan on hash_rel_0 - -> Seq Scan on hash_rel_1 - -> Seq Scan on hash_rel_2 -(5 rows) - -SELECT * FROM test.hash_rel; - id | value -----+------- - 1 | 1 - 2 | 2 - 3 | 3 -(3 rows) - -SELECT pathman.set_enable_parent('test.hash_rel', false); - set_enable_parent -------------------- - -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on hash_rel_0 - -> Seq Scan on hash_rel_1 - -> Seq Scan on hash_rel_2 -(4 rows) - -SELECT * FROM test.hash_rel; - id | value -----+------- -(0 rows) - -SELECT pathman.set_enable_parent('test.hash_rel', true); - set_enable_parent -------------------- - -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on hash_rel - -> Seq Scan on hash_rel_0 - -> Seq Scan on hash_rel_1 - -> Seq Scan on hash_rel_2 -(5 rows) - -SELECT * FROM test.hash_rel; - id | value -----+------- - 1 | 1 - 2 | 2 - 3 | 3 -(3 rows) - -SELECT pathman.drop_partitions('test.hash_rel'); -NOTICE: function test.hash_rel_upd_trig_func() does not exist, skipping -NOTICE: 0 rows copied from test.hash_rel_0 -NOTICE: 0 rows copied from test.hash_rel_1 -NOTICE: 0 rows copied from test.hash_rel_2 - drop_partitions ------------------ - 3 -(1 row) - -SELECT pathman.create_hash_partitions('test.hash_rel', 'Value', 3); - create_hash_partitions ------------------------- - 3 -(1 row) - -SELECT COUNT(*) FROM test.hash_rel; - count -------- - 3 -(1 row) - -SELECT COUNT(*) FROM ONLY test.hash_rel; - count -------- - 0 -(1 row) - -INSERT 
INTO test.hash_rel VALUES (4, 4); -INSERT INTO test.hash_rel VALUES (5, 5); -INSERT INTO test.hash_rel VALUES (6, 6); -SELECT COUNT(*) FROM test.hash_rel; - count -------- - 6 -(1 row) - -SELECT COUNT(*) FROM ONLY test.hash_rel; - count -------- - 0 -(1 row) - -CREATE TABLE test.range_rel ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP, - txt TEXT); -CREATE INDEX ON test.range_rel (dt); -INSERT INTO test.range_rel (dt, txt) -SELECT g, md5(g::TEXT) FROM generate_series('2015-01-01', '2015-04-30', '1 day'::interval) as g; -SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DATE, '1 month'::INTERVAL, 2); -ERROR: partitioning key 'dt' must be NOT NULL -ALTER TABLE test.range_rel ALTER COLUMN dt SET NOT NULL; -SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DATE, '1 month'::INTERVAL, 2); -ERROR: not enough partitions to fit all values of 'dt' -SELECT pathman.create_range_partitions('test.range_rel', 'DT', '2015-01-01'::DATE, '1 month'::INTERVAL); -NOTICE: sequence "range_rel_seq" does not exist, skipping - create_range_partitions -------------------------- - 4 -(1 row) - -SELECT COUNT(*) FROM test.range_rel; - count -------- - 120 -(1 row) - -SELECT COUNT(*) FROM ONLY test.range_rel; - count -------- - 0 -(1 row) - -CREATE TABLE test.num_range_rel ( - id SERIAL PRIMARY KEY, - txt TEXT); -SELECT pathman.create_range_partitions('test.num_range_rel', 'id', 0, 1000, 4); -NOTICE: sequence "num_range_rel_seq" does not exist, skipping - create_range_partitions -------------------------- - 4 -(1 row) - -SELECT COUNT(*) FROM test.num_range_rel; - count -------- - 0 -(1 row) - -SELECT COUNT(*) FROM ONLY test.num_range_rel; - count -------- - 0 -(1 row) - -INSERT INTO test.num_range_rel - SELECT g, md5(g::TEXT) FROM generate_series(1, 3000) as g; -SELECT COUNT(*) FROM test.num_range_rel; - count -------- - 3000 -(1 row) - -SELECT COUNT(*) FROM ONLY test.num_range_rel; - count -------- - 0 -(1 row) - -SELECT * FROM ONLY 
test.range_rel UNION SELECT * FROM test.range_rel; -ERROR: It is prohibited to query partitioned tables both with and without ONLY modifier -SET pg_pathman.enable_runtimeappend = OFF; -SET pg_pathman.enable_runtimemergeappend = OFF; -VACUUM; -/* update triggers test */ -SELECT pathman.create_hash_update_trigger('test.hash_rel'); - create_hash_update_trigger ------------------------------ - test.hash_rel_upd_trig_func -(1 row) - -UPDATE test.hash_rel SET value = 7 WHERE value = 6; -EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value = 7; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on hash_rel_1 - Filter: (value = 7) -(3 rows) - -SELECT * FROM test.hash_rel WHERE value = 7; - id | value -----+------- - 6 | 7 -(1 row) - -SELECT pathman.create_range_update_trigger('test.num_range_rel'); - create_range_update_trigger ----------------------------------- - test.num_range_rel_upd_trig_func -(1 row) - -UPDATE test.num_range_rel SET id = 3001 WHERE id = 1; -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id = 3001; - QUERY PLAN ------------------------------------ - Append - -> Seq Scan on num_range_rel_4 - Filter: (id = 3001) -(3 rows) - -SELECT * FROM test.num_range_rel WHERE id = 3001; - id | txt -------+---------------------------------- - 3001 | c4ca4238a0b923820dcc509a6f75849b -(1 row) - -SET enable_indexscan = OFF; -SET enable_bitmapscan = OFF; -SET enable_seqscan = ON; -EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on hash_rel_0 - -> Seq Scan on hash_rel_1 - -> Seq Scan on hash_rel_2 -(4 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value = 2; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on hash_rel_1 - Filter: (value = 2) -(3 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value = 2 OR value = 1; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on hash_rel_1 - Filter: (value = 2) - 
-> Seq Scan on hash_rel_2 - Filter: (value = 1) -(5 rows) - --- Temporarily commented out --- EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value BETWEEN 1 AND 2; --- QUERY PLAN --- ------------------------------------------------- --- Append --- -> Seq Scan on hash_rel_1 --- Filter: ((value >= 1) AND (value <= 2)) --- -> Seq Scan on hash_rel_2 --- Filter: ((value >= 1) AND (value <= 2)) --- (5 rows) -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id > 2500; - QUERY PLAN ------------------------------------ - Append - -> Seq Scan on num_range_rel_3 - Filter: (id > 2500) - -> Seq Scan on num_range_rel_4 -(4 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 1000 AND id < 3000; - QUERY PLAN ------------------------------------ - Append - -> Seq Scan on num_range_rel_2 - -> Seq Scan on num_range_rel_3 -(3 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 1500 AND id < 2500; - QUERY PLAN ------------------------------------ - Append - -> Seq Scan on num_range_rel_2 - Filter: (id >= 1500) - -> Seq Scan on num_range_rel_3 - Filter: (id < 2500) -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE (id >= 500 AND id < 1500) OR (id > 2500); - QUERY PLAN ------------------------------------ - Append - -> Seq Scan on num_range_rel_1 - Filter: (id >= 500) - -> Seq Scan on num_range_rel_2 - Filter: (id < 1500) - -> Seq Scan on num_range_rel_3 - Filter: (id > 2500) - -> Seq Scan on num_range_rel_4 -(8 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt > '2015-02-15'; - QUERY PLAN --------------------------------------------------------------------------------- - Append - -> Seq Scan on range_rel_2 - Filter: (dt > 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) - -> Seq Scan on range_rel_3 - -> Seq Scan on range_rel_4 -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt >= '2015-02-01' AND dt < '2015-03-01'; - QUERY PLAN -------------------------------- 
- Append - -> Seq Scan on range_rel_2 -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt >= '2015-02-15' AND dt < '2015-03-15'; - QUERY PLAN ---------------------------------------------------------------------------------- - Append - -> Seq Scan on range_rel_2 - Filter: (dt >= 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) - -> Seq Scan on range_rel_3 - Filter: (dt < 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE (dt >= '2015-01-15' AND dt < '2015-02-15') OR (dt > '2015-03-15'); - QUERY PLAN ---------------------------------------------------------------------------------- - Append - -> Seq Scan on range_rel_1 - Filter: (dt >= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) - -> Seq Scan on range_rel_2 - Filter: (dt < 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) - -> Seq Scan on range_rel_3 - Filter: (dt > 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) - -> Seq Scan on range_rel_4 -(8 rows) - -SET enable_indexscan = ON; -SET enable_bitmapscan = OFF; -SET enable_seqscan = OFF; -EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on hash_rel_0 - -> Seq Scan on hash_rel_1 - -> Seq Scan on hash_rel_2 -(4 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value = 2; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on hash_rel_1 - Filter: (value = 2) -(3 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.hash_rel WHERE value = 2 OR value = 1; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on hash_rel_1 - Filter: (value = 2) - -> Seq Scan on hash_rel_2 - Filter: (value = 1) -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id > 2500; - QUERY PLAN ----------------------------------------------------------------- - Append - -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 - Index Cond: (id > 
2500) - -> Seq Scan on num_range_rel_4 -(4 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 1000 AND id < 3000; - QUERY PLAN ------------------------------------ - Append - -> Seq Scan on num_range_rel_2 - -> Seq Scan on num_range_rel_3 -(3 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 1500 AND id < 2500; - QUERY PLAN ----------------------------------------------------------------- - Append - -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 - Index Cond: (id >= 1500) - -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 - Index Cond: (id < 2500) -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE (id >= 500 AND id < 1500) OR (id > 2500); - QUERY PLAN ----------------------------------------------------------------- - Append - -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 - Index Cond: (id >= 500) - -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 - Index Cond: (id < 1500) - -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 - Index Cond: (id > 2500) - -> Seq Scan on num_range_rel_4 -(8 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel ORDER BY id; - QUERY PLAN ----------------------------------------------------------------- - Append - -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 - -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 - -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 - -> Index Scan using num_range_rel_4_pkey on num_range_rel_4 -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id <= 2500 ORDER BY id; - QUERY PLAN ----------------------------------------------------------------- - Append - -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 - -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 - -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 - Index Cond: (id <= 2500) -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel 
WHERE dt > '2015-02-15'; - QUERY PLAN ------------------------------------------------------------------------------------- - Append - -> Index Scan using range_rel_2_dt_idx on range_rel_2 - Index Cond: (dt > 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) - -> Seq Scan on range_rel_3 - -> Seq Scan on range_rel_4 -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt >= '2015-02-01' AND dt < '2015-03-01'; - QUERY PLAN -------------------------------- - Append - -> Seq Scan on range_rel_2 -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt >= '2015-02-15' AND dt < '2015-03-15'; - QUERY PLAN -------------------------------------------------------------------------------------- - Append - -> Index Scan using range_rel_2_dt_idx on range_rel_2 - Index Cond: (dt >= 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) - -> Index Scan using range_rel_3_dt_idx on range_rel_3 - Index Cond: (dt < 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE (dt >= '2015-01-15' AND dt < '2015-02-15') OR (dt > '2015-03-15'); - QUERY PLAN -------------------------------------------------------------------------------------- - Append - -> Index Scan using range_rel_1_dt_idx on range_rel_1 - Index Cond: (dt >= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) - -> Index Scan using range_rel_2_dt_idx on range_rel_2 - Index Cond: (dt < 'Sun Feb 15 00:00:00 2015'::timestamp without time zone) - -> Index Scan using range_rel_3_dt_idx on range_rel_3 - Index Cond: (dt > 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) - -> Seq Scan on range_rel_4 -(8 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel ORDER BY dt; - QUERY PLAN ----------------------------------------------------------- - Append - -> Index Scan using range_rel_1_dt_idx on range_rel_1 - -> Index Scan using range_rel_2_dt_idx on range_rel_2 - -> Index Scan using range_rel_3_dt_idx on 
range_rel_3 - -> Index Scan using range_rel_4_dt_idx on range_rel_4 -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt >= '2015-01-15' ORDER BY dt DESC; - QUERY PLAN -------------------------------------------------------------------------------------- - Append - -> Index Scan Backward using range_rel_4_dt_idx on range_rel_4 - -> Index Scan Backward using range_rel_3_dt_idx on range_rel_3 - -> Index Scan Backward using range_rel_2_dt_idx on range_rel_2 - -> Index Scan Backward using range_rel_1_dt_idx on range_rel_1 - Index Cond: (dt >= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) -(6 rows) - -/* - * Sorting - */ -SET enable_indexscan = OFF; -SET enable_seqscan = ON; -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt < '2015-03-01' ORDER BY dt; - QUERY PLAN -------------------------------------- - Sort - Sort Key: range_rel_1.dt - -> Append - -> Seq Scan on range_rel_1 - -> Seq Scan on range_rel_2 -(5 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel_1 UNION ALL SELECT * FROM test.range_rel_2 ORDER BY dt; - QUERY PLAN -------------------------------------- - Sort - Sort Key: range_rel_1.dt - -> Append - -> Seq Scan on range_rel_1 - -> Seq Scan on range_rel_2 -(5 rows) - -SET enable_indexscan = ON; -SET enable_seqscan = OFF; -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt < '2015-03-01' ORDER BY dt; - QUERY PLAN ----------------------------------------------------------- - Append - -> Index Scan using range_rel_1_dt_idx on range_rel_1 - -> Index Scan using range_rel_2_dt_idx on range_rel_2 -(3 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel_1 UNION ALL SELECT * FROM test.range_rel_2 ORDER BY dt; - QUERY PLAN ----------------------------------------------------------- - Merge Append - Sort Key: range_rel_1.dt - -> Index Scan using range_rel_1_dt_idx on range_rel_1 - -> Index Scan using range_rel_2_dt_idx on range_rel_2 -(4 rows) - -/* - * Join - */ -SET enable_hashjoin = OFF; -set enable_nestloop = 
OFF; -SET enable_mergejoin = ON; -EXPLAIN (COSTS OFF) -SELECT * FROM test.range_rel j1 -JOIN test.range_rel j2 on j2.id = j1.id -JOIN test.num_range_rel j3 on j3.id = j1.id -WHERE j1.dt < '2015-03-01' AND j2.dt >= '2015-02-01' ORDER BY j2.dt; - QUERY PLAN -------------------------------------------------------------------------------------------- - Sort - Sort Key: j2.dt - -> Merge Join - Merge Cond: (j3.id = j2.id) - -> Append - -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 j3 - -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 j3_1 - -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 j3_2 - -> Index Scan using num_range_rel_4_pkey on num_range_rel_4 j3_3 - -> Materialize - -> Merge Join - Merge Cond: (j2.id = j1.id) - -> Merge Append - Sort Key: j2.id - -> Index Scan using range_rel_2_pkey on range_rel_2 j2 - -> Index Scan using range_rel_3_pkey on range_rel_3 j2_1 - -> Index Scan using range_rel_4_pkey on range_rel_4 j2_2 - -> Materialize - -> Merge Append - Sort Key: j1.id - -> Index Scan using range_rel_1_pkey on range_rel_1 j1 - -> Index Scan using range_rel_2_pkey on range_rel_2 j1_1 -(22 rows) - -SET enable_hashjoin = ON; -SET enable_mergejoin = OFF; -EXPLAIN (COSTS OFF) -SELECT * FROM test.range_rel j1 -JOIN test.range_rel j2 on j2.id = j1.id -JOIN test.num_range_rel j3 on j3.id = j1.id -WHERE j1.dt < '2015-03-01' AND j2.dt >= '2015-02-01' ORDER BY j2.dt; - QUERY PLAN -------------------------------------------------------------------------------------------- - Sort - Sort Key: j2.dt - -> Hash Join - Hash Cond: (j3.id = j2.id) - -> Append - -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 j3 - -> Index Scan using num_range_rel_2_pkey on num_range_rel_2 j3_1 - -> Index Scan using num_range_rel_3_pkey on num_range_rel_3 j3_2 - -> Index Scan using num_range_rel_4_pkey on num_range_rel_4 j3_3 - -> Hash - -> Hash Join - Hash Cond: (j2.id = j1.id) - -> Append - -> Index Scan using range_rel_2_dt_idx on range_rel_2 j2 - 
-> Index Scan using range_rel_3_dt_idx on range_rel_3 j2_1 - -> Index Scan using range_rel_4_dt_idx on range_rel_4 j2_2 - -> Hash - -> Append - -> Index Scan using range_rel_1_pkey on range_rel_1 j1 - -> Index Scan using range_rel_2_pkey on range_rel_2 j1_1 -(20 rows) - -/* - * Test CTE query - */ -EXPLAIN (COSTS OFF) - WITH ttt AS (SELECT * FROM test.range_rel WHERE dt >= '2015-02-01' AND dt < '2015-03-15') -SELECT * FROM ttt; - QUERY PLAN --------------------------------------------------------------------------------------------- - CTE Scan on ttt - CTE ttt - -> Append - -> Seq Scan on range_rel_2 - -> Index Scan using range_rel_3_dt_idx on range_rel_3 - Index Cond: (dt < 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) -(6 rows) - -EXPLAIN (COSTS OFF) - WITH ttt AS (SELECT * FROM test.hash_rel WHERE value = 2) -SELECT * FROM ttt; - QUERY PLAN --------------------------------------- - CTE Scan on ttt - CTE ttt - -> Append - -> Seq Scan on hash_rel_1 - Filter: (value = 2) -(5 rows) - -/* - * Test split and merge - */ -/* Split first partition in half */ -SELECT pathman.split_range_partition('test.num_range_rel_1', 500); - split_range_partition ------------------------ - {0,1000} -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id BETWEEN 100 AND 700; - QUERY PLAN ----------------------------------------------------------------- - Append - -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 - Index Cond: (id >= 100) - -> Index Scan using num_range_rel_5_pkey on num_range_rel_5 - Index Cond: (id <= 700) -(5 rows) - -SELECT pathman.split_range_partition('test.range_rel_1', '2015-01-15'::DATE); - split_range_partition -------------------------- - {01-01-2015,02-01-2015} -(1 row) - -/* Merge two partitions into one */ -SELECT pathman.merge_range_partitions('test.num_range_rel_1', 'test.num_range_rel_' || currval('test.num_range_rel_seq')); - merge_range_partitions ------------------------- - -(1 row) - -EXPLAIN (COSTS OFF) SELECT 
* FROM test.num_range_rel WHERE id BETWEEN 100 AND 700; - QUERY PLAN ----------------------------------------------------------------- - Append - -> Index Scan using num_range_rel_1_pkey on num_range_rel_1 - Index Cond: ((id >= 100) AND (id <= 700)) -(3 rows) - -SELECT pathman.merge_range_partitions('test.range_rel_1', 'test.range_rel_' || currval('test.range_rel_seq')); - merge_range_partitions ------------------------- - -(1 row) - -/* Append and prepend partitions */ -SELECT pathman.append_range_partition('test.num_range_rel'); - append_range_partition ------------------------- - test.num_range_rel_6 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id >= 4000; - QUERY PLAN ------------------------------------ - Append - -> Seq Scan on num_range_rel_6 -(2 rows) - -SELECT pathman.prepend_range_partition('test.num_range_rel'); - prepend_range_partition -------------------------- - test.num_range_rel_7 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.num_range_rel WHERE id < 0; - QUERY PLAN ------------------------------------ - Append - -> Seq Scan on num_range_rel_7 -(2 rows) - -SELECT pathman.drop_range_partition('test.num_range_rel_7'); - drop_range_partition ----------------------- - test.num_range_rel_7 -(1 row) - -SELECT pathman.append_range_partition('test.range_rel'); - append_range_partition ------------------------- - test.range_rel_6 -(1 row) - -SELECT pathman.prepend_range_partition('test.range_rel'); - prepend_range_partition -------------------------- - test.range_rel_7 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-12-15' AND '2015-01-15'; - QUERY PLAN -------------------------------------------------------------------------------------- - Append - -> Index Scan using range_rel_7_dt_idx on range_rel_7 - Index Cond: (dt >= 'Mon Dec 15 00:00:00 2014'::timestamp without time zone) - -> Index Scan using range_rel_1_dt_idx on range_rel_1 - Index Cond: (dt <= 'Thu Jan 15 00:00:00 2015'::timestamp 
without time zone) -(5 rows) - -SELECT pathman.drop_range_partition('test.range_rel_7'); - drop_range_partition ----------------------- - test.range_rel_7 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-12-15' AND '2015-01-15'; - QUERY PLAN -------------------------------------------------------------------------------------- - Append - -> Index Scan using range_rel_1_dt_idx on range_rel_1 - Index Cond: (dt <= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) -(3 rows) - -SELECT pathman.add_range_partition('test.range_rel', '2014-12-01'::DATE, '2015-01-02'::DATE); -ERROR: specified range overlaps with existing partitions -SELECT pathman.add_range_partition('test.range_rel', '2014-12-01'::DATE, '2015-01-01'::DATE); - add_range_partition ---------------------- - test.range_rel_8 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-12-15' AND '2015-01-15'; - QUERY PLAN -------------------------------------------------------------------------------------- - Append - -> Index Scan using range_rel_8_dt_idx on range_rel_8 - Index Cond: (dt >= 'Mon Dec 15 00:00:00 2014'::timestamp without time zone) - -> Index Scan using range_rel_1_dt_idx on range_rel_1 - Index Cond: (dt <= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) -(5 rows) - -CREATE TABLE test.range_rel_archive (LIKE test.range_rel INCLUDING ALL); -SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_archive', '2014-01-01'::DATE, '2015-01-01'::DATE); -ERROR: specified range overlaps with existing partitions -SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_archive', '2014-01-01'::DATE, '2014-12-01'::DATE); - attach_range_partition ------------------------- - test.range_rel_archive -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-11-15' AND '2015-01-15'; - QUERY PLAN -------------------------------------------------------------------------------------- - 
Append - -> Index Scan using range_rel_archive_dt_idx on range_rel_archive - Index Cond: (dt >= 'Sat Nov 15 00:00:00 2014'::timestamp without time zone) - -> Seq Scan on range_rel_8 - -> Index Scan using range_rel_1_dt_idx on range_rel_1 - Index Cond: (dt <= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) -(6 rows) - -SELECT pathman.detach_range_partition('test.range_rel_archive'); - detach_range_partition ------------------------- - test.range_rel_archive -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt BETWEEN '2014-11-15' AND '2015-01-15'; - QUERY PLAN -------------------------------------------------------------------------------------- - Append - -> Seq Scan on range_rel_8 - -> Index Scan using range_rel_1_dt_idx on range_rel_1 - Index Cond: (dt <= 'Thu Jan 15 00:00:00 2015'::timestamp without time zone) -(4 rows) - -CREATE TABLE test.range_rel_test1 ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP, - txt TEXT, - abc INTEGER); -SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test1', '2013-01-01'::DATE, '2014-01-01'::DATE); -ERROR: partition must have the exact same structure as parent -CREATE TABLE test.range_rel_test2 ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP); -SELECT pathman.attach_range_partition('test.range_rel', 'test.range_rel_test2', '2013-01-01'::DATE, '2014-01-01'::DATE); -ERROR: partition must have the exact same structure as parent -/* - * Zero partitions count and adding partitions with specified name - */ -CREATE TABLE test.zero( - id SERIAL PRIMARY KEY, - value INT NOT NULL); -INSERT INTO test.zero SELECT g, g FROM generate_series(1, 100) as g; -SELECT pathman.create_range_partitions('test.zero', 'value', 50, 10, 0); -NOTICE: sequence "zero_seq" does not exist, skipping - create_range_partitions -------------------------- - 0 -(1 row) - -SELECT pathman.append_range_partition('test.zero', 'test.zero_0'); -ERROR: cannot append to empty partitions set -SELECT 
pathman.prepend_range_partition('test.zero', 'test.zero_1'); -ERROR: cannot prepend to empty partitions set -SELECT pathman.add_range_partition('test.zero', 50, 70, 'test.zero_50'); - add_range_partition ---------------------- - test.zero_50 -(1 row) - -SELECT pathman.append_range_partition('test.zero', 'test.zero_appended'); - append_range_partition ------------------------- - test.zero_appended -(1 row) - -SELECT pathman.prepend_range_partition('test.zero', 'test.zero_prepended'); - prepend_range_partition -------------------------- - test.zero_prepended -(1 row) - -SELECT pathman.split_range_partition('test.zero_50', 60, 'test.zero_60'); - split_range_partition ------------------------ - {50,70} -(1 row) - -DROP TABLE test.zero CASCADE; -NOTICE: drop cascades to 4 other objects -/* - * Check that altering table columns doesn't break trigger - */ -ALTER TABLE test.hash_rel ADD COLUMN abc int; -INSERT INTO test.hash_rel (id, value, abc) VALUES (123, 456, 789); -SELECT * FROM test.hash_rel WHERE id = 123; - id | value | abc ------+-------+----- - 123 | 456 | 789 -(1 row) - -/* - * Clean up - */ -SELECT pathman.drop_partitions('test.hash_rel'); -NOTICE: drop cascades to 3 other objects -NOTICE: 2 rows copied from test.hash_rel_2 -NOTICE: 3 rows copied from test.hash_rel_1 -NOTICE: 2 rows copied from test.hash_rel_0 - drop_partitions ------------------ - 3 -(1 row) - -SELECT COUNT(*) FROM ONLY test.hash_rel; - count -------- - 7 -(1 row) - -SELECT pathman.create_hash_partitions('test.hash_rel', 'value', 3); - create_hash_partitions ------------------------- - 3 -(1 row) - -SELECT pathman.drop_partitions('test.hash_rel', TRUE); -NOTICE: function test.hash_rel_upd_trig_func() does not exist, skipping - drop_partitions ------------------ - 3 -(1 row) - -SELECT COUNT(*) FROM ONLY test.hash_rel; - count -------- - 0 -(1 row) - -DROP TABLE test.hash_rel CASCADE; -SELECT pathman.drop_partitions('test.num_range_rel'); -NOTICE: drop cascades to 4 other objects -NOTICE: 0 rows 
copied from test.num_range_rel_6 -NOTICE: 2 rows copied from test.num_range_rel_4 -NOTICE: 1000 rows copied from test.num_range_rel_3 -NOTICE: 1000 rows copied from test.num_range_rel_2 -NOTICE: 998 rows copied from test.num_range_rel_1 - drop_partitions ------------------ - 5 -(1 row) - -DROP TABLE test.num_range_rel CASCADE; -DROP TABLE test.range_rel CASCADE; -NOTICE: drop cascades to 7 other objects -/* Test automatic partition creation */ -CREATE TABLE test.range_rel ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP NOT NULL); -SELECT pathman.create_range_partitions('test.range_rel', 'dt', '2015-01-01'::DATE, '10 days'::INTERVAL, 1); - create_range_partitions -------------------------- - 1 -(1 row) - -INSERT INTO test.range_rel (dt) -SELECT generate_series('2015-01-01', '2015-04-30', '1 day'::interval); -INSERT INTO test.range_rel (dt) -SELECT generate_series('2014-12-31', '2014-12-01', '-1 day'::interval); -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt = '2014-12-15'; - QUERY PLAN --------------------------------------------------------------------------------- - Append - -> Seq Scan on range_rel_14 - Filter: (dt = 'Mon Dec 15 00:00:00 2014'::timestamp without time zone) -(3 rows) - -SELECT * FROM test.range_rel WHERE dt = '2014-12-15'; - id | dt ------+-------------------------- - 137 | Mon Dec 15 00:00:00 2014 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; - QUERY PLAN --------------------------------------------------------------------------------- - Append - -> Seq Scan on range_rel_8 - Filter: (dt = 'Sun Mar 15 00:00:00 2015'::timestamp without time zone) -(3 rows) - -SELECT * FROM test.range_rel WHERE dt = '2015-03-15'; - id | dt -----+-------------------------- - 74 | Sun Mar 15 00:00:00 2015 -(1 row) - -SELECT pathman.set_auto('test.range_rel', false); - set_auto ----------- - -(1 row) - -INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); -ERROR: no suitable partition for key 'Mon Jun 01 00:00:00 2015' 
-SELECT pathman.set_auto('test.range_rel', true); - set_auto ----------- - -(1 row) - -INSERT INTO test.range_rel (dt) VALUES ('2015-06-01'); -DROP TABLE test.range_rel CASCADE; -NOTICE: drop cascades to 20 other objects -SELECT * FROM pathman.pathman_config; - partrel | attname | parttype | range_interval ----------+---------+----------+---------------- -(0 rows) - -/* Check overlaps */ -CREATE TABLE test.num_range_rel ( - id SERIAL PRIMARY KEY, - txt TEXT); -SELECT pathman.create_range_partitions('test.num_range_rel', 'id', 1000, 1000, 4); - create_range_partitions -------------------------- - 4 -(1 row) - -SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 4001, 5000); - check_overlap ---------------- - t -(1 row) - -SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 4000, 5000); - check_overlap ---------------- - t -(1 row) - -SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 3999, 5000); - check_overlap ---------------- - t -(1 row) - -SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 3000, 3500); - check_overlap ---------------- - t -(1 row) - -SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 0, 999); - check_overlap ---------------- - f -(1 row) - -SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 0, 1000); - check_overlap ---------------- - f -(1 row) - -SELECT pathman.check_overlap('test.num_range_rel'::regclass::oid, 0, 1001); - check_overlap ---------------- - t -(1 row) - -/* CaMeL cAsE table names and attributes */ -CREATE TABLE test."TeSt" (a INT NOT NULL, b INT); -SELECT pathman.create_hash_partitions('test.TeSt', 'a', 3); -ERROR: relation "test.test" does not exist at character 39 -SELECT pathman.create_hash_partitions('test."TeSt"', 'a', 3); - create_hash_partitions ------------------------- - 3 -(1 row) - -INSERT INTO test."TeSt" VALUES (1, 1); -INSERT INTO test."TeSt" VALUES (2, 2); -INSERT INTO test."TeSt" VALUES (3, 3); -SELECT * FROM test."TeSt"; - a 
| b ----+--- - 3 | 3 - 2 | 2 - 1 | 1 -(3 rows) - -SELECT pathman.create_hash_update_trigger('test."TeSt"'); - create_hash_update_trigger ----------------------------- - test."TeSt_upd_trig_func" -(1 row) - -UPDATE test."TeSt" SET a = 1; -SELECT * FROM test."TeSt"; - a | b ----+--- - 1 | 3 - 1 | 2 - 1 | 1 -(3 rows) - -SELECT * FROM test."TeSt" WHERE a = 1; - a | b ----+--- - 1 | 3 - 1 | 2 - 1 | 1 -(3 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test."TeSt" WHERE a = 1; - QUERY PLAN ----------------------------- - Append - -> Seq Scan on "TeSt_2" - Filter: (a = 1) -(3 rows) - -SELECT pathman.drop_partitions('test."TeSt"'); -NOTICE: drop cascades to 3 other objects -NOTICE: 3 rows copied from test."TeSt_2" -NOTICE: 0 rows copied from test."TeSt_1" -NOTICE: 0 rows copied from test."TeSt_0" - drop_partitions ------------------ - 3 -(1 row) - -SELECT * FROM test."TeSt"; - a | b ----+--- - 1 | 3 - 1 | 2 - 1 | 1 -(3 rows) - -CREATE TABLE test."RangeRel" ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP NOT NULL, - txt TEXT); -INSERT INTO test."RangeRel" (dt, txt) -SELECT g, md5(g::TEXT) FROM generate_series('2015-01-01', '2015-01-03', '1 day'::interval) as g; -SELECT pathman.create_range_partitions('test."RangeRel"', 'dt', '2015-01-01'::DATE, '1 day'::INTERVAL); -NOTICE: sequence "RangeRel_seq" does not exist, skipping - create_range_partitions -------------------------- - 3 -(1 row) - -SELECT pathman.append_range_partition('test."RangeRel"'); - append_range_partition ------------------------- - test."RangeRel_4" -(1 row) - -SELECT pathman.prepend_range_partition('test."RangeRel"'); - prepend_range_partition -------------------------- - test."RangeRel_5" -(1 row) - -SELECT pathman.merge_range_partitions('test."RangeRel_1"', 'test."RangeRel_' || currval('test."RangeRel_seq"') || '"'); - merge_range_partitions ------------------------- - -(1 row) - -SELECT pathman.split_range_partition('test."RangeRel_1"', '2015-01-01'::DATE); - split_range_partition -------------------------- - 
{12-31-2014,01-02-2015} -(1 row) - -SELECT pathman.drop_partitions('test."RangeRel"'); -NOTICE: function test.RangeRel_upd_trig_func() does not exist, skipping -NOTICE: 1 rows copied from test."RangeRel_6" -NOTICE: 0 rows copied from test."RangeRel_4" -NOTICE: 1 rows copied from test."RangeRel_3" -NOTICE: 1 rows copied from test."RangeRel_2" -NOTICE: 0 rows copied from test."RangeRel_1" - drop_partitions ------------------ - 5 -(1 row) - -SELECT pathman.create_partitions_from_range('test."RangeRel"', 'dt', '2015-01-01'::DATE, '2015-01-05'::DATE, '1 day'::INTERVAL); - create_partitions_from_range ------------------------------- - 5 -(1 row) - -DROP TABLE test."RangeRel" CASCADE; -NOTICE: drop cascades to 5 other objects -SELECT * FROM pathman.pathman_config; - partrel | attname | parttype | range_interval ---------------------+---------+----------+---------------- - test.num_range_rel | id | 2 | 1000 -(1 row) - -CREATE TABLE test."RangeRel" ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP NOT NULL, - txt TEXT); -SELECT pathman.create_range_partitions('test."RangeRel"', 'id', 1, 100, 3); - create_range_partitions -------------------------- - 3 -(1 row) - -SELECT pathman.drop_partitions('test."RangeRel"'); -NOTICE: function test.RangeRel_upd_trig_func() does not exist, skipping -NOTICE: 0 rows copied from test."RangeRel_3" -NOTICE: 0 rows copied from test."RangeRel_2" -NOTICE: 0 rows copied from test."RangeRel_1" - drop_partitions ------------------ - 3 -(1 row) - -SELECT pathman.create_partitions_from_range('test."RangeRel"', 'id', 1, 300, 100); - create_partitions_from_range ------------------------------- - 3 -(1 row) - -DROP TABLE test."RangeRel" CASCADE; -NOTICE: drop cascades to 3 other objects -DROP EXTENSION pg_pathman; -/* Test that everithing works fine without schemas */ -CREATE EXTENSION pg_pathman; -/* Hash */ -CREATE TABLE hash_rel ( - id SERIAL PRIMARY KEY, - value INTEGER NOT NULL); -INSERT INTO hash_rel (value) SELECT g FROM generate_series(1, 10000) as g; 
-SELECT create_hash_partitions('hash_rel', 'value', 3); - create_hash_partitions ------------------------- - 3 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM hash_rel WHERE id = 1234; - QUERY PLAN ------------------------------------------------------- - Append - -> Index Scan using hash_rel_0_pkey on hash_rel_0 - Index Cond: (id = 1234) - -> Index Scan using hash_rel_1_pkey on hash_rel_1 - Index Cond: (id = 1234) - -> Index Scan using hash_rel_2_pkey on hash_rel_2 - Index Cond: (id = 1234) -(7 rows) - -/* Range */ -CREATE TABLE range_rel ( - id SERIAL PRIMARY KEY, - dt TIMESTAMP NOT NULL, - value INTEGER); -INSERT INTO range_rel (dt, value) SELECT g, extract(day from g) FROM generate_series('2010-01-01'::date, '2010-12-31'::date, '1 day') as g; -SELECT create_range_partitions('range_rel', 'dt', '2010-01-01'::date, '1 month'::interval, 12); -NOTICE: sequence "range_rel_seq" does not exist, skipping - create_range_partitions -------------------------- - 12 -(1 row) - -SELECT merge_range_partitions('range_rel_1', 'range_rel_2'); - merge_range_partitions ------------------------- - -(1 row) - -SELECT split_range_partition('range_rel_1', '2010-02-15'::date); - split_range_partition -------------------------- - {01-01-2010,03-01-2010} -(1 row) - -SELECT append_range_partition('range_rel'); - append_range_partition ------------------------- - public.range_rel_14 -(1 row) - -SELECT prepend_range_partition('range_rel'); - prepend_range_partition -------------------------- - public.range_rel_15 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM range_rel WHERE dt < '2010-03-01'; - QUERY PLAN --------------------------------- - Append - -> Seq Scan on range_rel_15 - -> Seq Scan on range_rel_1 - -> Seq Scan on range_rel_13 -(4 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM range_rel WHERE dt > '2010-12-15'; - QUERY PLAN --------------------------------------------------------------------------------- - Append - -> Seq Scan on range_rel_12 - Filter: (dt > 'Wed Dec 15 00:00:00 
2010'::timestamp without time zone) - -> Seq Scan on range_rel_14 -(4 rows) - -/* Temporary table for JOINs */ -CREATE TABLE tmp (id INTEGER NOT NULL, value INTEGER NOT NULL); -INSERT INTO tmp VALUES (1, 1), (2, 2); -/* Test UPDATE and DELETE */ -EXPLAIN (COSTS OFF) UPDATE range_rel SET value = 111 WHERE dt = '2010-06-15'; - QUERY PLAN --------------------------------------------------------------------------------- - Update on range_rel_6 - -> Seq Scan on range_rel_6 - Filter: (dt = 'Tue Jun 15 00:00:00 2010'::timestamp without time zone) -(3 rows) - -UPDATE range_rel SET value = 111 WHERE dt = '2010-06-15'; -SELECT * FROM range_rel WHERE dt = '2010-06-15'; - id | dt | value ------+--------------------------+------- - 166 | Tue Jun 15 00:00:00 2010 | 111 -(1 row) - -EXPLAIN (COSTS OFF) DELETE FROM range_rel WHERE dt = '2010-06-15'; - QUERY PLAN --------------------------------------------------------------------------------- - Delete on range_rel_6 - -> Seq Scan on range_rel_6 - Filter: (dt = 'Tue Jun 15 00:00:00 2010'::timestamp without time zone) -(3 rows) - -DELETE FROM range_rel WHERE dt = '2010-06-15'; -SELECT * FROM range_rel WHERE dt = '2010-06-15'; - id | dt | value -----+----+------- -(0 rows) - -EXPLAIN (COSTS OFF) UPDATE range_rel r SET value = t.value FROM tmp t WHERE r.dt = '2010-01-01' AND r.id = t.id; - QUERY PLAN --------------------------------------------------------------------------------------------- - Update on range_rel_1 r - -> Hash Join - Hash Cond: (t.id = r.id) - -> Seq Scan on tmp t - -> Hash - -> Index Scan using range_rel_1_pkey on range_rel_1 r - Filter: (dt = 'Fri Jan 01 00:00:00 2010'::timestamp without time zone) -(7 rows) - -UPDATE range_rel r SET value = t.value FROM tmp t WHERE r.dt = '2010-01-01' AND r.id = t.id; -EXPLAIN (COSTS OFF) DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02' AND r.id = t.id; - QUERY PLAN --------------------------------------------------------------------------------------------- - Delete 
on range_rel_1 r - -> Hash Join - Hash Cond: (t.id = r.id) - -> Seq Scan on tmp t - -> Hash - -> Index Scan using range_rel_1_pkey on range_rel_1 r - Filter: (dt = 'Sat Jan 02 00:00:00 2010'::timestamp without time zone) -(7 rows) - -DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02' AND r.id = t.id; -/* Create range partitions from whole range */ -SELECT drop_partitions('range_rel'); -NOTICE: function public.range_rel_upd_trig_func() does not exist, skipping -NOTICE: 0 rows copied from range_rel_15 -NOTICE: 0 rows copied from range_rel_14 -NOTICE: 14 rows copied from range_rel_13 -NOTICE: 31 rows copied from range_rel_12 -NOTICE: 30 rows copied from range_rel_11 -NOTICE: 31 rows copied from range_rel_10 -NOTICE: 30 rows copied from range_rel_9 -NOTICE: 31 rows copied from range_rel_8 -NOTICE: 31 rows copied from range_rel_7 -NOTICE: 29 rows copied from range_rel_6 -NOTICE: 31 rows copied from range_rel_5 -NOTICE: 30 rows copied from range_rel_4 -NOTICE: 31 rows copied from range_rel_3 -NOTICE: 44 rows copied from range_rel_1 - drop_partitions ------------------ - 14 -(1 row) - -SELECT create_partitions_from_range('range_rel', 'id', 1, 1000, 100); - create_partitions_from_range ------------------------------- - 10 -(1 row) - -SELECT drop_partitions('range_rel', TRUE); -NOTICE: function public.range_rel_upd_trig_func() does not exist, skipping - drop_partitions ------------------ - 10 -(1 row) - -SELECT create_partitions_from_range('range_rel', 'dt', '2015-01-01'::date, '2015-12-01'::date, '1 month'::interval); - create_partitions_from_range ------------------------------- - 12 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM range_rel WHERE dt = '2015-12-15'; - QUERY PLAN --------------------------------------------------------------------------------- - Append - -> Seq Scan on range_rel_12 - Filter: (dt = 'Tue Dec 15 00:00:00 2015'::timestamp without time zone) -(3 rows) - -CREATE TABLE messages(id SERIAL PRIMARY KEY, msg TEXT); -CREATE TABLE replies(id 
SERIAL PRIMARY KEY, message_id INTEGER REFERENCES messages(id), msg TEXT); -INSERT INTO messages SELECT g, md5(g::text) FROM generate_series(1, 10) as g; -INSERT INTO replies SELECT g, g, md5(g::text) FROM generate_series(1, 10) as g; -SELECT create_range_partitions('messages', 'id', 1, 100, 2); -WARNING: foreign key 'replies_message_id_fkey' references relation 'messages' -ERROR: relation "messages" is referenced from other relations -ALTER TABLE replies DROP CONSTRAINT replies_message_id_fkey; -SELECT create_range_partitions('messages', 'id', 1, 100, 2); -NOTICE: sequence "messages_seq" does not exist, skipping - create_range_partitions -------------------------- - 2 -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM messages; - QUERY PLAN ------------------------------- - Append - -> Seq Scan on messages_1 - -> Seq Scan on messages_2 -(3 rows) - -DROP SCHEMA test CASCADE; -NOTICE: drop cascades to 13 other objects -DROP EXTENSION pg_pathman CASCADE; -NOTICE: drop cascades to 3 other objects -DROP SCHEMA pathman CASCADE; diff --git a/results/pathman_callbacks.out b/results/pathman_callbacks.out deleted file mode 100644 index 31e405e5d7..0000000000 --- a/results/pathman_callbacks.out +++ /dev/null @@ -1,103 +0,0 @@ -\set VERBOSITY terse -CREATE EXTENSION pg_pathman; -CREATE SCHEMA callbacks; -/* Check callbacks */ -CREATE TABLE callbacks.log(id serial, message text); -CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_range_callback( - args JSONB) -RETURNS VOID AS $$ -DECLARE - start_value TEXT := args->>'start'; - end_value TEXT := args->'end'; -BEGIN - INSERT INTO callbacks.log(message) - VALUES (start_value || '-' || end_value); -END -$$ language plpgsql; -CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_hash_callback( - args JSONB) -RETURNS VOID AS $$ -BEGIN - RAISE WARNING 'callback: partition %', args->'partition'; -END -$$ language plpgsql; -/* set callback to be called on RANGE partitions */ -CREATE TABLE callbacks.abc(a serial, b int); -SELECT 
create_range_partitions('callbacks.abc', 'a', 1, 100, 2); -NOTICE: sequence "abc_seq" does not exist, skipping - create_range_partitions -------------------------- - 2 -(1 row) - -SELECT set_part_init_callback('callbacks.abc', - 'callbacks.abc_on_part_created_range_callback'); - set_part_init_callback ------------------------- - -(1 row) - -INSERT INTO callbacks.abc VALUES (123, 1); -INSERT INTO callbacks.abc VALUES (223, 1); -SELECT append_range_partition('callbacks.abc'); - append_range_partition ------------------------- - callbacks.abc_4 -(1 row) - -SELECT prepend_range_partition('callbacks.abc'); - prepend_range_partition -------------------------- - callbacks.abc_5 -(1 row) - -SELECT add_range_partition('callbacks.abc', 401, 502); - add_range_partition ---------------------- - callbacks.abc_6 -(1 row) - -SELECT message FROM callbacks.log ORDER BY id; - message ------------ - 201-"301" - 301-"401" - -99-"1" - 401-"502" -(4 rows) - -SELECT drop_partitions('callbacks.abc'); -NOTICE: function callbacks.abc_upd_trig_func() does not exist, skipping -NOTICE: 0 rows copied from callbacks.abc_1 -NOTICE: 1 rows copied from callbacks.abc_2 -NOTICE: 1 rows copied from callbacks.abc_3 -NOTICE: 0 rows copied from callbacks.abc_4 -NOTICE: 0 rows copied from callbacks.abc_5 -NOTICE: 0 rows copied from callbacks.abc_6 - drop_partitions ------------------ - 6 -(1 row) - -/* set callback to be called on HASH partitions */ -SELECT set_part_init_callback('callbacks.abc', - 'callbacks.abc_on_part_created_hash_callback'); - set_part_init_callback ------------------------- - -(1 row) - -SELECT create_hash_partitions('callbacks.abc', 'a', 5); -WARNING: callback: partition "abc_0" -WARNING: callback: partition "abc_1" -WARNING: callback: partition "abc_2" -WARNING: callback: partition "abc_3" -WARNING: callback: partition "abc_4" - create_hash_partitions ------------------------- - 5 -(1 row) - -DROP SCHEMA callbacks CASCADE; -NOTICE: drop cascades to 10 other objects -DROP EXTENSION 
pg_pathman CASCADE; diff --git a/results/pathman_domains.out b/results/pathman_domains.out deleted file mode 100644 index 283a6d5b83..0000000000 --- a/results/pathman_domains.out +++ /dev/null @@ -1,92 +0,0 @@ -\set VERBOSITY terse -CREATE EXTENSION pg_pathman; -CREATE SCHEMA domains; -CREATE DOMAIN domains.dom_test AS numeric CHECK (value < 1200); -CREATE TABLE domains.dom_table(val domains.dom_test NOT NULL); -INSERT INTO domains.dom_table SELECT generate_series(1, 999); -SELECT create_range_partitions('domains.dom_table', 'val', 1, 100); -NOTICE: sequence "dom_table_seq" does not exist, skipping - create_range_partitions -------------------------- - 10 -(1 row) - -EXPLAIN (COSTS OFF) -SELECT * FROM domains.dom_table -WHERE val < 250; - QUERY PLAN ---------------------------------------------------- - Append - -> Seq Scan on dom_table_1 - -> Seq Scan on dom_table_2 - -> Seq Scan on dom_table_3 - Filter: ((val)::numeric < '250'::numeric) -(5 rows) - -INSERT INTO domains.dom_table VALUES(1500); -ERROR: value for domain domains.dom_test violates check constraint "dom_test_check" -INSERT INTO domains.dom_table VALUES(-10); -SELECT append_range_partition('domains.dom_table'); - append_range_partition ------------------------- - domains.dom_table_12 -(1 row) - -SELECT prepend_range_partition('domains.dom_table'); - prepend_range_partition -------------------------- - domains.dom_table_13 -(1 row) - -SELECT merge_range_partitions('domains.dom_table_1', 'domains.dom_table_2'); - merge_range_partitions ------------------------- - -(1 row) - -SELECT split_range_partition('domains.dom_table_1', 50); - split_range_partition ------------------------ - {1,201} -(1 row) - -INSERT INTO domains.dom_table VALUES(1101); -EXPLAIN (COSTS OFF) -SELECT * FROM domains.dom_table -WHERE val < 450; - QUERY PLAN ---------------------------------------------------- - Append - -> Seq Scan on dom_table_13 - -> Seq Scan on dom_table_11 - -> Seq Scan on dom_table_1 - -> Seq Scan on dom_table_14 
- -> Seq Scan on dom_table_3 - -> Seq Scan on dom_table_4 - -> Seq Scan on dom_table_5 - Filter: ((val)::numeric < '450'::numeric) -(9 rows) - -SELECT * FROM pathman_partition_list -ORDER BY range_min::INT, range_max::INT; - parent | partition | parttype | partattr | range_min | range_max --------------------+----------------------+----------+----------+-----------+----------- - domains.dom_table | domains.dom_table_13 | 2 | val | -199 | -99 - domains.dom_table | domains.dom_table_11 | 2 | val | -99 | 1 - domains.dom_table | domains.dom_table_1 | 2 | val | 1 | 50 - domains.dom_table | domains.dom_table_14 | 2 | val | 50 | 201 - domains.dom_table | domains.dom_table_3 | 2 | val | 201 | 301 - domains.dom_table | domains.dom_table_4 | 2 | val | 301 | 401 - domains.dom_table | domains.dom_table_5 | 2 | val | 401 | 501 - domains.dom_table | domains.dom_table_6 | 2 | val | 501 | 601 - domains.dom_table | domains.dom_table_7 | 2 | val | 601 | 701 - domains.dom_table | domains.dom_table_8 | 2 | val | 701 | 801 - domains.dom_table | domains.dom_table_9 | 2 | val | 801 | 901 - domains.dom_table | domains.dom_table_10 | 2 | val | 901 | 1001 - domains.dom_table | domains.dom_table_12 | 2 | val | 1001 | 1101 - domains.dom_table | domains.dom_table_15 | 2 | val | 1101 | 1201 -(14 rows) - -DROP SCHEMA domains CASCADE; -NOTICE: drop cascades to 17 other objects -DROP EXTENSION pg_pathman CASCADE; diff --git a/results/pathman_foreign_keys.out b/results/pathman_foreign_keys.out deleted file mode 100644 index 20a4da60d4..0000000000 --- a/results/pathman_foreign_keys.out +++ /dev/null @@ -1,67 +0,0 @@ -\set VERBOSITY terse -CREATE EXTENSION pg_pathman; -CREATE SCHEMA fkeys; -/* Check primary keys generation */ -CREATE TABLE fkeys.test_ref(comment TEXT UNIQUE); -INSERT INTO fkeys.test_ref VALUES('test'); -CREATE TABLE fkeys.test_fkey( - id INT NOT NULL, - comment TEXT, - FOREIGN KEY (comment) REFERENCES fkeys.test_ref(comment)); -INSERT INTO fkeys.test_fkey SELECT generate_series(1, 
1000), 'test'; -SELECT create_range_partitions('fkeys.test_fkey', 'id', 1, 100); -NOTICE: sequence "test_fkey_seq" does not exist, skipping - create_range_partitions -------------------------- - 10 -(1 row) - -INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); -ERROR: insert or update on table "test_fkey_1" violates foreign key constraint "test_fkey_1_comment_fkey" -INSERT INTO fkeys.test_fkey VALUES(1, 'test'); -SELECT drop_partitions('fkeys.test_fkey'); -NOTICE: function fkeys.test_fkey_upd_trig_func() does not exist, skipping -NOTICE: 101 rows copied from fkeys.test_fkey_1 -NOTICE: 100 rows copied from fkeys.test_fkey_2 -NOTICE: 100 rows copied from fkeys.test_fkey_3 -NOTICE: 100 rows copied from fkeys.test_fkey_4 -NOTICE: 100 rows copied from fkeys.test_fkey_5 -NOTICE: 100 rows copied from fkeys.test_fkey_6 -NOTICE: 100 rows copied from fkeys.test_fkey_7 -NOTICE: 100 rows copied from fkeys.test_fkey_8 -NOTICE: 100 rows copied from fkeys.test_fkey_9 -NOTICE: 100 rows copied from fkeys.test_fkey_10 - drop_partitions ------------------ - 10 -(1 row) - -SELECT create_hash_partitions('fkeys.test_fkey', 'id', 10); - create_hash_partitions ------------------------- - 10 -(1 row) - -INSERT INTO fkeys.test_fkey VALUES(1, 'wrong'); -ERROR: insert or update on table "test_fkey_0" violates foreign key constraint "test_fkey_0_comment_fkey" -INSERT INTO fkeys.test_fkey VALUES(1, 'test'); -SELECT drop_partitions('fkeys.test_fkey'); -NOTICE: function fkeys.test_fkey_upd_trig_func() does not exist, skipping -NOTICE: 100 rows copied from fkeys.test_fkey_0 -NOTICE: 90 rows copied from fkeys.test_fkey_1 -NOTICE: 90 rows copied from fkeys.test_fkey_2 -NOTICE: 116 rows copied from fkeys.test_fkey_3 -NOTICE: 101 rows copied from fkeys.test_fkey_4 -NOTICE: 90 rows copied from fkeys.test_fkey_5 -NOTICE: 95 rows copied from fkeys.test_fkey_6 -NOTICE: 118 rows copied from fkeys.test_fkey_7 -NOTICE: 108 rows copied from fkeys.test_fkey_8 -NOTICE: 94 rows copied from fkeys.test_fkey_9 - 
drop_partitions ------------------ - 10 -(1 row) - -DROP SCHEMA fkeys CASCADE; -NOTICE: drop cascades to 3 other objects -DROP EXTENSION pg_pathman CASCADE; diff --git a/results/pathman_runtime_nodes.out b/results/pathman_runtime_nodes.out deleted file mode 100644 index 98b08710e0..0000000000 --- a/results/pathman_runtime_nodes.out +++ /dev/null @@ -1,291 +0,0 @@ -\set VERBOSITY terse -CREATE SCHEMA pathman; -CREATE EXTENSION pg_pathman SCHEMA pathman; -CREATE SCHEMA test; -/* - * Test RuntimeAppend - */ -create or replace function test.pathman_assert(smt bool, error_msg text) returns text as $$ -begin - if not smt then - raise exception '%', error_msg; - end if; - - return 'ok'; -end; -$$ language plpgsql; -create or replace function test.pathman_equal(a text, b text, error_msg text) returns text as $$ -begin - if a != b then - raise exception '''%'' is not equal to ''%'', %', a, b, error_msg; - end if; - - return 'equal'; -end; -$$ language plpgsql; -create or replace function test.pathman_test(query text) returns jsonb as $$ -declare - plan jsonb; -begin - execute 'explain (analyze, format json)' || query into plan; - - return plan; -end; -$$ language plpgsql; -create or replace function test.pathman_test_1() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.runtime_test_1 where id = (select * from test.run_values limit 1)'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Custom Plan Provider')::text, - '"RuntimeAppend"', - 'wrong plan provider'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Relation Name')::text, - format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(1), 6)), - 'wrong partition'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans') into num; - perform test.pathman_equal(num::text, '2', 'expected 2 child plans for custom 
scan'); - - return 'ok'; -end; -$$ language plpgsql -set pg_pathman.enable = true -set enable_mergejoin = off -set enable_hashjoin = off; -create or replace function test.pathman_test_2() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.runtime_test_1 where id = any (select * from test.run_values limit 4)'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Nested Loop"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, - '"RuntimeAppend"', - 'wrong plan provider'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; - perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); - - for i in 0..3 loop - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->i->'Relation Name')::text, - format('"runtime_test_1_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), - 'wrong partition'); - - num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; - perform test.pathman_equal(num::text, '1', 'expected 1 loop'); - end loop; - - return 'ok'; -end; -$$ language plpgsql -set pg_pathman.enable = true -set enable_mergejoin = off -set enable_hashjoin = off; -create or replace function test.pathman_test_3() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.runtime_test_1 a join test.run_values b on a.id = b.val'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Nested Loop"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Custom Plan Provider')::text, - '"RuntimeAppend"', - 'wrong plan 
provider'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans') into num; - perform test.pathman_equal(num::text, '6', 'expected 6 child plans for custom scan'); - - for i in 0..5 loop - num = plan->0->'Plan'->'Plans'->1->'Plans'->i->'Actual Loops'; - perform test.pathman_assert(num > 0 and num <= 1718, 'expected no more than 1718 loops'); - end loop; - - return 'ok'; -end; -$$ language plpgsql -set pg_pathman.enable = true -set enable_mergejoin = off -set enable_hashjoin = off; -create or replace function test.pathman_test_4() returns text as $$ -declare - plan jsonb; - num int; -begin - plan = test.pathman_test('select * from test.category c, lateral' || - '(select * from test.runtime_test_2 g where g.category_id = c.id order by rating limit 4) as tg'); - - perform test.pathman_equal((plan->0->'Plan'->'Node Type')::text, - '"Nested Loop"', - 'wrong plan type'); - - /* Limit -> Custom Scan */ - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Node Type')::text, - '"Custom Scan"', - 'wrong plan type'); - - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->0->'Custom Plan Provider')::text, - '"RuntimeMergeAppend"', - 'wrong plan provider'); - - select count(*) from jsonb_array_elements_text(plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans') into num; - perform test.pathman_equal(num::text, '4', 'expected 4 child plans for custom scan'); - - for i in 0..3 loop - perform test.pathman_equal((plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Relation Name')::text, - format('"runtime_test_2_%s"', pathman.get_hash_part_idx(hashint4(i + 1), 6)), - 'wrong partition'); - - num = plan->0->'Plan'->'Plans'->1->'Plans'->0->'Plans'->i->'Actual Loops'; - perform test.pathman_assert(num = 1, 'expected no more than 1 loops'); - end loop; - - return 'ok'; -end; -$$ language plpgsql -set pg_pathman.enable = true -set enable_mergejoin = off -set enable_hashjoin = off; -create or replace function test.pathman_test_5() returns 
text as $$ -declare - res record; -begin - select - from test.runtime_test_3 - where id = (select * from test.vals order by val limit 1) - limit 1 - into res; /* test empty tlist */ - - - select id, generate_series(1, 2) gen, val - from test.runtime_test_3 - where id = any (select * from test.vals order by val limit 5) - order by id, gen, val - offset 1 limit 1 - into res; /* without IndexOnlyScan */ - - perform test.pathman_equal(res.id::text, '1', 'id is incorrect (t2)'); - perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t2)'); - perform test.pathman_equal(res.val::text, 'k = 1', 'val is incorrect (t2)'); - - - select id - from test.runtime_test_3 - where id = any (select * from test.vals order by val limit 5) - order by id - offset 3 limit 1 - into res; /* with IndexOnlyScan */ - - perform test.pathman_equal(res.id::text, '4', 'id is incorrect (t3)'); - - - select v.val v1, generate_series(2, 2) gen, t.val v2 - from test.runtime_test_3 t join test.vals v on id = v.val - order by v1, gen, v2 - limit 1 - into res; - - perform test.pathman_equal(res.v1::text, '1', 'v1 is incorrect (t4)'); - perform test.pathman_equal(res.gen::text, '2', 'gen is incorrect (t4)'); - perform test.pathman_equal(res.v2::text, 'k = 1', 'v2 is incorrect (t4)'); - - return 'ok'; -end; -$$ language plpgsql -set pg_pathman.enable = true -set enable_hashjoin = off -set enable_mergejoin = off; -create table test.run_values as select generate_series(1, 10000) val; -create table test.runtime_test_1(id serial primary key, val real); -insert into test.runtime_test_1 select generate_series(1, 10000), random(); -select pathman.create_hash_partitions('test.runtime_test_1', 'id', 6); - create_hash_partitions ------------------------- - 6 -(1 row) - -create table test.category as (select id, 'cat' || id::text as name from generate_series(1, 4) id); -create table test.runtime_test_2 (id serial, category_id int not null, name text, rating real); -insert into test.runtime_test_2 (select 
id, (id % 6) + 1 as category_id, 'good' || id::text as name, random() as rating from generate_series(1, 100000) id); -create index on test.runtime_test_2 (category_id, rating); -select pathman.create_hash_partitions('test.runtime_test_2', 'category_id', 6); - create_hash_partitions ------------------------- - 6 -(1 row) - -create table test.vals as (select generate_series(1, 10000) as val); -create table test.runtime_test_3(val text, id serial not null); -insert into test.runtime_test_3(id, val) select * from generate_series(1, 10000) k, format('k = %s', k); -select pathman.create_hash_partitions('test.runtime_test_3', 'id', 4); - create_hash_partitions ------------------------- - 4 -(1 row) - -create index on test.runtime_test_3 (id); -create index on test.runtime_test_3_0 (id); -analyze test.run_values; -analyze test.runtime_test_1; -analyze test.runtime_test_2; -analyze test.runtime_test_3; -analyze test.runtime_test_3_0; -set pg_pathman.enable_runtimeappend = on; -set pg_pathman.enable_runtimemergeappend = on; -select test.pathman_test_1(); /* RuntimeAppend (select ... where id = (subquery)) */ - pathman_test_1 ----------------- - ok -(1 row) - -select test.pathman_test_2(); /* RuntimeAppend (select ... 
where id = any(subquery)) */ - pathman_test_2 ----------------- - ok -(1 row) - -select test.pathman_test_3(); /* RuntimeAppend (a join b on a.id = b.val) */ - pathman_test_3 ----------------- - ok -(1 row) - -select test.pathman_test_4(); /* RuntimeMergeAppend (lateral) */ - pathman_test_4 ----------------- - ok -(1 row) - -select test.pathman_test_5(); /* projection tests for RuntimeXXX nodes */ - pathman_test_5 ----------------- - ok -(1 row) - -DROP SCHEMA test CASCADE; -NOTICE: drop cascades to 30 other objects -DROP EXTENSION pg_pathman CASCADE; -DROP SCHEMA pathman CASCADE; From c9f9831631070fa702481ebaf00a480d1b5732ad Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 5 Oct 2016 00:33:46 +0300 Subject: [PATCH 174/184] fix partition creation callback invocation --- expected/pathman_callbacks.out | 44 ++++++++++------------------------ sql/pathman_callbacks.sql | 24 ++++--------------- src/pathman_workers.c | 26 ++++++++++---------- src/pl_funcs.c | 37 +++++++++++++++++----------- src/relation_info.c | 14 +++++++---- src/relation_info.h | 2 +- 6 files changed, 62 insertions(+), 85 deletions(-) diff --git a/expected/pathman_callbacks.out b/expected/pathman_callbacks.out index 31e405e5d7..d310c3ef43 100644 --- a/expected/pathman_callbacks.out +++ b/expected/pathman_callbacks.out @@ -2,23 +2,11 @@ CREATE EXTENSION pg_pathman; CREATE SCHEMA callbacks; /* Check callbacks */ -CREATE TABLE callbacks.log(id serial, message text); -CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_range_callback( +CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_callback( args JSONB) RETURNS VOID AS $$ -DECLARE - start_value TEXT := args->>'start'; - end_value TEXT := args->'end'; BEGIN - INSERT INTO callbacks.log(message) - VALUES (start_value || '-' || end_value); -END -$$ language plpgsql; -CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_hash_callback( - args JSONB) -RETURNS VOID AS $$ -BEGIN - RAISE WARNING 'callback: partition %', args->'partition'; + 
RAISE WARNING 'callback arg: %', args::TEXT; END $$ language plpgsql; /* set callback to be called on RANGE partitions */ @@ -31,7 +19,7 @@ NOTICE: sequence "abc_seq" does not exist, skipping (1 row) SELECT set_part_init_callback('callbacks.abc', - 'callbacks.abc_on_part_created_range_callback'); + 'callbacks.abc_on_part_created_callback'); set_part_init_callback ------------------------ @@ -40,32 +28,26 @@ SELECT set_part_init_callback('callbacks.abc', INSERT INTO callbacks.abc VALUES (123, 1); INSERT INTO callbacks.abc VALUES (223, 1); SELECT append_range_partition('callbacks.abc'); +WARNING: callback arg: {"parent": "abc", "parttype": "2", "partition": "abc_4", "range_max": "401", "range_min": "301"} append_range_partition ------------------------ callbacks.abc_4 (1 row) SELECT prepend_range_partition('callbacks.abc'); +WARNING: callback arg: {"parent": "abc", "parttype": "2", "partition": "abc_5", "range_max": "1", "range_min": "-99"} prepend_range_partition ------------------------- callbacks.abc_5 (1 row) SELECT add_range_partition('callbacks.abc', 401, 502); +WARNING: callback arg: {"parent": "abc", "parttype": "2", "partition": "abc_6", "range_max": "502", "range_min": "401"} add_range_partition --------------------- callbacks.abc_6 (1 row) -SELECT message FROM callbacks.log ORDER BY id; - message ------------ - 201-"301" - 301-"401" - -99-"1" - 401-"502" -(4 rows) - SELECT drop_partitions('callbacks.abc'); NOTICE: function callbacks.abc_upd_trig_func() does not exist, skipping NOTICE: 0 rows copied from callbacks.abc_1 @@ -81,23 +63,23 @@ NOTICE: 0 rows copied from callbacks.abc_6 /* set callback to be called on HASH partitions */ SELECT set_part_init_callback('callbacks.abc', - 'callbacks.abc_on_part_created_hash_callback'); + 'callbacks.abc_on_part_created_callback'); set_part_init_callback ------------------------ (1 row) SELECT create_hash_partitions('callbacks.abc', 'a', 5); -WARNING: callback: partition "abc_0" -WARNING: callback: partition "abc_1" 
-WARNING: callback: partition "abc_2" -WARNING: callback: partition "abc_3" -WARNING: callback: partition "abc_4" +WARNING: callback arg: {"parent": "abc", "parttype": "1", "partition": "abc_0"} +WARNING: callback arg: {"parent": "abc", "parttype": "1", "partition": "abc_1"} +WARNING: callback arg: {"parent": "abc", "parttype": "1", "partition": "abc_2"} +WARNING: callback arg: {"parent": "abc", "parttype": "1", "partition": "abc_3"} +WARNING: callback arg: {"parent": "abc", "parttype": "1", "partition": "abc_4"} create_hash_partitions ------------------------ 5 (1 row) DROP SCHEMA callbacks CASCADE; -NOTICE: drop cascades to 10 other objects +NOTICE: drop cascades to 8 other objects DROP EXTENSION pg_pathman CASCADE; diff --git a/sql/pathman_callbacks.sql b/sql/pathman_callbacks.sql index dd43a047d2..c5581979bf 100644 --- a/sql/pathman_callbacks.sql +++ b/sql/pathman_callbacks.sql @@ -4,26 +4,12 @@ CREATE EXTENSION pg_pathman; CREATE SCHEMA callbacks; /* Check callbacks */ -CREATE TABLE callbacks.log(id serial, message text); -CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_range_callback( +CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_callback( args JSONB) RETURNS VOID AS $$ -DECLARE - start_value TEXT := args->>'start'; - end_value TEXT := args->'end'; BEGIN - INSERT INTO callbacks.log(message) - VALUES (start_value || '-' || end_value); -END -$$ language plpgsql; - - -CREATE OR REPLACE FUNCTION callbacks.abc_on_part_created_hash_callback( - args JSONB) -RETURNS VOID AS $$ -BEGIN - RAISE WARNING 'callback: partition %', args->'partition'; + RAISE WARNING 'callback arg: %', args::TEXT; END $$ language plpgsql; @@ -33,7 +19,7 @@ CREATE TABLE callbacks.abc(a serial, b int); SELECT create_range_partitions('callbacks.abc', 'a', 1, 100, 2); SELECT set_part_init_callback('callbacks.abc', - 'callbacks.abc_on_part_created_range_callback'); + 'callbacks.abc_on_part_created_callback'); INSERT INTO callbacks.abc VALUES (123, 1); INSERT INTO callbacks.abc 
VALUES (223, 1); @@ -42,14 +28,12 @@ SELECT append_range_partition('callbacks.abc'); SELECT prepend_range_partition('callbacks.abc'); SELECT add_range_partition('callbacks.abc', 401, 502); -SELECT message FROM callbacks.log ORDER BY id; - SELECT drop_partitions('callbacks.abc'); /* set callback to be called on HASH partitions */ SELECT set_part_init_callback('callbacks.abc', - 'callbacks.abc_on_part_created_hash_callback'); + 'callbacks.abc_on_part_created_callback'); SELECT create_hash_partitions('callbacks.abc', 'a', 5); diff --git a/src/pathman_workers.c b/src/pathman_workers.c index c038dea976..e845319283 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -488,7 +488,7 @@ bgw_main_concurrent_part(Datum main_arg) failures_count++; ereport(LOG, (errmsg("%s: %s", concurrent_part_bgw, error->message), - errdetail("Attempt: %d/%d, sleep time: %s", + errdetail("attempt: %d/%d, sleep time: %s", failures_count, PART_WORKER_MAX_ATTEMPTS, sleep_time_str))); @@ -507,9 +507,9 @@ bgw_main_concurrent_part(Datum main_arg) cps_set_status(part_slot, CPS_FREE); elog(LOG, - "Concurrent partitioning worker has canceled the task because " - "maximum amount of attempts (%d) had been exceeded. " - "See the error message below", + "concurrent partitioning worker has canceled the task because " + "maximum amount of attempts (%d) had been exceeded, " + "see the error message below", PART_WORKER_MAX_ATTEMPTS); return; /* exit quickly */ @@ -573,11 +573,9 @@ bgw_main_concurrent_part(Datum main_arg) Datum partition_table_concurrently(PG_FUNCTION_ARGS) { -#define tostr(str) ( #str ) /* convert function's name to literal */ - Oid relid = PG_GETARG_OID(0); - int empty_slot_idx = -1; /* do we have a slot for BGWorker? */ - int i; + int empty_slot_idx = -1, /* do we have a slot for BGWorker? 
*/ + i; /* Check if relation is a partitioned table */ shout_if_prel_is_invalid(relid, @@ -617,7 +615,7 @@ partition_table_concurrently(PG_FUNCTION_ARGS) SpinLockRelease(&concurrent_part_slots[empty_slot_idx].mutex); elog(ERROR, - "Table \"%s\" is already being partitioned", + "table \"%s\" is already being partitioned", get_rel_name(relid)); } @@ -628,7 +626,7 @@ partition_table_concurrently(PG_FUNCTION_ARGS) /* Looks like we could not find an empty slot */ if (empty_slot_idx < 0) - elog(ERROR, "No empty worker slots found"); + elog(ERROR, "no empty worker slots found"); else { /* Initialize concurrent part slot */ @@ -648,9 +646,9 @@ partition_table_concurrently(PG_FUNCTION_ARGS) /* Tell user everything's fine */ elog(NOTICE, - "Worker started. You can stop it " + "worker started, you can stop it " "with the following command: select %s('%s');", - tostr(stop_concurrent_part_task), /* convert function's name to literal */ + CppAsString(stop_concurrent_part_task), get_rel_name(relid)); PG_RETURN_VOID(); @@ -785,7 +783,7 @@ stop_concurrent_part_task(PG_FUNCTION_ARGS) cur_slot->relid == relid && cur_slot->dbid == MyDatabaseId) { - elog(NOTICE, "Worker will stop after it finishes current batch"); + elog(NOTICE, "worker will stop after it finishes current batch"); /* Change worker's state & set 'worker_found' */ cur_slot->worker_status = CPS_STOPPING; @@ -800,7 +798,7 @@ stop_concurrent_part_task(PG_FUNCTION_ARGS) PG_RETURN_BOOL(true); else { - elog(ERROR, "Cannot find worker for relation \"%s\"", + elog(ERROR, "cannot find worker for relation \"%s\"", get_rel_name_or_relid(relid)); PG_RETURN_BOOL(false); /* keep compiler happy */ diff --git a/src/pl_funcs.c b/src/pl_funcs.c index cd3f63e965..be9a19e01c 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -769,16 +769,25 @@ invoke_on_partition_created_callback(PG_FUNCTION_ARGS) if (PG_ARGISNULL(ARG_CHILD)) elog(ERROR, "partition should not be null"); - /* Both RANGE_START & RANGE_END are not available (HASH) */ - if 
(PG_ARGISNULL(ARG_RANGE_START) && PG_ARGISNULL(ARG_RANGE_START)) - part_type = PT_HASH; + switch (PG_NARGS()) + { + case 3: + part_type = PT_HASH; + break; + + case 5: + { + if (PG_ARGISNULL(ARG_RANGE_START) || PG_ARGISNULL(ARG_RANGE_START)) + elog(ERROR, "both bounds must be provided for RANGE partition"); - /* Either RANGE_START or RANGE_END is missing */ - else if (PG_ARGISNULL(ARG_RANGE_START) || PG_ARGISNULL(ARG_RANGE_START)) - elog(ERROR, "both boundaries must be provided for RANGE partition"); + part_type = PT_RANGE; + } + break; - /* Both RANGE_START & RANGE_END are provided */ - else part_type = PT_RANGE; + default: + elog(ERROR, "error in function \"%s\"", + CppAsString(invoke_on_partition_created_callback)); + } /* Build JSONB according to partitioning type */ switch (part_type) @@ -791,8 +800,8 @@ invoke_on_partition_created_callback(PG_FUNCTION_ARGS) JSB_INIT_VAL(&val, WJB_VALUE, get_rel_name_or_relid(parent_oid)); JSB_INIT_VAL(&key, WJB_KEY, "partition"); JSB_INIT_VAL(&val, WJB_VALUE, get_rel_name_or_relid(partition_oid)); - JSB_INIT_VAL(&key, WJB_KEY, "part_type"); - JSB_INIT_VAL(&val, WJB_VALUE, "HASH"); + JSB_INIT_VAL(&key, WJB_KEY, "parttype"); + JSB_INIT_VAL(&val, WJB_VALUE, PartTypeToCString(PT_HASH)); result = pushJsonbValue(&jsonb_state, WJB_END_OBJECT, NULL); } @@ -814,11 +823,11 @@ invoke_on_partition_created_callback(PG_FUNCTION_ARGS) JSB_INIT_VAL(&val, WJB_VALUE, get_rel_name_or_relid(parent_oid)); JSB_INIT_VAL(&key, WJB_KEY, "partition"); JSB_INIT_VAL(&val, WJB_VALUE, get_rel_name_or_relid(partition_oid)); - JSB_INIT_VAL(&key, WJB_KEY, "part_type"); - JSB_INIT_VAL(&val, WJB_VALUE, "RANGE"); - JSB_INIT_VAL(&key, WJB_KEY, "start"); + JSB_INIT_VAL(&key, WJB_KEY, "parttype"); + JSB_INIT_VAL(&val, WJB_VALUE, PartTypeToCString(PT_RANGE)); + JSB_INIT_VAL(&key, WJB_KEY, "range_min"); JSB_INIT_VAL(&val, WJB_VALUE, start_value); - JSB_INIT_VAL(&key, WJB_KEY, "end"); + JSB_INIT_VAL(&key, WJB_KEY, "range_max"); JSB_INIT_VAL(&val, WJB_VALUE, 
end_value); result = pushJsonbValue(&jsonb_state, WJB_END_OBJECT, NULL); diff --git a/src/relation_info.c b/src/relation_info.c index fc7de1c285..ab9669fcd0 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -627,19 +627,23 @@ DatumGetPartType(Datum datum) return (PartType) val; } -Datum -PartTypeGetTextDatum(PartType parttype) +char * +PartTypeToCString(PartType parttype) { - switch(parttype) + static char *hash_str = "1", + *range_str = "2"; + + switch (parttype) { case PT_HASH: - return CStringGetTextDatum("HASH"); + return hash_str; case PT_RANGE: - return CStringGetTextDatum("RANGE"); + return range_str; default: elog(ERROR, "Unknown partitioning type %u", parttype); + return NULL; /* keep compiler happy */ } } diff --git a/src/relation_info.h b/src/relation_info.h index 215e1257fc..5b50005a9c 100644 --- a/src/relation_info.h +++ b/src/relation_info.h @@ -137,7 +137,7 @@ Oid forget_parent_of_partition(Oid partition, PartParentSearch *status); Oid get_parent_of_partition(Oid partition, PartParentSearch *status); PartType DatumGetPartType(Datum datum); -Datum PartTypeGetTextDatum(PartType parttype); +char * PartTypeToCString(PartType parttype); void shout_if_prel_is_invalid(Oid parent_oid, const PartRelationInfo *prel, From ac5a2bb086937740fce7ecee791cf329d9fafbd2 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 5 Oct 2016 15:23:51 +0300 Subject: [PATCH 175/184] improve function drop_partitions(), fix pathman_process_utility_hook(): do not handle COPY stmt if pathman is not initialized, call partition creation callbacks on attached RANGE partitions --- init.sql | 21 +++++++++++---------- range.sql | 15 +++++++++++++++ src/copy_stmt_hooking.c | 2 ++ src/hooks.c | 4 ++-- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/init.sql b/init.sql index 18d41bdfe6..ad31573f64 100644 --- a/init.sql +++ b/init.sql @@ -530,36 +530,37 @@ BEGIN DELETE FROM @extschema@.pathman_config_params WHERE partrel = parent_relid; IF conf_num_del = 0 THEN - 
RAISE EXCEPTION 'relation "%" has no partitions', parent_relid::text; + RAISE EXCEPTION 'relation "%" has no partitions', parent_relid::TEXT; END IF; - FOR v_rec IN (SELECT inhrelid::regclass::text AS tbl + FOR v_rec IN (SELECT inhrelid::REGCLASS AS tbl FROM pg_catalog.pg_inherits WHERE inhparent::regclass = parent_relid) LOOP IF NOT delete_data THEN EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) INSERT INTO %s SELECT * FROM part_data', - v_rec.tbl, + v_rec.tbl::TEXT, parent_relid::text); GET DIAGNOSTICS v_rows = ROW_COUNT; /* Show number of copied rows */ - RAISE NOTICE '% rows copied from %', v_rows, v_rec.tbl; + RAISE NOTICE '% rows copied from %', v_rows, v_rec.tbl::TEXT; END IF; + SELECT relkind FROM pg_catalog.pg_class + WHERE oid = v_rec.tbl + INTO v_relkind; + /* * Determine the kind of child relation. It can be either regular * table (r) or foreign table (f). Depending on relkind we use - * DROP TABLE or DROP FOREIGN TABLE + * DROP TABLE or DROP FOREIGN TABLE. */ - EXECUTE format('SELECT relkind FROM pg_class WHERE oid = ''%s''::regclass', v_rec.tbl) - INTO v_relkind; - IF v_relkind = 'f' THEN - EXECUTE format('DROP FOREIGN TABLE %s', v_rec.tbl); + EXECUTE format('DROP FOREIGN TABLE %s', v_rec.tbl::TEXT); ELSE - EXECUTE format('DROP TABLE %s', v_rec.tbl); + EXECUTE format('DROP TABLE %s', v_rec.tbl::TEXT); END IF; v_part_count := v_part_count + 1; diff --git a/range.sql b/range.sql index f32a1c7c4c..7b62620e52 100644 --- a/range.sql +++ b/range.sql @@ -1066,6 +1066,7 @@ $$ DECLARE v_attname TEXT; rel_persistence CHAR; + v_init_callback REGPROCEDURE; BEGIN /* Acquire lock on parent */ @@ -1105,6 +1106,20 @@ BEGIN p_start_value, p_end_value)); + /* Fetch init_callback from 'params' table */ + WITH stub_callback(stub) as (values (0)) + SELECT coalesce(init_callback, 0::REGPROCEDURE) + FROM stub_callback + LEFT JOIN @extschema@.pathman_config_params AS params + ON params.partrel = parent_relid + INTO v_init_callback; + + PERFORM 
@extschema@.invoke_on_partition_created_callback(parent_relid, + p_partition, + v_init_callback, + p_start_value, + p_end_value); + /* Invalidate cache */ PERFORM @extschema@.on_update_partitions(parent_relid); diff --git a/src/copy_stmt_hooking.c b/src/copy_stmt_hooking.c index 788442f10b..7b06a4b436 100644 --- a/src/copy_stmt_hooking.c +++ b/src/copy_stmt_hooking.c @@ -54,6 +54,8 @@ is_pathman_related_copy(Node *parsetree) CopyStmt *copy_stmt = (CopyStmt *) parsetree; Oid partitioned_table; + Assert(IsPathmanReady()); + if (!IsOverrideCopyEnabled()) { elog(DEBUG1, "COPY statement hooking is disabled"); diff --git a/src/hooks.c b/src/hooks.c index 389e3e40ea..8d7f88f313 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -611,9 +611,9 @@ pathman_process_utility_hook(Node *parsetree, dest, completionTag); /* Override standard COPY statement if needed */ - if (is_pathman_related_copy(parsetree)) + if (IsPathmanReady() && is_pathman_related_copy(parsetree)) { - uint64 processed; + uint64 processed; PathmanDoCopy((CopyStmt *) parsetree, queryString, &processed); if (completionTag) From f831fc2de92c494bb273b9de934fbef3856aa61f Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 5 Oct 2016 17:16:10 +0300 Subject: [PATCH 176/184] unprivileged user should only be able to change GUCs of Runtime[Merge]Append --- src/init.c | 6 +++--- src/partition_filter.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/init.c b/src/init.c index 6169bb936f..83d153713b 100644 --- a/src/init.c +++ b/src/init.c @@ -118,7 +118,7 @@ init_main_pathman_toggles(void) NULL, &pg_pathman_init_state.pg_pathman_enable, true, - PGC_USERSET, + PGC_SUSET, 0, NULL, pg_pathman_enable_assign_hook, @@ -130,7 +130,7 @@ init_main_pathman_toggles(void) NULL, &pg_pathman_init_state.auto_partition, true, - PGC_USERSET, + PGC_SUSET, 0, NULL, NULL, @@ -142,7 +142,7 @@ init_main_pathman_toggles(void) NULL, &pg_pathman_init_state.override_copy, true, - PGC_USERSET, + PGC_SUSET, 0, NULL, NULL, 
diff --git a/src/partition_filter.c b/src/partition_filter.c index 51f09923e0..fdb2a3726b 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -92,7 +92,7 @@ init_partition_filter_static_data(void) NULL, &pg_pathman_enable_partition_filter, true, - PGC_USERSET, + PGC_SUSET, 0, NULL, NULL, From 27fcbf66bcf72ac5ffaa2af7108bd8b1bcf5fbb8 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 5 Oct 2016 19:00:02 +0300 Subject: [PATCH 177/184] update README.md (new features & corrections), minor improvements in pl/PgSQL API, rename function set_part_init_callback() -> set_init_callback() --- README.md | 153 ++++++++++++++++++++++++--------- expected/pathman_basic.out | 54 ++++++------ expected/pathman_callbacks.out | 16 ++-- hash.sql | 2 +- init.sql | 7 +- sql/pathman_callbacks.sql | 8 +- src/utils.c | 3 + 7 files changed, 161 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index a0c91b0c37..11374dd3fa 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ More interesting features are yet to come. Stay tuned! ## Installation guide To install `pg_pathman`, execute this in the module's directory: -``` +```shell make install USE_PGXS=1 ``` Modify the **`shared_preload_libraries`** parameter in `postgresql.conf` as following: @@ -52,7 +52,7 @@ Modify the **`shared_preload_libraries`** parameter in `postgresql.conf` as foll shared_preload_libraries = 'pg_pathman' ``` It is essential to restart the PostgreSQL instance. After that, execute the following query in psql: -``` +```plpgsql CREATE EXTENSION pg_pathman; ``` @@ -67,9 +67,10 @@ Done! Now it's time to setup your partitioning schemes. create_hash_partitions(relation REGCLASS, attribute TEXT, partitions_count INTEGER, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL, + partition_data BOOOLEAN DEFAULT TRUE) ``` -Performs HASH partitioning for `relation` by integer key `attribute`. 
The `partitions_count` parameter specifies the number of partitions to create; it cannot be changed afterwards. If `partition_data` is `true` then all the data will be automatically copied from the parent table to partitions. Note that data migration may took a while to finish and the table will be locked until transaction commits. See `partition_table_concurrently()` for a lock-free way to migrate data. +Performs HASH partitioning for `relation` by integer key `attribute`. The `partitions_count` parameter specifies the number of partitions to create; it cannot be changed afterwards. If `partition_data` is `true` then all the data will be automatically copied from the parent table to partitions. Note that data migration may took a while to finish and the table will be locked until transaction commits. See `partition_table_concurrently()` for a lock-free way to migrate data. Partition creation callback is invoked for each partition if set beforehand (see `set_part_init_callback()`). ```plpgsql create_range_partitions(relation REGCLASS, @@ -77,16 +78,16 @@ create_range_partitions(relation REGCLASS, start_value ANYELEMENT, interval ANYELEMENT, count INTEGER DEFAULT NULL - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT TRUE) create_range_partitions(relation REGCLASS, attribute TEXT, start_value ANYELEMENT, interval INTERVAL, count INTEGER DEFAULT NULL, - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT TRUE) ``` -Performs RANGE partitioning for `relation` by partitioning key `attribute`. `start_value` argument specifies initial value, `interval` sets the range of values in a single partition, `count` is the number of premade partitions (if not set then pathman tries to determine it based on attribute values). +Performs RANGE partitioning for `relation` by partitioning key `attribute`. 
`start_value` argument specifies initial value, `interval` sets the range of values in a single partition, `count` is the number of premade partitions (if not set then pathman tries to determine it based on attribute values). Partition creation callback is invoked for each partition if set beforehand. ```plpgsql create_partitions_from_range(relation REGCLASS, @@ -94,16 +95,16 @@ create_partitions_from_range(relation REGCLASS, start_value ANYELEMENT, end_value ANYELEMENT, interval ANYELEMENT, - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT TRUE) create_partitions_from_range(relation REGCLASS, attribute TEXT, start_value ANYELEMENT, end_value ANYELEMENT, interval INTERVAL, - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT TRUE) ``` -Performs RANGE-partitioning from specified range for `relation` by partitioning key `attribute`. +Performs RANGE-partitioning from specified range for `relation` by partitioning key `attribute`. Partition creation callback is invoked for each partition if set beforehand. ### Data migration @@ -131,9 +132,9 @@ Same as above, but for a RANGE-partitioned table. ```plpgsql split_range_partition(partition REGCLASS, value ANYELEMENT, - partition_name TEXT DEFAULT NULL,) + partition_name TEXT DEFAULT NULL) ``` -Split RANGE `partition` in two by `value`. +Split RANGE `partition` in two by `value`. Partition creation callback is invoked for a new partition if available. ```plpgsql merge_range_partitions(partition1 REGCLASS, partition2 REGCLASS) @@ -142,13 +143,15 @@ Merge two adjacent RANGE partitions. First, data from `partition2` is copied to ```plpgsql append_range_partition(p_relation REGCLASS, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL, + tablespace TEXT DEFAULT NULL) ``` Append new RANGE partition with `pathman_config.range_interval` as interval. 
```plpgsql prepend_range_partition(p_relation REGCLASS, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL, + tablespace TEXT DEFAULT NULL) ``` Prepend new RANGE partition with `pathman_config.range_interval` as interval. @@ -156,7 +159,8 @@ Prepend new RANGE partition with `pathman_config.range_interval` as interval. add_range_partition(relation REGCLASS, start_value ANYELEMENT, end_value ANYELEMENT, - partition_name TEXT DEFAULT NULL) + partition_name TEXT DEFAULT NULL, + tablespace TEXT DEFAULT NULL) ``` Create new RANGE partition for `relation` with specified range bounds. @@ -171,7 +175,7 @@ attach_range_partition(relation REGCLASS, start_value ANYELEMENT, end_value ANYELEMENT) ``` -Attach partition to the existing RANGE-partitioned relation. The attached table must have exactly the same structure as the parent table, including the dropped columns. +Attach partition to the existing RANGE-partitioned relation. The attached table must have exactly the same structure as the parent table, including the dropped columns. Partition creation callback is invoked if set (see `pathman_config_params`). ```plpgsql detach_range_partition(partition REGCLASS) @@ -187,23 +191,91 @@ Permanently disable `pg_pathman` partitioning mechanism for the specified parent drop_partitions(parent REGCLASS, delete_data BOOLEAN DEFAULT FALSE) ``` -Drop partitions of the `parent` table. If `delete_data` is `false` then the data is copied to the parent table first. Default is `false`. +Drop partitions of the `parent` table (both foreign and local relations). If `delete_data` is `false`, the data is copied to the parent table first. Default is `false`. ### Additional parameters ```plpgsql -enable_parent(relation REGCLASS) -disable_parent(relation REGCLASS) +set_enable_parent(relation REGCLASS, value BOOLEAN) ``` Include/exclude parent table into/from query plan. 
In original PostgreSQL planner parent table is always included into query plan even if it's empty which can lead to additional overhead. You can use `disable_parent()` if you are never going to use parent table as a storage. Default value depends on the `partition_data` parameter that was specified during initial partitioning in `create_range_partitions()` or `create_partitions_from_range()` functions. If the `partition_data` parameter was `true` then all data have already been migrated to partitions and parent table disabled. Otherwise it is enabled. ```plpgsql -enable_auto(relation REGCLASS) -disable_auto(relation REGCLASS) +set_auto(relation REGCLASS, value BOOLEAN) ``` Enable/disable auto partition propagation (only for RANGE partitioning). It is enabled by default. +```plpgsql +set_init_callback(relation REGCLASS, callback REGPROC DEFAULT 0) +``` +Set partition creation callback to be invoked for each attached or created partition (both HASH and RANGE). + +## Views and tables + +#### `pathman_config` --- main config storage +```plpgsql +CREATE TABLE IF NOT EXISTS pathman_config ( + partrel REGCLASS NOT NULL PRIMARY KEY, + attname TEXT NOT NULL, + parttype INTEGER NOT NULL, + range_interval TEXT, + + CHECK (parttype IN (1, 2)) /* check for allowed part types */ +); +``` +This table stores a list of partitioned tables. + +#### `pathman_config_params` --- optional parameters +```plpgsql +CREATE TABLE IF NOT EXISTS pathman_config_params ( + partrel REGCLASS NOT NULL PRIMARY KEY, + enable_parent BOOLEAN NOT NULL DEFAULT TRUE, + auto BOOLEAN NOT NULL DEFAULT TRUE, + init_callback REGPROCEDURE NOT NULL DEFAULT 0 +); +``` +This table stores optional parameters which override standard behavior. 
+ +#### `pathman_concurrent_part_tasks` --- currently running partitioning workers +```plpgsql +-- helper SRF function +CREATE OR REPLACE FUNCTION show_concurrent_part_tasks() +RETURNS TABLE ( + userid REGROLE, + pid INT, + dbid OID, + relid REGCLASS, + processed INT, + status TEXT) +AS 'pg_pathman', 'show_concurrent_part_tasks_internal' +LANGUAGE C STRICT; + +CREATE OR REPLACE VIEW pathman_concurrent_part_tasks +AS SELECT * FROM show_concurrent_part_tasks(); +``` +This view lists all currently running concurrent partitioning tasks. + +#### `pathman_partition_list` --- list of all existing partitions +```plpgsql +-- helper SRF function +CREATE OR REPLACE FUNCTION @extschema@.show_partition_list() +RETURNS TABLE ( + parent REGCLASS, + partition REGCLASS, + parttype INT4, + partattr TEXT, + range_min TEXT, + range_max TEXT) +AS 'pg_pathman', 'show_partition_list_internal' +LANGUAGE C STRICT; + +CREATE OR REPLACE VIEW pathman_partition_list +AS SELECT * FROM show_partition_list(); +``` +This view lists all existing partitions, as well as their parents and range boundaries (NULL for HASH partitions). + + ## Custom plan nodes `pg_pathman` provides a couple of [custom plan nodes](https://wiki.postgresql.org/wiki/CustomScanAPI) which aim to reduce execution time, namely: @@ -213,7 +285,7 @@ Enable/disable auto partition propagation (only for RANGE partitioning). 
It is e `PartitionFilter` acts as a *proxy node* for INSERT's child scan, which means it can redirect output tuples to the corresponding partition: -``` +```plpgsql EXPLAIN (COSTS OFF) INSERT INTO partitioned_table SELECT generate_series(1, 10), random(); @@ -236,7 +308,7 @@ This kind of expressions can no longer be optimized at planning time since the p There are at least several cases that demonstrate usefulness of these nodes: -``` +```plpgsql /* create table we're going to partition */ CREATE TABLE partitioned_table(id INT NOT NULL, payload REAL); @@ -253,7 +325,7 @@ CREATE TABLE some_table AS SELECT generate_series(1, 100) AS VAL; - **`id = (select ... limit 1)`** -``` +```plpgsql EXPLAIN (COSTS OFF, ANALYZE) SELECT * FROM partitioned_table WHERE id = (SELECT * FROM some_table LIMIT 1); QUERY PLAN @@ -293,7 +365,7 @@ WHERE id = (SELECT * FROM some_table LIMIT 1); ``` - **`id = ANY (select ...)`** -``` +```plpgsql EXPLAIN (COSTS OFF, ANALYZE) SELECT * FROM partitioned_table WHERE id = any (SELECT * FROM some_table limit 4); QUERY PLAN @@ -345,11 +417,11 @@ In case you're interested, you can read more about custom nodes at Alexander Kor ### Common tips - You can easily add **_partition_** column containing the names of the underlying partitions using the system attribute called **_tableoid_**: -``` +```plpgsql SELECT tableoid::regclass AS partition, * FROM partitioned_table; ``` -- Though indices on a parent table aren't particularly useful (since it's empty), they act as prototypes for indices on partitions. For each index on the parent table, `pg_pathman` will create a similar index on every partition. +- Though indices on a parent table aren't particularly useful (since it's supposed to be empty), they act as prototypes for indices on partitions. For each index on the parent table, `pg_pathman` will create a similar index on every partition. 
- All running concurrent partitioning tasks can be listed using the `pathman_concurrent_part_tasks` view: ```plpgsql @@ -362,7 +434,7 @@ SELECT * FROM pathman_concurrent_part_tasks; ### HASH partitioning Consider an example of HASH partitioning. First create a table with some integer column: -``` +```plpgsql CREATE TABLE items ( id SERIAL PRIMARY KEY, name TEXT, @@ -373,13 +445,13 @@ SELECT g, md5(g::text), random() * 100000 FROM generate_series(1, 100000) as g; ``` Now run the `create_hash_partitions()` function with appropriate arguments: -``` +```plpgsql SELECT create_hash_partitions('items', 'id', 100); ``` This will create new partitions and move the data from parent to partitions. Here's an example of the query performing filtering by partitioning key: -``` +```plpgsql SELECT * FROM items WHERE id = 1234; id | name | code ------+----------------------------------+------ @@ -407,7 +479,7 @@ EXPLAIN SELECT * FROM ONLY items; ``` ### RANGE partitioning Consider an example of RANGE partitioning. Let's create a table containing some dummy logs: -``` +```plpgsql CREATE TABLE journal ( id SERIAL, dt TIMESTAMP NOT NULL, @@ -424,21 +496,21 @@ SELECT g, random() * 6, md5(g::text) FROM generate_series('2015-01-01'::date, '2015-12-31'::date, '1 minute') as g; ``` Run the `create_range_partitions()` function to create partitions so that each partition would contain the data for one day: -``` +```plpgsql SELECT create_range_partitions('journal', 'dt', '2015-01-01'::date, '1 day'::interval); ``` It will create 365 partitions and move the data from parent to partitions. 
New partitions are appended automaticaly by insert trigger, but it can be done manually with the following functions: -``` --- append new partition with specified range +```plpgsql +-- add new partition with specified range SELECT add_range_partition('journal', '2016-01-01'::date, '2016-01-07'::date); -- append new partition with default range SELECT append_range_partition('journal'); ``` The first one creates a partition with specified range. The second one creates a partition with default interval and appends it to the partition list. It is also possible to attach an existing table as partition. For example, we may want to attach an archive table (or even foreign table from another server) for some outdated data: -``` +```plpgsql CREATE FOREIGN TABLE journal_archive ( id INTEGER NOT NULL, dt TIMESTAMP NOT NULL, @@ -451,15 +523,15 @@ SELECT attach_range_partition('journal', 'journal_archive', '2014-01-01'::date, > **Important:** the definition of the attached table must match the one of the existing partitioned table, including the dropped columns. 
To merge to adjacent partitions, use the `merge_range_partitions()` function: -``` +```plpgsql SELECT merge_range_partitions('journal_archive', 'journal_1'); ``` To split partition by value, use the `split_range_partition()` function: -``` +```plpgsql SELECT split_range_partition('journal_366', '2016-01-03'::date); ``` To detach partition, use the `detach_range_partition()` function: -``` +```plpgsql SELECT detach_range_partition('journal_archive'); ``` @@ -490,8 +562,11 @@ There are several user-accessible [GUC](https://www.postgresql.org/docs/9.5/stat - `pg_pathman.enable_runtimeappend` --- toggle `RuntimeAppend` custom node on\off - `pg_pathman.enable_runtimemergeappend` --- toggle `RuntimeMergeAppend` custom node on\off - `pg_pathman.enable_partitionfilter` --- toggle `PartitionFilter` custom node on\off + - `pg_pathman.enable_auto_partition` --- toggle automatic partition creation on\off (per session) + - `pg_pathman.insert_into_fdw` --- allow INSERTs into various FDWs `(disabled | postgres | any_fdw)` + - `pg_pathman.override_copy` --- toggle COPY statement hooking on\off -To **permanently** disable `pg_pathman` for some previously partitioned table, use the `disable_partitioning()` function: +To **permanently** disable `pg_pathman` for some previously partitioned table, use the `disable_pathman_for()` function: ``` SELECT disable_pathman_for('range_rel'); ``` diff --git a/expected/pathman_basic.out b/expected/pathman_basic.out index 86ef91aef3..b905b02a0b 100644 --- a/expected/pathman_basic.out +++ b/expected/pathman_basic.out @@ -889,9 +889,9 @@ SELECT * FROM test.hash_rel WHERE id = 123; */ SELECT pathman.drop_partitions('test.hash_rel'); NOTICE: drop cascades to 3 other objects -NOTICE: 2 rows copied from test.hash_rel_2 -NOTICE: 3 rows copied from test.hash_rel_1 NOTICE: 2 rows copied from test.hash_rel_0 +NOTICE: 3 rows copied from test.hash_rel_1 +NOTICE: 2 rows copied from test.hash_rel_2 drop_partitions ----------------- 3 @@ -925,11 +925,11 @@ 
SELECT COUNT(*) FROM ONLY test.hash_rel; DROP TABLE test.hash_rel CASCADE; SELECT pathman.drop_partitions('test.num_range_rel'); NOTICE: drop cascades to 4 other objects -NOTICE: 0 rows copied from test.num_range_rel_6 -NOTICE: 2 rows copied from test.num_range_rel_4 -NOTICE: 1000 rows copied from test.num_range_rel_3 -NOTICE: 1000 rows copied from test.num_range_rel_2 NOTICE: 998 rows copied from test.num_range_rel_1 +NOTICE: 1000 rows copied from test.num_range_rel_2 +NOTICE: 1000 rows copied from test.num_range_rel_3 +NOTICE: 2 rows copied from test.num_range_rel_4 +NOTICE: 0 rows copied from test.num_range_rel_6 drop_partitions ----------------- 5 @@ -1108,9 +1108,9 @@ EXPLAIN (COSTS OFF) SELECT * FROM test."TeSt" WHERE a = 1; SELECT pathman.drop_partitions('test."TeSt"'); NOTICE: drop cascades to 3 other objects -NOTICE: 3 rows copied from test."TeSt_2" -NOTICE: 0 rows copied from test."TeSt_1" NOTICE: 0 rows copied from test."TeSt_0" +NOTICE: 0 rows copied from test."TeSt_1" +NOTICE: 3 rows copied from test."TeSt_2" drop_partitions ----------------- 3 @@ -1163,11 +1163,11 @@ SELECT pathman.split_range_partition('test."RangeRel_1"', '2015-01-01'::DATE); SELECT pathman.drop_partitions('test."RangeRel"'); NOTICE: function test.RangeRel_upd_trig_func() does not exist, skipping -NOTICE: 1 rows copied from test."RangeRel_6" -NOTICE: 0 rows copied from test."RangeRel_4" -NOTICE: 1 rows copied from test."RangeRel_3" -NOTICE: 1 rows copied from test."RangeRel_2" NOTICE: 0 rows copied from test."RangeRel_1" +NOTICE: 1 rows copied from test."RangeRel_2" +NOTICE: 1 rows copied from test."RangeRel_3" +NOTICE: 0 rows copied from test."RangeRel_4" +NOTICE: 1 rows copied from test."RangeRel_6" drop_partitions ----------------- 5 @@ -1199,9 +1199,9 @@ SELECT pathman.create_range_partitions('test."RangeRel"', 'id', 1, 100, 3); SELECT pathman.drop_partitions('test."RangeRel"'); NOTICE: function test.RangeRel_upd_trig_func() does not exist, skipping -NOTICE: 0 rows copied from 
test."RangeRel_3" -NOTICE: 0 rows copied from test."RangeRel_2" NOTICE: 0 rows copied from test."RangeRel_1" +NOTICE: 0 rows copied from test."RangeRel_2" +NOTICE: 0 rows copied from test."RangeRel_3" drop_partitions ----------------- 3 @@ -1358,20 +1358,20 @@ DELETE FROM range_rel r USING tmp t WHERE r.dt = '2010-01-02' AND r.id = t.id; /* Create range partitions from whole range */ SELECT drop_partitions('range_rel'); NOTICE: function public.range_rel_upd_trig_func() does not exist, skipping -NOTICE: 0 rows copied from range_rel_15 -NOTICE: 0 rows copied from range_rel_14 -NOTICE: 14 rows copied from range_rel_13 -NOTICE: 31 rows copied from range_rel_12 -NOTICE: 30 rows copied from range_rel_11 -NOTICE: 31 rows copied from range_rel_10 -NOTICE: 30 rows copied from range_rel_9 -NOTICE: 31 rows copied from range_rel_8 -NOTICE: 31 rows copied from range_rel_7 -NOTICE: 29 rows copied from range_rel_6 -NOTICE: 31 rows copied from range_rel_5 -NOTICE: 30 rows copied from range_rel_4 -NOTICE: 31 rows copied from range_rel_3 NOTICE: 44 rows copied from range_rel_1 +NOTICE: 31 rows copied from range_rel_3 +NOTICE: 30 rows copied from range_rel_4 +NOTICE: 31 rows copied from range_rel_5 +NOTICE: 29 rows copied from range_rel_6 +NOTICE: 31 rows copied from range_rel_7 +NOTICE: 31 rows copied from range_rel_8 +NOTICE: 30 rows copied from range_rel_9 +NOTICE: 31 rows copied from range_rel_10 +NOTICE: 30 rows copied from range_rel_11 +NOTICE: 31 rows copied from range_rel_12 +NOTICE: 14 rows copied from range_rel_13 +NOTICE: 0 rows copied from range_rel_14 +NOTICE: 0 rows copied from range_rel_15 drop_partitions ----------------- 14 diff --git a/expected/pathman_callbacks.out b/expected/pathman_callbacks.out index d310c3ef43..6a997e9ee6 100644 --- a/expected/pathman_callbacks.out +++ b/expected/pathman_callbacks.out @@ -18,10 +18,10 @@ NOTICE: sequence "abc_seq" does not exist, skipping 2 (1 row) -SELECT set_part_init_callback('callbacks.abc', - 
'callbacks.abc_on_part_created_callback'); - set_part_init_callback ------------------------- +SELECT set_init_callback('callbacks.abc', + 'callbacks.abc_on_part_created_callback'); + set_init_callback +------------------- (1 row) @@ -62,10 +62,10 @@ NOTICE: 0 rows copied from callbacks.abc_6 (1 row) /* set callback to be called on HASH partitions */ -SELECT set_part_init_callback('callbacks.abc', - 'callbacks.abc_on_part_created_callback'); - set_part_init_callback ------------------------- +SELECT set_init_callback('callbacks.abc', + 'callbacks.abc_on_part_created_callback'); + set_init_callback +------------------- (1 row) diff --git a/hash.sql b/hash.sql index af0bf673be..e4001bdceb 100644 --- a/hash.sql +++ b/hash.sql @@ -15,7 +15,7 @@ CREATE OR REPLACE FUNCTION @extschema@.create_hash_partitions( parent_relid REGCLASS, attribute TEXT, partitions_count INTEGER, - partition_data BOOLEAN DEFAULT true) + partition_data BOOLEAN DEFAULT TRUE) RETURNS INTEGER AS $$ DECLARE diff --git a/init.sql b/init.sql index ad31573f64..e648395a94 100644 --- a/init.sql +++ b/init.sql @@ -135,9 +135,9 @@ LANGUAGE plpgsql STRICT; /* * Set partition creation callback */ -CREATE OR REPLACE FUNCTION @extschema@.set_part_init_callback( +CREATE OR REPLACE FUNCTION @extschema@.set_init_callback( relation REGCLASS, - callback REGPROC) + callback REGPROC DEFAULT 0) RETURNS VOID AS $$ BEGIN @@ -535,7 +535,8 @@ BEGIN FOR v_rec IN (SELECT inhrelid::REGCLASS AS tbl FROM pg_catalog.pg_inherits - WHERE inhparent::regclass = parent_relid) + WHERE inhparent::regclass = parent_relid + ORDER BY inhrelid ASC) LOOP IF NOT delete_data THEN EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) diff --git a/sql/pathman_callbacks.sql b/sql/pathman_callbacks.sql index c5581979bf..3aa174cd23 100644 --- a/sql/pathman_callbacks.sql +++ b/sql/pathman_callbacks.sql @@ -18,8 +18,8 @@ $$ language plpgsql; CREATE TABLE callbacks.abc(a serial, b int); SELECT create_range_partitions('callbacks.abc', 'a', 1, 
100, 2); -SELECT set_part_init_callback('callbacks.abc', - 'callbacks.abc_on_part_created_callback'); +SELECT set_init_callback('callbacks.abc', + 'callbacks.abc_on_part_created_callback'); INSERT INTO callbacks.abc VALUES (123, 1); INSERT INTO callbacks.abc VALUES (223, 1); @@ -32,8 +32,8 @@ SELECT drop_partitions('callbacks.abc'); /* set callback to be called on HASH partitions */ -SELECT set_part_init_callback('callbacks.abc', - 'callbacks.abc_on_part_created_callback'); +SELECT set_init_callback('callbacks.abc', + 'callbacks.abc_on_part_created_callback'); SELECT create_hash_partitions('callbacks.abc', 'a', 5); diff --git a/src/utils.c b/src/utils.c index ed57d0f62f..cde31ebc22 100644 --- a/src/utils.c +++ b/src/utils.c @@ -700,6 +700,9 @@ validate_on_part_init_cb(Oid procid, bool emit_error) Form_pg_proc functup; bool is_ok = true; + if (procid == InvalidOid) + return true; + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(procid)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for function %u", procid); From a98a052fd7652aff8047a581135b100e5b817569 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 5 Oct 2016 19:07:08 +0300 Subject: [PATCH 178/184] fix minor typo in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 11374dd3fa..d9f7731d4d 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ create_hash_partitions(relation REGCLASS, attribute TEXT, partitions_count INTEGER, partition_name TEXT DEFAULT NULL, - partition_data BOOOLEAN DEFAULT TRUE) + partition_data BOOLEAN DEFAULT TRUE) ``` Performs HASH partitioning for `relation` by integer key `attribute`. The `partitions_count` parameter specifies the number of partitions to create; it cannot be changed afterwards. If `partition_data` is `true` then all the data will be automatically copied from the parent table to partitions. 
Note that data migration may took a while to finish and the table will be locked until transaction commits. See `partition_table_concurrently()` for a lock-free way to migrate data. Partition creation callback is invoked for each partition if set beforehand (see `set_part_init_callback()`). From e0cfda6f457c7ba44fe0d61ff4fc7a0db5a64732 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 6 Oct 2016 00:28:15 +0300 Subject: [PATCH 179/184] fix incorrect assert in get_pathman_relation_info(), function drop_range_partition() can drop both foreign and local tables and preserve partition's data, insert a few otimizations into pathman_relcache_hook() and finish_delayed_invalidation() --- README.md | 21 +++++++++------------ init.sql | 9 ++++----- range.sql | 34 ++++++++++++++++++++++++++++++---- src/hooks.c | 11 +++++++++-- src/pathman_workers.c | 9 +++++---- src/pl_funcs.c | 4 ++-- src/relation_info.c | 17 +++++++++++++---- 7 files changed, 72 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index d9f7731d4d..59fb3cf456 100644 --- a/README.md +++ b/README.md @@ -165,9 +165,9 @@ add_range_partition(relation REGCLASS, Create new RANGE partition for `relation` with specified range bounds. ```plpgsql -drop_range_partition(partition TEXT) +drop_range_partition(partition TEXT, delete_data BOOLEAN DEFAULT TRUE) ``` -Drop RANGE partition and all its data. +Drop RANGE partition and all of its data if `delete_data` is true. ```plpgsql attach_range_partition(relation REGCLASS, @@ -221,8 +221,7 @@ CREATE TABLE IF NOT EXISTS pathman_config ( parttype INTEGER NOT NULL, range_interval TEXT, - CHECK (parttype IN (1, 2)) /* check for allowed part types */ -); + CHECK (parttype IN (1, 2)) /* check for allowed part types */ ); ``` This table stores a list of partitioned tables. 
@@ -232,8 +231,7 @@ CREATE TABLE IF NOT EXISTS pathman_config_params ( partrel REGCLASS NOT NULL PRIMARY KEY, enable_parent BOOLEAN NOT NULL DEFAULT TRUE, auto BOOLEAN NOT NULL DEFAULT TRUE, - init_callback REGPROCEDURE NOT NULL DEFAULT 0 -); + init_callback REGPROCEDURE NOT NULL DEFAULT 0); ``` This table stores optional parameters which override standard behavior. @@ -259,7 +257,7 @@ This view lists all currently running concurrent partitioning tasks. #### `pathman_partition_list` --- list of all existing partitions ```plpgsql -- helper SRF function -CREATE OR REPLACE FUNCTION @extschema@.show_partition_list() +CREATE OR REPLACE FUNCTION show_partition_list() RETURNS TABLE ( parent REGCLASS, partition REGCLASS, @@ -471,7 +469,7 @@ Notice that the `Append` node contains only one child scan which corresponds to > **Important:** pay attention to the fact that `pg_pathman` excludes the parent table from the query plan. To access parent table use ONLY modifier: -``` +```plpgsql EXPLAIN SELECT * FROM ONLY items; QUERY PLAN ------------------------------------------------------ @@ -484,8 +482,7 @@ CREATE TABLE journal ( id SERIAL, dt TIMESTAMP NOT NULL, level INTEGER, - msg TEXT -); + msg TEXT); -- similar index will also be created for each partition CREATE INDEX ON journal(dt); @@ -515,8 +512,8 @@ CREATE FOREIGN TABLE journal_archive ( id INTEGER NOT NULL, dt TIMESTAMP NOT NULL, level INTEGER, - msg TEXT -) SERVER archive_server; + msg TEXT) +SERVER archive_server; SELECT attach_range_partition('journal', 'journal_archive', '2014-01-01'::date, '2015-01-01'::date); ``` diff --git a/init.sql b/init.sql index e648395a94..4d56242bae 100644 --- a/init.sql +++ b/init.sql @@ -511,7 +511,7 @@ RETURNS INTEGER AS $$ DECLARE v_rec RECORD; - v_rows INTEGER; + v_rows BIGINT; v_part_count INTEGER := 0; conf_num_del INTEGER; v_relkind CHAR; @@ -539,10 +539,9 @@ BEGIN ORDER BY inhrelid ASC) LOOP IF NOT delete_data THEN - EXECUTE format('WITH part_data AS (DELETE FROM %s RETURNING *) 
- INSERT INTO %s SELECT * FROM part_data', - v_rec.tbl::TEXT, - parent_relid::text); + EXECUTE format('INSERT INTO %s SELECT * FROM %s', + parent_relid::TEXT, + v_rec.tbl::TEXT); GET DIAGNOSTICS v_rows = ROW_COUNT; /* Show number of copied rows */ diff --git a/range.sql b/range.sql index 7b62620e52..2dbd762773 100644 --- a/range.sql +++ b/range.sql @@ -1027,12 +1027,15 @@ LANGUAGE plpgsql; * Drop range partition */ CREATE OR REPLACE FUNCTION @extschema@.drop_range_partition( - p_partition REGCLASS) + p_partition REGCLASS, + delete_data BOOLEAN DEFAULT TRUE) RETURNS TEXT AS $$ DECLARE parent_relid REGCLASS; part_name TEXT; + v_relkind CHAR; + v_rows BIGINT; BEGIN parent_relid := @extschema@.get_parent_of_partition(p_partition); @@ -1041,8 +1044,30 @@ BEGIN /* Acquire lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); - /* Drop table */ - EXECUTE format('DROP TABLE %s', part_name); + IF NOT delete_data THEN + EXECUTE format('INSERT INTO %s SELECT * FROM %s', + parent_relid::TEXT, + p_partition::TEXT); + GET DIAGNOSTICS v_rows = ROW_COUNT; + + /* Show number of copied rows */ + RAISE NOTICE '% rows copied from %', v_rows, p_partition::TEXT; + END IF; + + SELECT relkind FROM pg_catalog.pg_class + WHERE oid = p_partition + INTO v_relkind; + + /* + * Determine the kind of child relation. It can be either regular + * table (r) or foreign table (f). Depending on relkind we use + * DROP TABLE or DROP FOREIGN TABLE. 
+ */ + IF v_relkind = 'f' THEN + EXECUTE format('DROP FOREIGN TABLE %s', p_partition::TEXT); + ELSE + EXECUTE format('DROP TABLE %s', p_partition::TEXT); + END IF; /* Invalidate cache */ PERFORM @extschema@.on_update_partitions(parent_relid); @@ -1050,7 +1075,8 @@ BEGIN RETURN part_name; END $$ -LANGUAGE plpgsql; +LANGUAGE plpgsql +SET pg_pathman.enable_partitionfilter = off; /* ensures that PartitionFilter is OFF */ /* diff --git a/src/hooks.c b/src/hooks.c index 8d7f88f313..e3a368b6ab 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -18,6 +18,7 @@ #include "utils.h" #include "xact_handling.h" +#include "access/transam.h" #include "miscadmin.h" #include "optimizer/cost.h" #include "optimizer/restrictinfo.h" @@ -546,6 +547,10 @@ pathman_relcache_hook(Datum arg, Oid relid) if (!IsPathmanReady()) return; + /* We shouldn't even consider special OIDs */ + if (relid < FirstNormalObjectId) + return; + /* Invalidation event for PATHMAN_CONFIG table (probably DROP) */ if (relid == get_pathman_config_relid()) delay_pathman_shutdown(); @@ -569,7 +574,8 @@ pathman_relcache_hook(Datum arg, Oid relid) /* Both syscache and pathman's cache say it isn't a partition */ case PPS_ENTRY_NOT_FOUND: { - delay_invalidation_parent_rel(partitioned_table); + if (partitioned_table != InvalidOid) + delay_invalidation_parent_rel(partitioned_table); #ifdef NOT_USED elog(DEBUG2, "Invalidation message for relation %u [%u]", relid, MyProcPid); @@ -588,7 +594,8 @@ pathman_relcache_hook(Datum arg, Oid relid) break; default: - elog(ERROR, "Not implemented yet"); + elog(ERROR, "Not implemented yet (%s)", + CppAsString(pathman_relcache_hook)); break; } } diff --git a/src/pathman_workers.c b/src/pathman_workers.c index e845319283..c913a71061 100644 --- a/src/pathman_workers.c +++ b/src/pathman_workers.c @@ -213,7 +213,7 @@ start_bg_worker(const char bgworker_name[BGW_MAXLEN], case BGW_PM_DIED: ereport(ERROR, (errmsg("Postmaster died during the pg_pathman background worker process"), - errhint("More details 
may be available in the server log."))); + errhint("More details may be available in the server log."))); break; default: @@ -300,9 +300,10 @@ create_partitions_bg_worker(Oid relid, Datum value, Oid value_type) dsm_detach(segment); if (child_oid == InvalidOid) - elog(ERROR, - "Attempt to append new partitions to relation \"%s\" failed", - get_rel_name_or_relid(relid)); + ereport(ERROR, + (errmsg("Attempt to spawn new partitions of relation \"%s\" failed", + get_rel_name_or_relid(relid)), + errhint("See server log for more details."))); return child_oid; } diff --git a/src/pl_funcs.c b/src/pl_funcs.c index be9a19e01c..38a0d26aec 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -106,13 +106,13 @@ on_partitions_created_internal(Oid partitioned_table, bool add_callbacks) static void on_partitions_updated_internal(Oid partitioned_table, bool add_callbacks) { - bool found; + bool entry_found; elog(DEBUG2, "on_partitions_updated() [add_callbacks = %s] " "triggered for relation %u", (add_callbacks ? 
"true" : "false"), partitioned_table); - invalidate_pathman_relation_info(partitioned_table, &found); + invalidate_pathman_relation_info(partitioned_table, &entry_found); } static void diff --git a/src/relation_info.c b/src/relation_info.c index ab9669fcd0..7028726535 100644 --- a/src/relation_info.c +++ b/src/relation_info.c @@ -15,6 +15,7 @@ #include "access/htup_details.h" #include "access/xact.h" +#include "catalog/catalog.h" #include "catalog/indexing.h" #include "catalog/pg_inherits.h" #include "miscadmin.h" @@ -242,12 +243,11 @@ get_pathman_relation_info(Oid relid) part_type = DatumGetPartType(values[Anum_pathman_config_parttype - 1]); attname = TextDatumGetCString(values[Anum_pathman_config_attname - 1]); - /* Refresh partitioned table cache entry */ + /* Refresh partitioned table cache entry (might turn NULL) */ /* TODO: possible refactoring, pass found 'prel' instead of searching */ prel = refresh_pathman_relation_info(relid, part_type, attname); - Assert(PrelIsValid(prel)); /* it MUST be valid if we got here */ } /* Else clear remaining cache entry */ else remove_pathman_relation_info(relid); @@ -346,7 +346,7 @@ finish_delayed_invalidation(void) /* Handle the probable 'DROP EXTENSION' case */ if (delayed_shutdown) { - Oid cur_pathman_config_relid; + Oid cur_pathman_config_relid; /* Unset 'shutdown' flag */ delayed_shutdown = false; @@ -376,9 +376,14 @@ finish_delayed_invalidation(void) { Oid parent = lfirst_oid(lc); + /* Skip if it's a TOAST table */ + if (IsToastNamespace(get_rel_namespace(parent))) + continue; + if (!pathman_config_contains_relation(parent, NULL, NULL, NULL)) remove_pathman_relation_info(parent); else + /* get_pathman_relation_info() will refresh this entry */ invalidate_pathman_relation_info(parent, NULL); } @@ -387,6 +392,10 @@ finish_delayed_invalidation(void) { Oid vague_rel = lfirst_oid(lc); + /* Skip if it's a TOAST table */ + if (IsToastNamespace(get_rel_namespace(vague_rel))) + continue; + /* It might be a partitioned table or 
a partition */ if (!try_perform_parent_refresh(vague_rel)) { @@ -656,7 +665,7 @@ shout_if_prel_is_invalid(Oid parent_oid, PartType expected_part_type) { if (!prel) - elog(ERROR, "relation \"%s\" is not partitioned by pg_pathman", + elog(ERROR, "relation \"%s\" has no partitions", get_rel_name_or_relid(parent_oid)); if (!PrelIsValid(prel)) From 593f47aade617f13213ed4a16107cfa014567c7c Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 6 Oct 2016 03:16:46 +0300 Subject: [PATCH 180/184] drop_range_partition(): check that it's indeed a RANGE partition, allow unprivileged user to change GUC variable pg_pathman_enable_partition_filter --- README.md | 2 +- range.sql | 13 ++++++++++++- src/partition_filter.c | 2 +- src/pl_funcs.c | 2 +- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 59fb3cf456..52cf0106b7 100644 --- a/README.md +++ b/README.md @@ -533,7 +533,7 @@ SELECT detach_range_partition('journal_archive'); ``` Here's an example of the query performing filtering by partitioning key: -``` +```plpgsql SELECT * FROM journal WHERE dt >= '2015-06-01' AND dt < '2015-06-03'; id | dt | level | msg --------+---------------------+-------+---------------------------------- diff --git a/range.sql b/range.sql index 2dbd762773..dfad1fdcd8 100644 --- a/range.sql +++ b/range.sql @@ -582,7 +582,7 @@ BEGIN /* Check if this is a RANGE partition */ IF v_part_type != 2 THEN - RAISE EXCEPTION 'specified partition isn''t RANGE partition'; + RAISE EXCEPTION '"%" is not a RANGE partition', p_partition::TEXT; END IF; v_atttype = @extschema@.get_attribute_type(v_parent, v_attname); @@ -1036,11 +1036,22 @@ DECLARE part_name TEXT; v_relkind CHAR; v_rows BIGINT; + v_part_type INTEGER; BEGIN parent_relid := @extschema@.get_parent_of_partition(p_partition); part_name := p_partition::TEXT; /* save the name to be returned */ + SELECT parttype + FROM @extschema@.pathman_config + WHERE partrel = parent_relid + INTO v_part_type; + + /* Check if this is a 
RANGE partition */ + IF v_part_type != 2 THEN + RAISE EXCEPTION '"%" is not a RANGE partition', p_partition::TEXT; + END IF; + /* Acquire lock on parent */ PERFORM @extschema@.lock_partitioned_relation(parent_relid); diff --git a/src/partition_filter.c b/src/partition_filter.c index fdb2a3726b..51f09923e0 100644 --- a/src/partition_filter.c +++ b/src/partition_filter.c @@ -92,7 +92,7 @@ init_partition_filter_static_data(void) NULL, &pg_pathman_enable_partition_filter, true, - PGC_SUSET, + PGC_USERSET, 0, NULL, NULL, diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 38a0d26aec..19ca873ac5 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -173,7 +173,7 @@ get_parent_of_partition_pl(PG_FUNCTION_ARGS) PG_RETURN_OID(parent); else { - elog(ERROR, "\%s\" is not pg_pathman's partition", + elog(ERROR, "\"%s\" is not a partition", get_rel_name_or_relid(partition)); PG_RETURN_NULL(); From 84b3b69e47fbaace31f8a29275e274b1c5fd0154 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 6 Oct 2016 03:26:37 +0300 Subject: [PATCH 181/184] add more tips to README.md --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index 52cf0106b7..afed526a5c 100644 --- a/README.md +++ b/README.md @@ -430,6 +430,22 @@ SELECT * FROM pathman_concurrent_part_tasks; (1 row) ``` +- `pathman_partition_list` in conjunction with `drop_range_partition()` can be used to drop RANGE partitions in a more flexible way compared to good old `DROP TABLE`: +```plpgsql +SELECT drop_range_partition(partition, false) /* move data to parent */ +FROM pathman_partition_list +WHERE parent = 'part_test'::regclass AND range_min::int < 500; +NOTICE: 1 rows copied from part_test_11 +NOTICE: 100 rows copied from part_test_1 +NOTICE: 100 rows copied from part_test_2 + drop_range_partition +---------------------- + part_test_11 + part_test_1 + part_test_2 +(3 rows) +``` + ### HASH partitioning Consider an example of HASH partitioning.
First create a table with some integer column: ```plpgsql From c61cb62284366cc0d6acec1892dafc8a43106332 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 6 Oct 2016 17:11:34 +0400 Subject: [PATCH 182/184] add more info on init_callbacks to README.md --- README.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index afed526a5c..4b7d48ac10 100644 --- a/README.md +++ b/README.md @@ -209,7 +209,24 @@ Enable/disable auto partition propagation (only for RANGE partitioning). It is e ```plpgsql set_init_callback(relation REGCLASS, callback REGPROC DEFAULT 0) ``` -Set partition creation callback to be invoked for each attached or created partition (both HASH and RANGE). +Set partition creation callback to be invoked for each attached or created partition (both HASH and RANGE). The callback must have the following signature: `part_init_callback(args JSONB) RETURNS VOID`. Parameter `args` consists of several fields whose presence depends on partitioning type: +```json +/* RANGE-partitioned table abc (child abc_4) */ +{ + "parent": "abc", + "parttype": "2", + "partition": "abc_4", + "range_max": "401", + "range_min": "301" +} + +/* HASH-partitioned table abc (child abc_0) */ +{ + "parent": "abc", + "parttype": "1", + "partition": "abc_0" +} +``` ## Views and tables From 4b2382bbcb5652122e9971bd2030f0cacface55a Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 6 Oct 2016 16:43:38 +0300 Subject: [PATCH 183/184] add more comments, remove completed tasks from README.md --- README.md | 1 - src/pathman.h | 12 ++++++------ src/pl_funcs.c | 10 ++++++++-- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 4b7d48ac10..ec176bb822 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,6 @@ More interesting features are yet to come. Stay tuned!
## Roadmap - * Provide a way to create user-defined partition creation\destruction callbacks (issue [#22](https://github.com/postgrespro/pg_pathman/issues/22)) * Implement LIST partitioning scheme; * Optimize hash join (both tables are partitioned by join key). diff --git a/src/pathman.h b/src/pathman.h index c078ed1045..84d71dd9ab 100644 --- a/src/pathman.h +++ b/src/pathman.h @@ -66,12 +66,12 @@ */ #define PATHMAN_PARTITION_LIST "pathman_partition_list" #define Natts_pathman_partition_list 6 -#define Anum_pathman_pl_parent 1 -#define Anum_pathman_pl_partition 2 -#define Anum_pathman_pl_parttype 3 -#define Anum_pathman_pl_partattr 4 -#define Anum_pathman_pl_range_min 5 -#define Anum_pathman_pl_range_max 6 +#define Anum_pathman_pl_parent 1 /* partitioned relation (regclass) */ +#define Anum_pathman_pl_partition 2 /* child partition (regclass) */ +#define Anum_pathman_pl_parttype 3 /* partitioning type (1|2) */ +#define Anum_pathman_pl_partattr 4 /* partitioned column (text) */ +#define Anum_pathman_pl_range_min 5 /* partition's min value */ +#define Anum_pathman_pl_range_max 6 /* partition's max value */ /* diff --git a/src/pl_funcs.c b/src/pl_funcs.c index 19ca873ac5..a7c19b2e3b 100644 --- a/src/pl_funcs.c +++ b/src/pl_funcs.c @@ -62,15 +62,18 @@ PG_FUNCTION_INFO_V1( invoke_on_partition_created_callback ); PG_FUNCTION_INFO_V1( debug_capture ); +/* + * User context for function show_partition_list_internal(). 
+ */ typedef struct { Relation pathman_config; HeapScanDesc pathman_config_scan; Snapshot snapshot; - const PartRelationInfo *current_prel; + const PartRelationInfo *current_prel; /* selected PartRelationInfo */ - uint32 child_number; + uint32 child_number; /* child we're looking at */ } show_partition_list_cxt; @@ -348,6 +351,7 @@ show_partition_list_internal(PG_FUNCTION_ARGS) /* Alias to 'usercxt->current_prel' */ prel = usercxt->current_prel; + /* If we've run out of partitions, switch to the next 'prel' */ if (usercxt->child_number >= PrelChildrenCount(prel)) { usercxt->current_prel = NULL; @@ -359,10 +363,12 @@ show_partition_list_internal(PG_FUNCTION_ARGS) partattr_cstr = get_attname(PrelParentRelid(prel), prel->attnum); if (!partattr_cstr) { + /* Parent does not exist, go to the next 'prel' */ usercxt->current_prel = NULL; continue; } + /* Fill in common values */ values[Anum_pathman_pl_parent - 1] = PrelParentRelid(prel); values[Anum_pathman_pl_parttype - 1] = prel->parttype; values[Anum_pathman_pl_partattr - 1] = CStringGetTextDatum(partattr_cstr); From ad87ba189465db55544905be1faa731e3d8091ae Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 6 Oct 2016 20:40:24 +0300 Subject: [PATCH 184/184] prevent use-after-free reported by sparc64 --- src/pg_pathman.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pg_pathman.c b/src/pg_pathman.c index 9f777c3aea..113df1b4bf 100644 --- a/src/pg_pathman.c +++ b/src/pg_pathman.c @@ -874,6 +874,10 @@ create_partitions_internal(Oid relid, Datum value, Oid value_type) min_rvalue = PrelGetRangesArray(prel)[0].min; max_rvalue = PrelGetRangesArray(prel)[PrelLastChild(prel)].max; + /* Copy datums in order to protect them from cache invalidation */ + min_rvalue = datumCopy(min_rvalue, prel->attbyval, prel->attlen); + max_rvalue = datumCopy(max_rvalue, prel->attbyval, prel->attlen); + /* Retrieve interval as TEXT from tuple */ interval_text = values[Anum_pathman_config_range_interval - 1];