Skip to content

Commit 33001fd

Browse files
committed
Collect JIT instrumentation from workers.
Previously, when using parallel query, EXPLAIN (ANALYZE)'s JIT compilation timings did not include the overhead from doing so on the workers. Fix that. We do so by simply aggregating the cost of doing JIT compilation on workers and the leader together. Arguably that's not quite accurate, because the total time spend doing so is spent in parallel - but it's hard to do much better. For additional detail, when VERBOSE is specified, the stats for workers are displayed separately. Author: Amit Khandekar and Andres Freund Discussion: https://postgr.es/m/CAJ3gD9eLrz51RK_gTkod+71iDcjpB_N8eC6vU2AW-VicsAERpQ@mail.gmail.com Backpatch: 11-
1 parent 5e22171 commit 33001fd

File tree

11 files changed

+211
-46
lines changed

11 files changed

+211
-46
lines changed

contrib/auto_explain/auto_explain.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,9 +362,9 @@ explain_ExecutorEnd(QueryDesc *queryDesc)
362362
ExplainPrintPlan(es, queryDesc);
363363
if (es->analyze && auto_explain_log_triggers)
364364
ExplainPrintTriggers(es, queryDesc);
365-
if (queryDesc->estate->es_jit && es->costs &&
366-
queryDesc->estate->es_jit->created_functions > 0)
367-
ExplainPrintJIT(es, queryDesc);
365+
if (es->costs)
366+
ExplainPrintJIT(es, queryDesc->estate->es_jit_flags,
367+
queryDesc->estate->es_jit_combined_instr, -1);
368368
ExplainEndOutput(es);
369369

370370
/* Remove last line break */

src/backend/commands/explain.c

Lines changed: 59 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -563,9 +563,9 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
563563
* depending on build options. Might want to separate that out from COSTS
564564
* at a later stage.
565565
*/
566-
if (queryDesc->estate->es_jit && es->costs &&
567-
queryDesc->estate->es_jit->created_functions > 0)
568-
ExplainPrintJIT(es, queryDesc);
566+
if (es->costs)
567+
ExplainPrintJIT(es, queryDesc->estate->es_jit_flags,
568+
queryDesc->estate->es_jit_combined_instr, -1);
569569

570570
/*
571571
* Close down the query and free resources. Include time for this in the
@@ -691,78 +691,90 @@ ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc)
691691
/*
692692
* ExplainPrintJIT -
693693
* Append information about JITing to es->str.
694+
*
695+
* Can be used to print the JIT instrumentation of the backend (worker_num =
696+
* -1) or that of a specific worker (worker_num = ...).
694697
*/
695698
void
696-
ExplainPrintJIT(ExplainState *es, QueryDesc *queryDesc)
699+
ExplainPrintJIT(ExplainState *es, int jit_flags,
700+
JitInstrumentation *ji, int worker_num)
697701
{
698-
JitContext *jc = queryDesc->estate->es_jit;
699702
instr_time total_time;
703+
bool for_workers = (worker_num >= 0);
704+
705+
/* don't print information if no JITing happened */
706+
if (!ji || ji->created_functions == 0)
707+
return;
700708

701709
/* calculate total time */
702710
INSTR_TIME_SET_ZERO(total_time);
703-
INSTR_TIME_ADD(total_time, jc->generation_counter);
704-
INSTR_TIME_ADD(total_time, jc->inlining_counter);
705-
INSTR_TIME_ADD(total_time, jc->optimization_counter);
706-
INSTR_TIME_ADD(total_time, jc->emission_counter);
711+
INSTR_TIME_ADD(total_time, ji->generation_counter);
712+
INSTR_TIME_ADD(total_time, ji->inlining_counter);
713+
INSTR_TIME_ADD(total_time, ji->optimization_counter);
714+
INSTR_TIME_ADD(total_time, ji->emission_counter);
707715

708716
ExplainOpenGroup("JIT", "JIT", true, es);
709717

710718
/* for higher density, open code the text output format */
711719
if (es->format == EXPLAIN_FORMAT_TEXT)
712720
{
713721
appendStringInfoSpaces(es->str, es->indent * 2);
714-
appendStringInfo(es->str, "JIT:\n");
722+
if (for_workers)
723+
appendStringInfo(es->str, "JIT for worker %u:\n", worker_num);
724+
else
725+
appendStringInfo(es->str, "JIT:\n");
715726
es->indent += 1;
716727

717-
ExplainPropertyInteger("Functions", NULL, jc->created_functions, es);
728+
ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);
718729

719730
appendStringInfoSpaces(es->str, es->indent * 2);
720731
appendStringInfo(es->str, "Options: %s %s, %s %s, %s %s, %s %s\n",
721-
"Inlining", jc->flags & PGJIT_INLINE ? "true" : "false",
722-
"Optimization", jc->flags & PGJIT_OPT3 ? "true" : "false",
723-
"Expressions", jc->flags & PGJIT_EXPR ? "true" : "false",
724-
"Deforming", jc->flags & PGJIT_DEFORM ? "true" : "false");
732+
"Inlining", jit_flags & PGJIT_INLINE ? "true" : "false",
733+
"Optimization", jit_flags & PGJIT_OPT3 ? "true" : "false",
734+
"Expressions", jit_flags & PGJIT_EXPR ? "true" : "false",
735+
"Deforming", jit_flags & PGJIT_DEFORM ? "true" : "false");
725736

726737
if (es->analyze && es->timing)
727738
{
728739
appendStringInfoSpaces(es->str, es->indent * 2);
729740
appendStringInfo(es->str,
730741
"Timing: %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms\n",
731-
"Generation", 1000.0 * INSTR_TIME_GET_DOUBLE(jc->generation_counter),
732-
"Inlining", 1000.0 * INSTR_TIME_GET_DOUBLE(jc->inlining_counter),
733-
"Optimization", 1000.0 * INSTR_TIME_GET_DOUBLE(jc->optimization_counter),
734-
"Emission", 1000.0 * INSTR_TIME_GET_DOUBLE(jc->emission_counter),
742+
"Generation", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
743+
"Inlining", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->inlining_counter),
744+
"Optimization", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->optimization_counter),
745+
"Emission", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->emission_counter),
735746
"Total", 1000.0 * INSTR_TIME_GET_DOUBLE(total_time));
736747
}
737748

738749
es->indent -= 1;
739750
}
740751
else
741752
{
742-
ExplainPropertyInteger("Functions", NULL, jc->created_functions, es);
753+
ExplainPropertyInteger("Worker Number", NULL, worker_num, es);
754+
ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);
743755

744756
ExplainOpenGroup("Options", "Options", true, es);
745-
ExplainPropertyBool("Inlining", jc->flags & PGJIT_INLINE, es);
746-
ExplainPropertyBool("Optimization", jc->flags & PGJIT_OPT3, es);
747-
ExplainPropertyBool("Expressions", jc->flags & PGJIT_EXPR, es);
748-
ExplainPropertyBool("Deforming", jc->flags & PGJIT_DEFORM, es);
757+
ExplainPropertyBool("Inlining", jit_flags & PGJIT_INLINE, es);
758+
ExplainPropertyBool("Optimization", jit_flags & PGJIT_OPT3, es);
759+
ExplainPropertyBool("Expressions", jit_flags & PGJIT_EXPR, es);
760+
ExplainPropertyBool("Deforming", jit_flags & PGJIT_DEFORM, es);
749761
ExplainCloseGroup("Options", "Options", true, es);
750762

751763
if (es->analyze && es->timing)
752764
{
753765
ExplainOpenGroup("Timing", "Timing", true, es);
754766

755767
ExplainPropertyFloat("Generation", "ms",
756-
1000.0 * INSTR_TIME_GET_DOUBLE(jc->generation_counter),
768+
1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
757769
3, es);
758770
ExplainPropertyFloat("Inlining", "ms",
759-
1000.0 * INSTR_TIME_GET_DOUBLE(jc->inlining_counter),
771+
1000.0 * INSTR_TIME_GET_DOUBLE(ji->inlining_counter),
760772
3, es);
761773
ExplainPropertyFloat("Optimization", "ms",
762-
1000.0 * INSTR_TIME_GET_DOUBLE(jc->optimization_counter),
774+
1000.0 * INSTR_TIME_GET_DOUBLE(ji->optimization_counter),
763775
3, es);
764776
ExplainPropertyFloat("Emission", "ms",
765-
1000.0 * INSTR_TIME_GET_DOUBLE(jc->emission_counter),
777+
1000.0 * INSTR_TIME_GET_DOUBLE(ji->emission_counter),
766778
3, es);
767779
ExplainPropertyFloat("Total", "ms",
768780
1000.0 * INSTR_TIME_GET_DOUBLE(total_time),
@@ -1554,6 +1566,25 @@ ExplainNode(PlanState *planstate, List *ancestors,
15541566
ExplainPropertyInteger("Workers Launched", NULL,
15551567
nworkers, es);
15561568
}
1569+
1570+
/*
1571+
* Print per-worker Jit instrumentation. Use same conditions
1572+
* as for the leader's JIT instrumentation, see comment there.
1573+
*/
1574+
if (es->costs && es->verbose &&
1575+
outerPlanState(planstate)->worker_jit_instrument)
1576+
{
1577+
PlanState *child = outerPlanState(planstate);
1578+
int n;
1579+
SharedJitInstrumentation *w = child->worker_jit_instrument;
1580+
1581+
for (n = 0; n < w->num_workers; ++n)
1582+
{
1583+
ExplainPrintJIT(es, child->state->es_jit_flags,
1584+
&w->jit_instr[n], n);
1585+
}
1586+
}
1587+
15571588
if (gather->single_copy || es->format != EXPLAIN_FORMAT_TEXT)
15581589
ExplainPropertyBool("Single Copy", gather->single_copy, es);
15591590
}

src/backend/executor/execMain.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include "executor/execdebug.h"
4949
#include "executor/nodeSubplan.h"
5050
#include "foreign/fdwapi.h"
51+
#include "jit/jit.h"
5152
#include "mb/pg_wchar.h"
5253
#include "miscadmin.h"
5354
#include "optimizer/clauses.h"
@@ -494,6 +495,21 @@ standard_ExecutorEnd(QueryDesc *queryDesc)
494495

495496
ExecEndPlan(queryDesc->planstate, estate);
496497

498+
/*
499+
* If this process has done JIT, either merge stats into worker stats, or
500+
* use this process' stats as the global stats if no parallelism was used
501+
* / no workers did JIT.
502+
*/
503+
if (estate->es_instrument && queryDesc->estate->es_jit)
504+
{
505+
if (queryDesc->estate->es_jit_combined_instr)
506+
InstrJitAgg(queryDesc->estate->es_jit_combined_instr,
507+
&queryDesc->estate->es_jit->instr);
508+
else
509+
queryDesc->estate->es_jit_combined_instr =
510+
&queryDesc->estate->es_jit->instr;
511+
}
512+
497513
/* do away with our snapshots */
498514
UnregisterSnapshot(estate->es_snapshot);
499515
UnregisterSnapshot(estate->es_crosscheck_snapshot);

src/backend/executor/execParallel.c

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "executor/nodeSort.h"
3838
#include "executor/nodeSubplan.h"
3939
#include "executor/tqueue.h"
40+
#include "jit/jit.h"
4041
#include "nodes/nodeFuncs.h"
4142
#include "optimizer/planmain.h"
4243
#include "optimizer/planner.h"
@@ -62,6 +63,7 @@
6263
#define PARALLEL_KEY_INSTRUMENTATION UINT64CONST(0xE000000000000006)
6364
#define PARALLEL_KEY_DSA UINT64CONST(0xE000000000000007)
6465
#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xE000000000000008)
66+
#define PARALLEL_KEY_JIT_INSTRUMENTATION UINT64CONST(0xE000000000000009)
6567

6668
#define PARALLEL_TUPLE_QUEUE_SIZE 65536
6769

@@ -573,9 +575,11 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
573575
char *paramlistinfo_space;
574576
BufferUsage *bufusage_space;
575577
SharedExecutorInstrumentation *instrumentation = NULL;
578+
SharedJitInstrumentation *jit_instrumentation = NULL;
576579
int pstmt_len;
577580
int paramlistinfo_len;
578581
int instrumentation_len = 0;
582+
int jit_instrumentation_len = 0;
579583
int instrument_offset = 0;
580584
Size dsa_minsize = dsa_minimum_size();
581585
char *query_string;
@@ -669,6 +673,16 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
669673
mul_size(e.nnodes, nworkers));
670674
shm_toc_estimate_chunk(&pcxt->estimator, instrumentation_len);
671675
shm_toc_estimate_keys(&pcxt->estimator, 1);
676+
677+
/* Estimate space for JIT instrumentation, if required. */
678+
if (estate->es_jit_flags != PGJIT_NONE)
679+
{
680+
jit_instrumentation_len =
681+
offsetof(SharedJitInstrumentation, jit_instr) +
682+
sizeof(JitInstrumentation) * nworkers;
683+
shm_toc_estimate_chunk(&pcxt->estimator, jit_instrumentation_len);
684+
shm_toc_estimate_keys(&pcxt->estimator, 1);
685+
}
672686
}
673687

674688
/* Estimate space for DSA area. */
@@ -742,6 +756,18 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
742756
shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTRUMENTATION,
743757
instrumentation);
744758
pei->instrumentation = instrumentation;
759+
760+
if (estate->es_jit_flags != PGJIT_NONE)
761+
{
762+
jit_instrumentation = shm_toc_allocate(pcxt->toc,
763+
jit_instrumentation_len);
764+
jit_instrumentation->num_workers = nworkers;
765+
memset(jit_instrumentation->jit_instr, 0,
766+
sizeof(JitInstrumentation) * nworkers);
767+
shm_toc_insert(pcxt->toc, PARALLEL_KEY_JIT_INSTRUMENTATION,
768+
jit_instrumentation);
769+
pei->jit_instrumentation = jit_instrumentation;
770+
}
745771
}
746772

747773
/*
@@ -1003,6 +1029,46 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
10031029
instrumentation);
10041030
}
10051031

1032+
/*
1033+
* Add up the workers' JIT instrumentation from dynamic shared memory.
1034+
*/
1035+
static void
1036+
ExecParallelRetrieveJitInstrumentation(PlanState *planstate,
1037+
SharedJitInstrumentation *shared_jit)
1038+
{
1039+
JitInstrumentation *combined;
1040+
int ibytes;
1041+
1042+
int n;
1043+
1044+
/*
1045+
* Accumulate worker JIT instrumentation into the combined JIT
1046+
* instrumentation, allocating it if required. Note this is kept separate
1047+
* from the leader's own instrumentation.
1048+
*/
1049+
if (!planstate->state->es_jit_combined_instr)
1050+
planstate->state->es_jit_combined_instr =
1051+
MemoryContextAllocZero(planstate->state->es_query_cxt, sizeof(JitInstrumentation));
1052+
combined = planstate->state->es_jit_combined_instr;
1053+
1054+
/* Accummulate all the workers' instrumentations. */
1055+
for (n = 0; n < shared_jit->num_workers; ++n)
1056+
InstrJitAgg(combined, &shared_jit->jit_instr[n]);
1057+
1058+
/*
1059+
* Store the per-worker detail.
1060+
*
1061+
* Similar to ExecParallelRetrieveInstrumentation(), allocate the
1062+
* instrumentation in per-query context.
1063+
*/
1064+
ibytes = offsetof(SharedJitInstrumentation, jit_instr)
1065+
+ mul_size(shared_jit->num_workers, sizeof(JitInstrumentation));
1066+
planstate->worker_jit_instrument =
1067+
MemoryContextAlloc(planstate->state->es_query_cxt, ibytes);
1068+
1069+
memcpy(planstate->worker_jit_instrument, shared_jit, ibytes);
1070+
}
1071+
10061072
/*
10071073
* Finish parallel execution. We wait for parallel workers to finish, and
10081074
* accumulate their buffer usage.
@@ -1068,6 +1134,11 @@ ExecParallelCleanup(ParallelExecutorInfo *pei)
10681134
ExecParallelRetrieveInstrumentation(pei->planstate,
10691135
pei->instrumentation);
10701136

1137+
/* Accumulate JIT instrumentation, if any. */
1138+
if (pei->jit_instrumentation)
1139+
ExecParallelRetrieveJitInstrumentation(pei->planstate,
1140+
pei->jit_instrumentation);
1141+
10711142
/* Free any serialized parameters. */
10721143
if (DsaPointerIsValid(pei->param_exec))
10731144
{
@@ -1274,6 +1345,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
12741345
DestReceiver *receiver;
12751346
QueryDesc *queryDesc;
12761347
SharedExecutorInstrumentation *instrumentation;
1348+
SharedJitInstrumentation *jit_instrumentation;
12771349
int instrument_options = 0;
12781350
void *area_space;
12791351
dsa_area *area;
@@ -1287,6 +1359,8 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
12871359
instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, true);
12881360
if (instrumentation != NULL)
12891361
instrument_options = instrumentation->instrument_options;
1362+
jit_instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_JIT_INSTRUMENTATION,
1363+
true);
12901364
queryDesc = ExecParallelGetQueryDesc(toc, receiver, instrument_options);
12911365

12921366
/* Setting debug_query_string for individual workers */
@@ -1350,6 +1424,14 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
13501424
ExecParallelReportInstrumentation(queryDesc->planstate,
13511425
instrumentation);
13521426

1427+
/* Report JIT instrumentation data if any */
1428+
if (queryDesc->estate->es_jit && jit_instrumentation != NULL)
1429+
{
1430+
Assert(ParallelWorkerNumber < jit_instrumentation->num_workers);
1431+
jit_instrumentation->jit_instr[ParallelWorkerNumber] =
1432+
queryDesc->estate->es_jit->instr;
1433+
}
1434+
13531435
/* Must do this after capturing instrumentation. */
13541436
ExecutorEnd(queryDesc);
13551437

src/backend/jit/jit.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,17 @@ jit_compile_expr(struct ExprState *state)
182182
return false;
183183
}
184184

185+
/* Aggregate JIT instrumentation information */
186+
void
187+
InstrJitAgg(JitInstrumentation *dst, JitInstrumentation *add)
188+
{
189+
dst->created_functions += add->created_functions;
190+
INSTR_TIME_ADD(dst->generation_counter, add->generation_counter);
191+
INSTR_TIME_ADD(dst->inlining_counter, add->inlining_counter);
192+
INSTR_TIME_ADD(dst->optimization_counter, add->optimization_counter);
193+
INSTR_TIME_ADD(dst->emission_counter, add->emission_counter);
194+
}
195+
185196
static bool
186197
file_exists(const char *name)
187198
{

0 commit comments

Comments
 (0)