@@ -2379,70 +2379,95 @@ show_sort_info(SortState *sortstate, ExplainState *es)
 static void
 show_hash_info(HashState *hashstate, ExplainState *es)
 {
-    HashInstrumentation *hinstrument = NULL;
+    HashInstrumentation hinstrument = {0};
 
     /*
      * In a parallel query, the leader process may or may not have run the
      * hash join, and even if it did it may not have built a hash table due to
      * timing (if it started late it might have seen no tuples in the outer
      * relation and skipped building the hash table).  Therefore we have to be
-     * prepared to get instrumentation data from a worker if there is no hash
-     * table.
+     * prepared to get instrumentation data from all participants.
      */
     if (hashstate->hashtable)
-    {
-        hinstrument = (HashInstrumentation *)
-            palloc(sizeof(HashInstrumentation));
-        ExecHashGetInstrumentation(hinstrument, hashstate->hashtable);
-    }
-    else if (hashstate->shared_info)
+        ExecHashGetInstrumentation(&hinstrument, hashstate->hashtable);
+
+    /*
+     * Merge results from workers.  In the parallel-oblivious case, the
+     * results from all participants should be identical, except where
+     * participants didn't run the join at all so have no data.  In the
+     * parallel-aware case, we need to consider all the results.  Each worker
+     * may have seen a different subset of batches and we want to find the
+     * highest memory usage for any one batch across all batches.
+     */
+    if (hashstate->shared_info)
     {
         SharedHashInfo *shared_info = hashstate->shared_info;
-        int         i;
+        int         i;
 
-        /* Find the first worker that built a hash table. */
         for (i = 0; i < shared_info->num_workers; ++i)
         {
-            if (shared_info->hinstrument[i].nbatch > 0)
+            HashInstrumentation *worker_hi = &shared_info->hinstrument[i];
+
+            if (worker_hi->nbatch > 0)
             {
-                hinstrument = &shared_info->hinstrument[i];
-                break;
+                /*
+                 * Every participant should agree on the buckets, so to be
+                 * sure we have a value we'll just overwrite each time.
+                 */
+                hinstrument.nbuckets = worker_hi->nbuckets;
+                hinstrument.nbuckets_original = worker_hi->nbuckets_original;
+
+                /*
+                 * Normally every participant should agree on the number of
+                 * batches too, but it's possible for a backend that started
+                 * late and missed the whole join not to have the final nbatch
+                 * number.  So we'll take the largest number.
+                 */
+                hinstrument.nbatch = Max(hinstrument.nbatch, worker_hi->nbatch);
+                hinstrument.nbatch_original = worker_hi->nbatch_original;
+
+                /*
+                 * In a parallel-aware hash join, for now we report the
+                 * maximum peak memory reported by any worker.
+                 */
+                hinstrument.space_peak =
+                    Max(hinstrument.space_peak, worker_hi->space_peak);
             }
         }
     }
 
-    if (hinstrument)
+    if (hinstrument.nbatch > 0)
     {
-        long        spacePeakKb = (hinstrument->space_peak + 1023) / 1024;
+        long        spacePeakKb = (hinstrument.space_peak + 1023) / 1024;
 
         if (es->format != EXPLAIN_FORMAT_TEXT)
         {
-            ExplainPropertyLong("Hash Buckets", hinstrument->nbuckets, es);
+            ExplainPropertyLong("Hash Buckets", hinstrument.nbuckets, es);
             ExplainPropertyLong("Original Hash Buckets",
-                                hinstrument->nbuckets_original, es);
-            ExplainPropertyLong("Hash Batches", hinstrument->nbatch, es);
+                                hinstrument.nbuckets_original, es);
+            ExplainPropertyLong("Hash Batches", hinstrument.nbatch, es);
             ExplainPropertyLong("Original Hash Batches",
-                                hinstrument->nbatch_original, es);
+                                hinstrument.nbatch_original, es);
             ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es);
         }
-        else if (hinstrument->nbatch_original != hinstrument->nbatch ||
-                 hinstrument->nbuckets_original != hinstrument->nbuckets)
+        else if (hinstrument.nbatch_original != hinstrument.nbatch ||
+                 hinstrument.nbuckets_original != hinstrument.nbuckets)
         {
             appendStringInfoSpaces(es->str, es->indent * 2);
             appendStringInfo(es->str,
                              "Buckets: %d (originally %d)  Batches: %d (originally %d)  Memory Usage: %ldkB\n",
-                             hinstrument->nbuckets,
-                             hinstrument->nbuckets_original,
-                             hinstrument->nbatch,
-                             hinstrument->nbatch_original,
+                             hinstrument.nbuckets,
+                             hinstrument.nbuckets_original,
+                             hinstrument.nbatch,
+                             hinstrument.nbatch_original,
                              spacePeakKb);
         }
         else
         {
             appendStringInfoSpaces(es->str, es->indent * 2);
             appendStringInfo(es->str,
                              "Buckets: %d  Batches: %d  Memory Usage: %ldkB\n",
-                             hinstrument->nbuckets, hinstrument->nbatch,
+                             hinstrument.nbuckets, hinstrument.nbatch,
                              spacePeakKb);
         }
     }
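
For reference, here is a minimal, self-contained sketch of the merge rule the new code applies to the per-participant counters: bucket counts are simply overwritten (every participant is expected to agree on them), the batch count and peak memory are folded in with a maximum, and participants that never built a hash table (nbatch == 0) are skipped. The struct, the SKETCH_MAX macro, and merge_hash_instrumentation below are simplified stand-ins invented for illustration, not the actual HashInstrumentation API from the PostgreSQL headers.

    #include <stdio.h>
    #include <stddef.h>

    /* Simplified stand-in for one participant's hash instrumentation. */
    typedef struct SketchHashInstrumentation
    {
        int     nbuckets;
        int     nbuckets_original;
        int     nbatch;
        int     nbatch_original;
        size_t  space_peak;         /* peak memory in bytes */
    } SketchHashInstrumentation;

    #define SKETCH_MAX(a, b) ((a) > (b) ? (a) : (b))

    /*
     * Fold one participant's numbers into the accumulator, skipping
     * participants that never built a hash table (nbatch == 0).
     */
    static void
    merge_hash_instrumentation(SketchHashInstrumentation *acc,
                               const SketchHashInstrumentation *worker)
    {
        if (worker->nbatch == 0)
            return;                 /* this participant didn't run the join */

        /* All participants should agree on buckets: just overwrite. */
        acc->nbuckets = worker->nbuckets;
        acc->nbuckets_original = worker->nbuckets_original;

        /* A late starter may have missed batch growth: take the maximum. */
        acc->nbatch = SKETCH_MAX(acc->nbatch, worker->nbatch);
        acc->nbatch_original = worker->nbatch_original;

        /* Report the largest peak memory seen by any participant. */
        acc->space_peak = SKETCH_MAX(acc->space_peak, worker->space_peak);
    }

    int
    main(void)
    {
        SketchHashInstrumentation workers[] = {
            {1024, 1024, 4, 2, 200 * 1024},
            {0, 0, 0, 0, 0},            /* started late, built no hash table */
            {1024, 1024, 2, 2, 150 * 1024},
        };
        SketchHashInstrumentation total = {0};

        for (size_t i = 0; i < sizeof(workers) / sizeof(workers[0]); i++)
            merge_hash_instrumentation(&total, &workers[i]);

        if (total.nbatch > 0)
        {
            /* Round peak memory up to whole kilobytes, as the patch does. */
            long    spacePeakKb = (long) ((total.space_peak + 1023) / 1024);

            printf("Buckets: %d  Batches: %d  Memory Usage: %ldkB\n",
                   total.nbuckets, total.nbatch, spacePeakKb);
        }
        return 0;
    }

Running the sketch prints a line in the same shape as the text-format EXPLAIN output, with the peak memory rounded up to whole kilobytes via (space_peak + 1023) / 1024, mirroring the spacePeakKb calculation in the patch.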