Skip to content

Commit b3219c6

Browse files
aio: Add errcontext for processing I/Os for another backend
Push an ErrorContextCallback adding additional detail about the process performing the I/O and the owner of the I/O when those are not the same.

For io_method worker, this adds context specifying which process owns the I/O that the I/O worker is processing.

For io_method io_uring, this adds context only when a backend is *completing* I/O for another backend. It specifies the pid of the owning process.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/rdml3fpukrqnas7qc5uimtl2fyytrnu6ymc2vjf2zuflbsjuul%40hyizyjsexwmm
1 parent b136db0 commit b3219c6

File tree

2 files changed

+60
-0
lines changed

2 files changed

+60
-0
lines changed

src/backend/storage/aio/method_io_uring.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,14 +302,41 @@ pgaio_uring_submit(uint16 num_staged_ios, PgAioHandle **staged_ios)
302302
return num_staged_ios;
303303
}
304304

305+
static void
306+
pgaio_uring_completion_error_callback(void *arg)
307+
{
308+
ProcNumber owner;
309+
PGPROC *owner_proc;
310+
int32 owner_pid;
311+
PgAioHandle *ioh = arg;
312+
313+
if (!ioh)
314+
return;
315+
316+
/* No need for context if a backend is completing the IO for itself */
317+
if (ioh->owner_procno == MyProcNumber)
318+
return;
319+
320+
owner = ioh->owner_procno;
321+
owner_proc = GetPGProcByNumber(owner);
322+
owner_pid = owner_proc->pid;
323+
324+
errcontext("completing I/O on behalf of process %d", owner_pid);
325+
}
326+
305327
static void
306328
pgaio_uring_drain_locked(PgAioUringContext *context)
307329
{
308330
int ready;
309331
int orig_ready;
332+
ErrorContextCallback errcallback = {0};
310333

311334
Assert(LWLockHeldByMeInMode(&context->completion_lock, LW_EXCLUSIVE));
312335

336+
errcallback.callback = pgaio_uring_completion_error_callback;
337+
errcallback.previous = error_context_stack;
338+
error_context_stack = &errcallback;
339+
313340
/*
314341
* Don't drain more events than available right now. Otherwise it's
315342
* plausible that one backend could get stuck, for a while, receiving CQEs
@@ -337,9 +364,11 @@ pgaio_uring_drain_locked(PgAioUringContext *context)
337364
PgAioHandle *ioh;
338365

339366
ioh = io_uring_cqe_get_data(cqe);
367+
errcallback.arg = ioh;
340368
io_uring_cqe_seen(&context->io_uring_ring, cqe);
341369

342370
pgaio_io_process_completion(ioh, cqe->res);
371+
errcallback.arg = NULL;
343372
}
344373

345374
END_CRIT_SECTION();
@@ -348,6 +377,8 @@ pgaio_uring_drain_locked(PgAioUringContext *context)
348377
"drained %d/%d, now expecting %d",
349378
ncqes, orig_ready, io_uring_cq_ready(&context->io_uring_ring));
350379
}
380+
381+
error_context_stack = errcallback.previous;
351382
}
352383

353384
static void

src/backend/storage/aio/method_worker.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,11 +357,33 @@ pgaio_worker_register(void)
357357
on_shmem_exit(pgaio_worker_die, 0);
358358
}
359359

360+
static void
361+
pgaio_worker_error_callback(void *arg)
362+
{
363+
ProcNumber owner;
364+
PGPROC *owner_proc;
365+
int32 owner_pid;
366+
PgAioHandle *ioh = arg;
367+
368+
if (!ioh)
369+
return;
370+
371+
Assert(ioh->owner_procno != MyProcNumber);
372+
Assert(MyBackendType == B_IO_WORKER);
373+
374+
owner = ioh->owner_procno;
375+
owner_proc = GetPGProcByNumber(owner);
376+
owner_pid = owner_proc->pid;
377+
378+
errcontext("I/O worker executing I/O on behalf of process %d", owner_pid);
379+
}
380+
360381
void
361382
IoWorkerMain(const void *startup_data, size_t startup_data_len)
362383
{
363384
sigjmp_buf local_sigjmp_buf;
364385
PgAioHandle *volatile error_ioh = NULL;
386+
ErrorContextCallback errcallback = {0};
365387
volatile int error_errno = 0;
366388
char cmd[128];
367389

@@ -388,6 +410,10 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
388410
sprintf(cmd, "%d", MyIoWorkerId);
389411
set_ps_display(cmd);
390412

413+
errcallback.callback = pgaio_worker_error_callback;
414+
errcallback.previous = error_context_stack;
415+
error_context_stack = &errcallback;
416+
391417
/* see PostgresMain() */
392418
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
393419
{
@@ -471,6 +497,7 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
471497

472498
ioh = &pgaio_ctl->io_handles[io_index];
473499
error_ioh = ioh;
500+
errcallback.arg = ioh;
474501

475502
pgaio_debug_io(DEBUG4, ioh,
476503
"worker %d processing IO",
@@ -511,6 +538,7 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
511538
pgaio_io_perform_synchronously(ioh);
512539

513540
RESUME_INTERRUPTS();
541+
errcallback.arg = NULL;
514542
}
515543
else
516544
{
@@ -522,6 +550,7 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
522550
CHECK_FOR_INTERRUPTS();
523551
}
524552

553+
error_context_stack = errcallback.previous;
525554
proc_exit(0);
526555
}
527556

0 commit comments

Comments
 (0)