
Commit 1c6821b

Fix and enhance the assertion of no palloc's in a critical section.
The assertion failed if WAL_DEBUG or LWLOCK_STATS was enabled; fix that by using separate memory contexts for the allocations made within those code blocks.

This patch introduces a mechanism for marking any memory context as allowed in a critical section. Previously ErrorContext was exempt as a special case.

Instead of a blanket exception for the checkpointer process, only the memory context used for the pending ops hash table is now exempt.
1 parent a749a23 commit 1c6821b
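For reference, the usage pattern this patch establishes in xlog.c, lwlock.c, and md.c is roughly the following (a minimal sketch for illustration only; the variable cxt and the context name "Some debug context" are placeholders, not part of the patch):

	MemoryContext cxt;

	/* A dedicated context for allocations that may happen inside a critical section. */
	cxt = AllocSetContextCreate(TopMemoryContext,
								"Some debug context",	/* placeholder name */
								ALLOCSET_DEFAULT_MINSIZE,
								ALLOCSET_DEFAULT_INITSIZE,
								ALLOCSET_DEFAULT_MAXSIZE);
	/* Exempt this context from the no-palloc-in-critical-section assertion. */
	MemoryContextAllowInCriticalSection(cxt, true);

Note that the exemption only silences the assertion; an out-of-memory failure in such a context inside a critical section still escalates to PANIC, which is why the patch reserves the exemption for debugging code and the small pending-ops hash table.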

9 files changed: +147 -42 lines


src/backend/access/transam/xlog.c

Lines changed: 27 additions & 2 deletions
@@ -60,6 +60,7 @@
 #include "storage/spin.h"
 #include "utils/builtins.h"
 #include "utils/guc.h"
+#include "utils/memutils.h"
 #include "utils/ps_status.h"
 #include "utils/relmapper.h"
 #include "utils/snapmgr.h"
@@ -736,6 +737,10 @@ static bool bgwriterLaunched = false;
 static int	MyLockNo = 0;
 static bool holdingAllLocks = false;
 
+#ifdef WAL_DEBUG
+static MemoryContext walDebugCxt = NULL;
+#endif
+
 static void readRecoveryCommandFile(void);
 static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
 static bool recoveryStopsBefore(XLogRecord *record);
@@ -1258,6 +1263,7 @@ begin:;
 	if (XLOG_DEBUG)
 	{
 		StringInfoData buf;
+		MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
 
 		initStringInfo(&buf);
 		appendStringInfo(&buf, "INSERT @ %X/%X: ",
@@ -1282,10 +1288,11 @@ begin:;
 
 			appendStringInfoString(&buf, " - ");
 			RmgrTable[rechdr->xl_rmid].rm_desc(&buf, (XLogRecord *) recordbuf.data);
-			pfree(recordbuf.data);
 		}
 		elog(LOG, "%s", buf.data);
-		pfree(buf.data);
+
+		MemoryContextSwitchTo(oldCxt);
+		MemoryContextReset(walDebugCxt);
 	}
 #endif
 
@@ -4807,6 +4814,24 @@ XLOGShmemInit(void)
 	char	   *allocptr;
 	int			i;
 
+#ifdef WAL_DEBUG
+	/*
+	 * Create a memory context for WAL debugging that's exempt from the
+	 * normal "no pallocs in critical section" rule. Yes, that can lead to a
+	 * PANIC if an allocation fails, but wal_debug is not for production use
+	 * anyway.
+	 */
+	if (walDebugCxt == NULL)
+	{
+		walDebugCxt = AllocSetContextCreate(TopMemoryContext,
+											"WAL Debug",
+											ALLOCSET_DEFAULT_MINSIZE,
+											ALLOCSET_DEFAULT_INITSIZE,
+											ALLOCSET_DEFAULT_MAXSIZE);
+		MemoryContextAllowInCriticalSection(walDebugCxt, true);
+	}
+#endif
+
 	ControlFile = (ControlFileData *)
 		ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
 	XLogCtl = (XLogCtlData *)

src/backend/postmaster/checkpointer.c

Lines changed: 15 additions & 15 deletions
@@ -1305,19 +1305,6 @@ AbsorbFsyncRequests(void)
 	if (!AmCheckpointerProcess())
 		return;
 
-	/*
-	 * We have to PANIC if we fail to absorb all the pending requests (eg,
-	 * because our hashtable runs out of memory). This is because the system
-	 * cannot run safely if we are unable to fsync what we have been told to
-	 * fsync. Fortunately, the hashtable is so small that the problem is
-	 * quite unlikely to arise in practice.
-	 */
-	START_CRIT_SECTION();
-
-	/*
-	 * We try to avoid holding the lock for a long time by copying the request
-	 * array.
-	 */
 	LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
 
 	/* Transfer stats counts into pending pgstats message */
@@ -1327,23 +1314,36 @@ AbsorbFsyncRequests(void)
 	CheckpointerShmem->num_backend_writes = 0;
 	CheckpointerShmem->num_backend_fsync = 0;
 
+	/*
+	 * We try to avoid holding the lock for a long time by copying the request
+	 * array, and processing the requests after releasing the lock.
+	 *
+	 * Once we have cleared the requests from shared memory, we have to PANIC
+	 * if we then fail to absorb them (eg, because our hashtable runs out of
+	 * memory). This is because the system cannot run safely if we are unable
+	 * to fsync what we have been told to fsync. Fortunately, the hashtable
+	 * is so small that the problem is quite unlikely to arise in practice.
+	 */
 	n = CheckpointerShmem->num_requests;
 	if (n > 0)
 	{
 		requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));
 		memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest));
 	}
+
+	START_CRIT_SECTION();
+
 	CheckpointerShmem->num_requests = 0;
 
 	LWLockRelease(CheckpointerCommLock);
 
 	for (request = requests; n > 0; request++, n--)
 		RememberFsyncRequest(request->rnode, request->forknum, request->segno);
 
+	END_CRIT_SECTION();
+
 	if (requests)
 		pfree(requests);
-
-	END_CRIT_SECTION();
 }
 
 /*

src/backend/storage/lmgr/lwlock.c

Lines changed: 44 additions & 12 deletions
@@ -104,8 +104,8 @@ typedef struct lwlock_stats
 	int			spin_delay_count;
 } lwlock_stats;
 
-static int	counts_for_pid = 0;
 static HTAB *lwlock_stats_htab;
+static lwlock_stats lwlock_stats_dummy;
 #endif
 
 #ifdef LOCK_DEBUG
@@ -142,21 +142,39 @@ static void
 init_lwlock_stats(void)
 {
 	HASHCTL		ctl;
+	static MemoryContext lwlock_stats_cxt = NULL;
+	static bool exit_registered = false;
 
-	if (lwlock_stats_htab != NULL)
-	{
-		hash_destroy(lwlock_stats_htab);
-		lwlock_stats_htab = NULL;
-	}
+	if (lwlock_stats_cxt != NULL)
+		MemoryContextDelete(lwlock_stats_cxt);
+
+	/*
+	 * The LWLock stats will be updated within a critical section, which
+	 * requires allocating new hash entries. Allocations within a critical
+	 * section are normally not allowed because running out of memory would
+	 * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
+	 * turned on in production, so that's an acceptable risk. The hash entries
+	 * are small, so the risk of running out of memory is minimal in practice.
+	 */
+	lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
+											 "LWLock stats",
+											 ALLOCSET_DEFAULT_MINSIZE,
+											 ALLOCSET_DEFAULT_INITSIZE,
+											 ALLOCSET_DEFAULT_MAXSIZE);
+	MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
 
 	MemSet(&ctl, 0, sizeof(ctl));
 	ctl.keysize = sizeof(lwlock_stats_key);
 	ctl.entrysize = sizeof(lwlock_stats);
 	ctl.hash = tag_hash;
+	ctl.hcxt = lwlock_stats_cxt;
 	lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
-									HASH_ELEM | HASH_FUNCTION);
-	counts_for_pid = MyProcPid;
-	on_shmem_exit(print_lwlock_stats, 0);
+									HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+	if (!exit_registered)
+	{
+		on_shmem_exit(print_lwlock_stats, 0);
+		exit_registered = true;
+	}
 }
 
 static void
@@ -190,9 +208,13 @@ get_lwlock_stats_entry(LWLock *lock)
 	lwlock_stats *lwstats;
 	bool		found;
 
-	/* Set up local count state first time through in a given process */
-	if (counts_for_pid != MyProcPid)
-		init_lwlock_stats();
+	/*
+	 * During shared memory initialization, the hash table doesn't exist yet.
+	 * Stats of that phase aren't very interesting, so just collect operations
+	 * on all locks in a single dummy entry.
+	 */
+	if (lwlock_stats_htab == NULL)
+		return &lwlock_stats_dummy;
 
 	/* Fetch or create the entry. */
 	key.tranche = lock->tranche;
@@ -361,6 +383,16 @@ CreateLWLocks(void)
 	LWLockRegisterTranche(0, &MainLWLockTranche);
 }
 
+/*
+ * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
+ */
+void
+InitLWLockAccess(void)
+{
+#ifdef LWLOCK_STATS
+	init_lwlock_stats();
+#endif
+}
 
 /*
  * LWLockAssign - assign a dynamically-allocated LWLock number

src/backend/storage/lmgr/proc.c

Lines changed: 2 additions & 1 deletion
@@ -411,8 +411,9 @@ InitProcess(void)
 
 	/*
 	 * Now that we have a PGPROC, we could try to acquire locks, so initialize
-	 * the deadlock checker.
+	 * local state needed for LWLocks, and the deadlock checker.
 	 */
+	InitLWLockAccess();
 	InitDeadLockChecking();
 }

src/backend/storage/smgr/md.c

Lines changed: 21 additions & 4 deletions
@@ -115,7 +115,7 @@ typedef struct _MdfdVec
 	struct _MdfdVec *mdfd_chain;	/* next segment, or NULL */
 } MdfdVec;
 
-static MemoryContext MdCxt;		/* context for all md.c allocations */
+static MemoryContext MdCxt;		/* context for all MdfdVec objects */
 
 
 /*
@@ -157,6 +157,7 @@
 
 static HTAB *pendingOpsTable = NULL;
 static List *pendingUnlinks = NIL;
+static MemoryContext pendingOpsCxt;	/* context for the above */
 
 static CycleCtr mdsync_cycle_ctr = 0;
 static CycleCtr mdckpt_cycle_ctr = 0;
@@ -209,11 +210,27 @@ mdinit(void)
 {
 	HASHCTL		hash_ctl;
 
+	/*
+	 * XXX: The checkpointer needs to add entries to the pending ops table
+	 * when absorbing fsync requests. That is done within a critical
+	 * section, which isn't usually allowed, but we make an exception.
+	 * It means that there's a theoretical possibility that you run out of
+	 * memory while absorbing fsync requests, which leads to a PANIC.
+	 * Fortunately the hash table is small so that's unlikely to happen in
+	 * practice.
+	 */
+	pendingOpsCxt = AllocSetContextCreate(MdCxt,
+										  "Pending Ops Context",
+										  ALLOCSET_DEFAULT_MINSIZE,
+										  ALLOCSET_DEFAULT_INITSIZE,
+										  ALLOCSET_DEFAULT_MAXSIZE);
+	MemoryContextAllowInCriticalSection(pendingOpsCxt, true);
+
 	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
 	hash_ctl.keysize = sizeof(RelFileNode);
 	hash_ctl.entrysize = sizeof(PendingOperationEntry);
 	hash_ctl.hash = tag_hash;
-	hash_ctl.hcxt = MdCxt;
+	hash_ctl.hcxt = pendingOpsCxt;
 	pendingOpsTable = hash_create("Pending Ops Table",
 								  100L,
 								  &hash_ctl,
@@ -1516,7 +1533,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
 	else if (segno == UNLINK_RELATION_REQUEST)
 	{
 		/* Unlink request: put it in the linked list */
-		MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
+		MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
 		PendingUnlinkEntry *entry;
 
 		/* PendingUnlinkEntry doesn't store forknum, since it's always MAIN */
@@ -1533,7 +1550,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
 	else
 	{
 		/* Normal case: enter a request to fsync this segment */
-		MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
+		MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
 		PendingOperationEntry *entry;
 		bool		found;

src/backend/utils/mmgr/mcxt.c

Lines changed: 32 additions & 8 deletions
@@ -60,15 +60,9 @@ static void MemoryContextStatsInternal(MemoryContext context, int level);
  * You should not do memory allocations within a critical section, because
  * an out-of-memory error will be escalated to a PANIC. To enforce that
  * rule, the allocation functions Assert that.
- *
- * There are a two exceptions: 1) error recovery uses ErrorContext, which
- * has some memory set aside so that you don't run out. And 2) checkpointer
- * currently just hopes for the best, which is wrong and ought to be fixed,
- * but it's a known issue so let's not complain about in the meanwhile.
  */
 #define AssertNotInCriticalSection(context) \
-	Assert(CritSectionCount == 0 || (context) == ErrorContext || \
-		   AmCheckpointerProcess())
+	Assert(CritSectionCount == 0 || (context)->allowInCritSection)
 
 /*****************************************************************************
  *	  EXPORTED ROUTINES						 *
@@ -120,7 +114,10 @@ MemoryContextInit(void)
	 * require it to contain at least 8K at all times. This is the only case
	 * where retained memory in a context is *essential* --- we want to be
	 * sure ErrorContext still has some memory even if we've run out
-	 * elsewhere!
+	 * elsewhere! Also, allow allocations in ErrorContext within a critical
+	 * section. Otherwise a PANIC will cause an assertion failure in the
+	 * error reporting code, before printing out the real cause of the
+	 * failure.
	 *
	 * This should be the last step in this function, as elog.c assumes memory
	 * management works once ErrorContext is non-null.
@@ -130,6 +127,7 @@
 									 8 * 1024,
 									 8 * 1024,
 									 8 * 1024);
+	MemoryContextAllowInCriticalSection(ErrorContext, true);
 }
 
 /*
@@ -305,6 +303,26 @@ MemoryContextSetParent(MemoryContext context, MemoryContext new_parent)
 	}
 }
 
+/*
+ * MemoryContextAllowInCriticalSection
+ *		Allow/disallow allocations in this memory context within a critical
+ *		section.
+ *
+ * Normally, memory allocations are not allowed within a critical section,
+ * because a failure would lead to PANIC. There are a few exceptions to
+ * that, like allocations related to debugging code that is not supposed to
+ * be enabled in production. This function can be used to exempt specific
+ * memory contexts from the assertion in palloc().
+ */
+void
+MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
+{
+	AssertArg(MemoryContextIsValid(context));
+#ifdef USE_ASSERT_CHECKING
+	context->allowInCritSection = allow;
+#endif
+}
+
 /*
  * GetMemoryChunkSpace
  *		Given a currently-allocated chunk, determine the total space
@@ -533,6 +551,7 @@ MemoryContextCreate(NodeTag tag, Size size,
 	MemoryContext node;
 	Size		needed = size + strlen(name) + 1;
 
+	/* creating new memory contexts is not allowed in a critical section */
 	Assert(CritSectionCount == 0);
 
 	/* Get space for node and name */
@@ -570,6 +589,11 @@ MemoryContextCreate(NodeTag tag, Size size,
 		node->parent = parent;
 		node->nextchild = parent->firstchild;
 		parent->firstchild = node;
+
+#ifdef USE_ASSERT_CHECKING
+		/* inherit allowInCritSection flag from parent */
+		node->allowInCritSection = parent->allowInCritSection;
+#endif
 	}
 
 	VALGRIND_CREATE_MEMPOOL(node, 0, false);

src/include/nodes/memnodes.h

Lines changed: 3 additions & 0 deletions
@@ -60,6 +60,9 @@ typedef struct MemoryContextData
 	MemoryContext nextchild;	/* next child of same parent */
 	char	   *name;			/* context name (just for debugging) */
 	bool		isReset;		/* T = no space alloced since last reset */
+#ifdef USE_ASSERT_CHECKING
+	bool		allowInCritSection;	/* allow palloc in critical section */
+#endif
 } MemoryContextData;
 
 /* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */

src/include/storage/lwlock.h

Lines changed: 1 addition & 0 deletions
@@ -182,6 +182,7 @@ extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);
 
 extern Size LWLockShmemSize(void);
 extern void CreateLWLocks(void);
+extern void InitLWLockAccess(void);
 
 /*
  * The traditional method for obtaining an lwlock for use by an extension is
