Skip to content

Commit ee37fc9

Browse files
committed
PG-1604 fix: preallocate one more record for the cache
There is at lesat one corner case scenario where we have to load the last record into the cache during a write: * replica crashes, receives last segment from primary * replica replays last segment, reaches end * replica activtes new key * replica replays prepared transaction, has to use old keys again * old key write function sees that we generated a new key, tries to load it In this scenario we could get away by detecting that we are in a write, and asserting if we tried to use the last key. But in a release build assertions are not fired, and we would end up writing some non encrypted data to disk, and later if we have to run recovery failing. It could be a FATAL, but that would still crash the server, and the next startup would crash again and again... Instead, to properly avoid this situation we preallocate memory for one more key in the cache during initialization. Since we can only add one extra key to the cache during the servers run, this means we no longer try to allocate in the critical section in any corner case. While this is not the nicest solution, it is simple and keeps the current cache and decrypt/encrypt logic the same as before. Any other solution would be more complex, and even more of a hack, as it would require dealing with a possibly out of date cache.
1 parent 167aef2 commit ee37fc9

File tree

3 files changed

+42
-8
lines changed

3 files changed

+42
-8
lines changed

contrib/pg_tde/src/access/pg_tde_xlog_keys.c

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "common/pg_tde_utils.h"
1717
#include "encryption/enc_aes.h"
1818
#include "encryption/enc_tde.h"
19+
#include "utils/palloc.h"
1920

2021
#ifdef FRONTEND
2122
#include "pg_tde_fe.h"
@@ -43,7 +44,9 @@ typedef struct WalKeyFileEntry
4344
} WalKeyFileEntry;
4445

4546
static WALKeyCacheRec *tde_wal_key_cache = NULL;
47+
static WALKeyCacheRec *tde_wal_extra_alloc = NULL;
4648
static WALKeyCacheRec *tde_wal_key_last_rec = NULL;
49+
static WalEncryptionKey *tde_wal_extra_key = NULL;
4750

4851
static WALKeyCacheRec *pg_tde_add_wal_key_to_cache(WalEncryptionKey *cached_key);
4952
static WalEncryptionKey *pg_tde_decrypt_wal_key(const TDEPrincipalKey *principal_key, WalKeyFileEntry *entry);
@@ -326,6 +329,34 @@ pg_tde_fetch_wal_keys(WalLocation start)
326329
return return_wal_rec;
327330
}
328331

332+
/*
333+
* In special cases, we have to add one more record to the WAL key cache during write (in the critical section, when we can't allocate).
334+
* This method is a helper to deal with that: when adding a single key, we potentially allocate 2 records.
335+
* These variables help preallocate them, so in the critical section we can just use the already allocated objects, and update the cache with them.
336+
*
337+
* While this is somewhat a hack, it is also simple, safe, reliable, and way easier to implement than to refactor the cache or the decryption/encryption loop.
338+
*/
339+
void
340+
pg_tde_wal_cache_extra_palloc(void)
341+
{
342+
#ifndef FRONTEND
343+
MemoryContext oldCtx;
344+
345+
oldCtx = MemoryContextSwitchTo(TopMemoryContext);
346+
#endif
347+
if (tde_wal_extra_alloc == NULL)
348+
{
349+
tde_wal_extra_alloc = palloc0_object(WALKeyCacheRec);
350+
}
351+
if (tde_wal_extra_key == NULL)
352+
{
353+
tde_wal_extra_key = palloc0_object(WalEncryptionKey);
354+
}
355+
#ifndef FRONTEND
356+
MemoryContextSwitchTo(oldCtx);
357+
#endif
358+
}
359+
329360
static WALKeyCacheRec *
330361
pg_tde_add_wal_key_to_cache(WalEncryptionKey *key)
331362
{
@@ -335,7 +366,8 @@ pg_tde_add_wal_key_to_cache(WalEncryptionKey *key)
335366

336367
oldCtx = MemoryContextSwitchTo(TopMemoryContext);
337368
#endif
338-
wal_rec = palloc0_object(WALKeyCacheRec);
369+
wal_rec = tde_wal_extra_alloc == NULL ? palloc0_object(WALKeyCacheRec) : tde_wal_extra_alloc;
370+
tde_wal_extra_alloc = NULL;
339371
#ifndef FRONTEND
340372
MemoryContextSwitchTo(oldCtx);
341373
#endif
@@ -553,7 +585,9 @@ pg_tde_write_wal_key_file_entry(const WalEncryptionKey *rel_key_data,
553585
static WalEncryptionKey *
554586
pg_tde_decrypt_wal_key(const TDEPrincipalKey *principal_key, WalKeyFileEntry *entry)
555587
{
556-
WalEncryptionKey *key = palloc_object(WalEncryptionKey);
588+
WalEncryptionKey *key = tde_wal_extra_key == NULL ? palloc_object(WalEncryptionKey) : tde_wal_extra_key;
589+
590+
tde_wal_extra_key = NULL;
557591

558592
Assert(principal_key);
559593

contrib/pg_tde/src/access/pg_tde_xlog_smgr.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
248248

249249
/* cache is empty, prefetch keys from disk */
250250
pg_tde_fetch_wal_keys(start);
251+
pg_tde_wal_cache_extra_palloc();
251252
}
252253

253254
if (key)
@@ -284,8 +285,8 @@ TDEXLogWriteEncryptedPagesOldKeys(int fd, const void *buf, size_t count, off_t o
284285
memcpy(enc_buff, buf, count);
285286

286287
/*
287-
* This method potentially allocates, but only in very early execution
288-
* Shouldn't happen in a write, where we are in a critical section
288+
* This method potentially allocates, but only in very early execution Can
289+
* happen during a write, but we have one more cache entry preallocated.
289290
*/
290291
TDEXLogCryptBuffer(buf, enc_buff, count, offset, tli, segno, segSize);
291292

@@ -356,7 +357,7 @@ tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset,
356357
lastKeyUsable = (writeKeyLoc.lsn != 0);
357358
afterWriteKey = wal_location_cmp(writeKeyLoc, loc) <= 0;
358359

359-
if (EncryptionKey.type != WAL_KEY_TYPE_INVALID && !lastKeyUsable)
360+
if (EncryptionKey.type != WAL_KEY_TYPE_INVALID && !lastKeyUsable && afterWriteKey)
360361
{
361362
WALKeyCacheRec *last_key = pg_tde_get_last_wal_key();
362363

@@ -442,10 +443,8 @@ TDEXLogCryptBuffer(const void *buf, void *out_buf, size_t count, off_t offset,
442443
WALKeyCacheRec *last_key = pg_tde_get_last_wal_key();
443444
WalLocation write_loc = {.tli = TDEXLogGetEncKeyTli(),.lsn = write_key_lsn};
444445

445-
Assert(last_key);
446-
447446
/* write has generated a new key, need to fetch it */
448-
if (wal_location_cmp(last_key->start, write_loc) < 0)
447+
if (last_key != NULL && wal_location_cmp(last_key->start, write_loc) < 0)
449448
{
450449
pg_tde_fetch_wal_keys(write_loc);
451450

contrib/pg_tde/src/include/access/pg_tde_xlog_keys.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,5 +82,6 @@ extern WalEncryptionKey *pg_tde_read_last_wal_key(void);
8282
extern void pg_tde_save_server_key(const TDEPrincipalKey *principal_key, bool write_xlog);
8383
extern void pg_tde_save_server_key_redo(const TDESignedPrincipalKeyInfo *signed_key_info);
8484
extern void pg_tde_wal_last_key_set_location(WalLocation loc);
85+
extern void pg_tde_wal_cache_extra_palloc(void);
8586

8687
#endif /* PG_TDE_XLOG_KEYS_H */

0 commit comments

Comments
 (0)