@@ -6344,13 +6344,18 @@ heap_inplace_update_and_unlock(Relation relation,
6344
6344
HeapTupleHeader htup = oldtup -> t_data ;
6345
6345
uint32 oldlen ;
6346
6346
uint32 newlen ;
6347
+ char * dst ;
6348
+ char * src ;
6347
6349
6348
6350
Assert (ItemPointerEquals (& oldtup -> t_self , & tuple -> t_self ));
6349
6351
oldlen = oldtup -> t_len - htup -> t_hoff ;
6350
6352
newlen = tuple -> t_len - tuple -> t_data -> t_hoff ;
6351
6353
if (oldlen != newlen || htup -> t_hoff != tuple -> t_data -> t_hoff )
6352
6354
elog (ERROR , "wrong tuple length" );
6353
6355
6356
+ dst = (char * ) htup + htup -> t_hoff ;
6357
+ src = (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ;
6358
+
6354
6359
/*
6355
6360
* Construct shared cache inval if necessary. Note that because we only
6356
6361
* pass the new version of the tuple, this mustn't be used for any
@@ -6369,15 +6374,15 @@ heap_inplace_update_and_unlock(Relation relation,
6369
6374
*/
6370
6375
PreInplace_Inval ();
6371
6376
6372
- /* NO EREPORT(ERROR) from here till changes are logged */
6373
- START_CRIT_SECTION ();
6374
-
6375
- memcpy ((char * ) htup + htup -> t_hoff ,
6376
- (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ,
6377
- newlen );
6378
-
6379
6377
/*----------
6380
- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6378
+ * NO EREPORT(ERROR) from here till changes are complete
6379
+ *
6380
+ * Our buffer lock won't stop a reader having already pinned and checked
6381
+ * visibility for this tuple. Hence, we write WAL first, then mutate the
6382
+ * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6383
+ * checkpoint delay makes that acceptable. With the usual order of
6384
+ * changes, a crash after memcpy() and before XLogInsert() could allow
6385
+ * datfrozenxid to overtake relfrozenxid:
6381
6386
*
6382
6387
* ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6383
6388
* ["R" is a VACUUM tbl]
@@ -6387,31 +6392,57 @@ heap_inplace_update_and_unlock(Relation relation,
6387
6392
* D: raise pg_database.datfrozenxid, XLogInsert(), finish
6388
6393
* [crash]
6389
6394
* [recovery restores datfrozenxid w/o relfrozenxid]
6395
+ *
6396
+ * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6397
+ * the buffer to the stack before logging. Here, that facilitates an FPI
6398
+ * of the post-mutation block before we accept other sessions seeing it.
6390
6399
*/
6391
-
6392
- MarkBufferDirty (buffer );
6400
+ Assert ((MyProc -> delayChkptFlags & DELAY_CHKPT_START ) == 0 );
6401
+ START_CRIT_SECTION ();
6402
+ MyProc -> delayChkptFlags |= DELAY_CHKPT_START ;
6393
6403
6394
6404
/* XLOG stuff */
6395
6405
if (RelationNeedsWAL (relation ))
6396
6406
{
6397
6407
xl_heap_inplace xlrec ;
6408
+ PGAlignedBlock copied_buffer ;
6409
+ char * origdata = (char * ) BufferGetBlock (buffer );
6410
+ Page page = BufferGetPage (buffer );
6411
+ uint16 lower = ((PageHeader ) page )-> pd_lower ;
6412
+ uint16 upper = ((PageHeader ) page )-> pd_upper ;
6413
+ uintptr_t dst_offset_in_block ;
6414
+ RelFileLocator rlocator ;
6415
+ ForkNumber forkno ;
6416
+ BlockNumber blkno ;
6398
6417
XLogRecPtr recptr ;
6399
6418
6400
6419
xlrec .offnum = ItemPointerGetOffsetNumber (& tuple -> t_self );
6401
6420
6402
6421
XLogBeginInsert ();
6403
6422
XLogRegisterData ((char * ) & xlrec , SizeOfHeapInplace );
6404
6423
6405
- XLogRegisterBuffer (0 , buffer , REGBUF_STANDARD );
6406
- XLogRegisterBufData (0 , (char * ) htup + htup -> t_hoff , newlen );
6424
+ /* register block matching what buffer will look like after changes */
6425
+ memcpy (copied_buffer .data , origdata , lower );
6426
+ memcpy (copied_buffer .data + upper , origdata + upper , BLCKSZ - upper );
6427
+ dst_offset_in_block = dst - origdata ;
6428
+ memcpy (copied_buffer .data + dst_offset_in_block , src , newlen );
6429
+ BufferGetTag (buffer , & rlocator , & forkno , & blkno );
6430
+ Assert (forkno == MAIN_FORKNUM );
6431
+ XLogRegisterBlock (0 , & rlocator , forkno , blkno , copied_buffer .data ,
6432
+ REGBUF_STANDARD );
6433
+ XLogRegisterBufData (0 , src , newlen );
6407
6434
6408
6435
/* inplace updates aren't decoded atm, don't log the origin */
6409
6436
6410
6437
recptr = XLogInsert (RM_HEAP_ID , XLOG_HEAP_INPLACE );
6411
6438
6412
- PageSetLSN (BufferGetPage ( buffer ) , recptr );
6439
+ PageSetLSN (page , recptr );
6413
6440
}
6414
6441
6442
+ memcpy (dst , src , newlen );
6443
+
6444
+ MarkBufferDirty (buffer );
6445
+
6415
6446
LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
6416
6447
6417
6448
/*
@@ -6424,6 +6455,7 @@ heap_inplace_update_and_unlock(Relation relation,
6424
6455
*/
6425
6456
AtInplace_Inval ();
6426
6457
6458
+ MyProc -> delayChkptFlags &= ~DELAY_CHKPT_START ;
6427
6459
END_CRIT_SECTION ();
6428
6460
UnlockTuple (relation , & tuple -> t_self , InplaceUpdateTupleLock );
6429
6461
0 commit comments