@@ -6376,13 +6376,18 @@ heap_inplace_update_and_unlock(Relation relation,
6376
6376
HeapTupleHeader htup = oldtup -> t_data ;
6377
6377
uint32 oldlen ;
6378
6378
uint32 newlen ;
6379
+ char * dst ;
6380
+ char * src ;
6379
6381
6380
6382
Assert (ItemPointerEquals (& oldtup -> t_self , & tuple -> t_self ));
6381
6383
oldlen = oldtup -> t_len - htup -> t_hoff ;
6382
6384
newlen = tuple -> t_len - tuple -> t_data -> t_hoff ;
6383
6385
if (oldlen != newlen || htup -> t_hoff != tuple -> t_data -> t_hoff )
6384
6386
elog (ERROR , "wrong tuple length" );
6385
6387
6388
+ dst = (char * ) htup + htup -> t_hoff ;
6389
+ src = (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ;
6390
+
6386
6391
/*
6387
6392
* Construct shared cache inval if necessary. Note that because we only
6388
6393
* pass the new version of the tuple, this mustn't be used for any
@@ -6401,15 +6406,15 @@ heap_inplace_update_and_unlock(Relation relation,
6401
6406
*/
6402
6407
PreInplace_Inval ();
6403
6408
6404
- /* NO EREPORT(ERROR) from here till changes are logged */
6405
- START_CRIT_SECTION ();
6406
-
6407
- memcpy ((char * ) htup + htup -> t_hoff ,
6408
- (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ,
6409
- newlen );
6410
-
6411
6409
/*----------
6412
- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6410
+ * NO EREPORT(ERROR) from here till changes are complete
6411
+ *
6412
+ * Our buffer lock won't stop a reader having already pinned and checked
6413
+ * visibility for this tuple. Hence, we write WAL first, then mutate the
6414
+ * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6415
+ * checkpoint delay makes that acceptable. With the usual order of
6416
+ * changes, a crash after memcpy() and before XLogInsert() could allow
6417
+ * datfrozenxid to overtake relfrozenxid:
6413
6418
*
6414
6419
* ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6415
6420
* ["R" is a VACUUM tbl]
@@ -6419,31 +6424,57 @@ heap_inplace_update_and_unlock(Relation relation,
6419
6424
* D: raise pg_database.datfrozenxid, XLogInsert(), finish
6420
6425
* [crash]
6421
6426
* [recovery restores datfrozenxid w/o relfrozenxid]
6427
+ *
6428
+ * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6429
+ * the buffer to the stack before logging. Here, that facilitates an FPI
6430
+ * of the post-mutation block before we accept other sessions seeing it.
6422
6431
*/
6423
-
6424
- MarkBufferDirty (buffer );
6432
+ Assert (!MyProc -> delayChkpt );
6433
+ START_CRIT_SECTION ();
6434
+ MyProc -> delayChkpt = true;
6425
6435
6426
6436
/* XLOG stuff */
6427
6437
if (RelationNeedsWAL (relation ))
6428
6438
{
6429
6439
xl_heap_inplace xlrec ;
6440
+ PGAlignedBlock copied_buffer ;
6441
+ char * origdata = (char * ) BufferGetBlock (buffer );
6442
+ Page page = BufferGetPage (buffer );
6443
+ uint16 lower = ((PageHeader ) page )-> pd_lower ;
6444
+ uint16 upper = ((PageHeader ) page )-> pd_upper ;
6445
+ uintptr_t dst_offset_in_block ;
6446
+ RelFileNode rnode ;
6447
+ ForkNumber forkno ;
6448
+ BlockNumber blkno ;
6430
6449
XLogRecPtr recptr ;
6431
6450
6432
6451
xlrec .offnum = ItemPointerGetOffsetNumber (& tuple -> t_self );
6433
6452
6434
6453
XLogBeginInsert ();
6435
6454
XLogRegisterData ((char * ) & xlrec , SizeOfHeapInplace );
6436
6455
6437
- XLogRegisterBuffer (0 , buffer , REGBUF_STANDARD );
6438
- XLogRegisterBufData (0 , (char * ) htup + htup -> t_hoff , newlen );
6456
+ /* register block matching what buffer will look like after changes */
6457
+ memcpy (copied_buffer .data , origdata , lower );
6458
+ memcpy (copied_buffer .data + upper , origdata + upper , BLCKSZ - upper );
6459
+ dst_offset_in_block = dst - origdata ;
6460
+ memcpy (copied_buffer .data + dst_offset_in_block , src , newlen );
6461
+ BufferGetTag (buffer , & rnode , & forkno , & blkno );
6462
+ Assert (forkno == MAIN_FORKNUM );
6463
+ XLogRegisterBlock (0 , & rnode , forkno , blkno , copied_buffer .data ,
6464
+ REGBUF_STANDARD );
6465
+ XLogRegisterBufData (0 , src , newlen );
6439
6466
6440
6467
/* inplace updates aren't decoded atm, don't log the origin */
6441
6468
6442
6469
recptr = XLogInsert (RM_HEAP_ID , XLOG_HEAP_INPLACE );
6443
6470
6444
- PageSetLSN (BufferGetPage ( buffer ) , recptr );
6471
+ PageSetLSN (page , recptr );
6445
6472
}
6446
6473
6474
+ memcpy (dst , src , newlen );
6475
+
6476
+ MarkBufferDirty (buffer );
6477
+
6447
6478
LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
6448
6479
6449
6480
/*
@@ -6456,6 +6487,7 @@ heap_inplace_update_and_unlock(Relation relation,
6456
6487
*/
6457
6488
AtInplace_Inval ();
6458
6489
6490
+ MyProc -> delayChkpt = false;
6459
6491
END_CRIT_SECTION ();
6460
6492
UnlockTuple (relation , & tuple -> t_self , InplaceUpdateTupleLock );
6461
6493
0 commit comments