@@ -6104,13 +6104,18 @@ heap_inplace_update_and_unlock(Relation relation,
6104
6104
HeapTupleHeader htup = oldtup -> t_data ;
6105
6105
uint32 oldlen ;
6106
6106
uint32 newlen ;
6107
+ char * dst ;
6108
+ char * src ;
6107
6109
6108
6110
Assert (ItemPointerEquals (& oldtup -> t_self , & tuple -> t_self ));
6109
6111
oldlen = oldtup -> t_len - htup -> t_hoff ;
6110
6112
newlen = tuple -> t_len - tuple -> t_data -> t_hoff ;
6111
6113
if (oldlen != newlen || htup -> t_hoff != tuple -> t_data -> t_hoff )
6112
6114
elog (ERROR , "wrong tuple length" );
6113
6115
6116
+ dst = (char * ) htup + htup -> t_hoff ;
6117
+ src = (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ;
6118
+
6114
6119
/*
6115
6120
* Construct shared cache inval if necessary. Note that because we only
6116
6121
* pass the new version of the tuple, this mustn't be used for any
@@ -6129,15 +6134,15 @@ heap_inplace_update_and_unlock(Relation relation,
6129
6134
*/
6130
6135
PreInplace_Inval ();
6131
6136
6132
- /* NO EREPORT(ERROR) from here till changes are logged */
6133
- START_CRIT_SECTION ();
6134
-
6135
- memcpy ((char * ) htup + htup -> t_hoff ,
6136
- (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ,
6137
- newlen );
6138
-
6139
6137
/*----------
6140
- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6138
+ * NO EREPORT(ERROR) from here till changes are complete
6139
+ *
6140
+ * Our buffer lock won't stop a reader having already pinned and checked
6141
+ * visibility for this tuple. Hence, we write WAL first, then mutate the
6142
+ * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6143
+ * checkpoint delay makes that acceptable. With the usual order of
6144
+ * changes, a crash after memcpy() and before XLogInsert() could allow
6145
+ * datfrozenxid to overtake relfrozenxid:
6141
6146
*
6142
6147
* ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6143
6148
* ["R" is a VACUUM tbl]
@@ -6147,31 +6152,57 @@ heap_inplace_update_and_unlock(Relation relation,
6147
6152
* D: raise pg_database.datfrozenxid, XLogInsert(), finish
6148
6153
* [crash]
6149
6154
* [recovery restores datfrozenxid w/o relfrozenxid]
6155
+ *
6156
+ * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6157
+ * the buffer to the stack before logging. Here, that facilitates an FPI
6158
+ * of the post-mutation block before we accept other sessions seeing it.
6150
6159
*/
6151
-
6152
- MarkBufferDirty (buffer );
6160
+ Assert (!MyProc -> delayChkpt );
6161
+ START_CRIT_SECTION ();
6162
+ MyProc -> delayChkpt = true;
6153
6163
6154
6164
/* XLOG stuff */
6155
6165
if (RelationNeedsWAL (relation ))
6156
6166
{
6157
6167
xl_heap_inplace xlrec ;
6168
+ PGAlignedBlock copied_buffer ;
6169
+ char * origdata = (char * ) BufferGetBlock (buffer );
6170
+ Page page = BufferGetPage (buffer );
6171
+ uint16 lower = ((PageHeader ) page )-> pd_lower ;
6172
+ uint16 upper = ((PageHeader ) page )-> pd_upper ;
6173
+ uintptr_t dst_offset_in_block ;
6174
+ RelFileNode rnode ;
6175
+ ForkNumber forkno ;
6176
+ BlockNumber blkno ;
6158
6177
XLogRecPtr recptr ;
6159
6178
6160
6179
xlrec .offnum = ItemPointerGetOffsetNumber (& tuple -> t_self );
6161
6180
6162
6181
XLogBeginInsert ();
6163
6182
XLogRegisterData ((char * ) & xlrec , SizeOfHeapInplace );
6164
6183
6165
- XLogRegisterBuffer (0 , buffer , REGBUF_STANDARD );
6166
- XLogRegisterBufData (0 , (char * ) htup + htup -> t_hoff , newlen );
6184
+ /* register block matching what buffer will look like after changes */
6185
+ memcpy (copied_buffer .data , origdata , lower );
6186
+ memcpy (copied_buffer .data + upper , origdata + upper , BLCKSZ - upper );
6187
+ dst_offset_in_block = dst - origdata ;
6188
+ memcpy (copied_buffer .data + dst_offset_in_block , src , newlen );
6189
+ BufferGetTag (buffer , & rnode , & forkno , & blkno );
6190
+ Assert (forkno == MAIN_FORKNUM );
6191
+ XLogRegisterBlock (0 , & rnode , forkno , blkno , copied_buffer .data ,
6192
+ REGBUF_STANDARD );
6193
+ XLogRegisterBufData (0 , src , newlen );
6167
6194
6168
6195
/* inplace updates aren't decoded atm, don't log the origin */
6169
6196
6170
6197
recptr = XLogInsert (RM_HEAP_ID , XLOG_HEAP_INPLACE );
6171
6198
6172
- PageSetLSN (BufferGetPage ( buffer ) , recptr );
6199
+ PageSetLSN (page , recptr );
6173
6200
}
6174
6201
6202
+ memcpy (dst , src , newlen );
6203
+
6204
+ MarkBufferDirty (buffer );
6205
+
6175
6206
LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
6176
6207
6177
6208
/*
@@ -6184,6 +6215,7 @@ heap_inplace_update_and_unlock(Relation relation,
6184
6215
*/
6185
6216
AtInplace_Inval ();
6186
6217
6218
+ MyProc -> delayChkpt = false;
6187
6219
END_CRIT_SECTION ();
6188
6220
UnlockTuple (relation , & tuple -> t_self , InplaceUpdateTupleLock );
6189
6221
0 commit comments