@@ -6072,23 +6072,245 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
 }
 
 /*
- * heap_inplace_update - update a tuple "in place" (ie, overwrite it)
- *
- * Overwriting violates both MVCC and transactional safety, so the uses
- * of this function in Postgres are extremely limited. Nonetheless we
- * find some places to use it.
- *
- * The tuple cannot change size, and therefore it's reasonable to assume
- * that its null bitmap (if any) doesn't change either. So we just
- * overwrite the data portion of the tuple without touching the null
- * bitmap or any of the header fields.
+ * heap_inplace_lock - protect inplace update from concurrent heap_update()
+ *
+ * Evaluate whether the tuple's state is compatible with a no-key update.
+ * Current transaction rowmarks are fine, as is KEY SHARE from any
+ * transaction. If compatible, return true with the buffer exclusive-locked,
+ * and the caller must release that by calling
+ * heap_inplace_update_and_unlock(), calling heap_inplace_unlock(), or raising
+ * an error. Otherwise, return false after blocking transactions, if any,
+ * have ended.
+ *
+ * Since this is intended for system catalogs and SERIALIZABLE doesn't cover
+ * DDL, this doesn't guarantee any particular predicate locking.
+ *
+ * One could modify this to return true for tuples with delete in progress.
+ * All inplace updaters take a lock that conflicts with DROP. If explicit
+ * "DELETE FROM pg_class" is in progress, we'll wait for it like we would an
+ * update.
+ *
+ * Readers of inplace-updated fields expect changes to those fields to be
+ * durable. For example, vac_truncate_clog() reads datfrozenxid from
+ * pg_database tuples via catalog snapshots. A future snapshot must not
+ * return a lower datfrozenxid for the same database OID (lower in the
+ * FullTransactionIdPrecedes() sense). We achieve that since no update of a
+ * tuple can start while we hold a lock on its buffer. In cases like
+ * BEGIN;GRANT;CREATE INDEX;COMMIT we're inplace-updating a tuple visible only
+ * to this transaction. ROLLBACK then is one case where it's okay to lose
+ * inplace updates. (Restoring relhasindex=false on ROLLBACK is fine, since
+ * any concurrent CREATE INDEX would have blocked, then inplace-updated the
+ * committed tuple.)
+ *
+ * In principle, we could avoid waiting by overwriting every tuple in the
+ * updated tuple chain. Reader expectations permit updating a tuple only if
+ * it's aborted, is the tail of the chain, or we already updated the tuple
+ * referenced in its t_ctid. Hence, we would need to overwrite the tuples in
+ * order from tail to head. That would imply either (a) mutating all tuples
+ * in one critical section or (b) accepting a chance of partial completion.
+ * Partial completion of a relfrozenxid update would have the weird
+ * consequence that the table's next VACUUM could see the table's relfrozenxid
+ * move forward between vacuum_get_cutoffs() and finishing.
+ */
+bool
+heap_inplace_lock(Relation relation,
+                  HeapTuple oldtup_ptr, Buffer buffer)
+{
+    HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
+    TM_Result   result;
+    bool        ret;
+
+    Assert(BufferIsValid(buffer));
+
+    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+    /*----------
+     * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
+     *
+     * - wait unconditionally
+     * - no tuple locks
+     * - don't recheck header after wait: simpler to defer to next iteration
+     * - don't try to continue even if the updater aborts: likewise
+     * - no crosscheck
+     */
+    result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
+                                      buffer);
+
+    if (result == TM_Invisible)
+    {
+        /* no known way this can happen */
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg_internal("attempted to overwrite invisible tuple")));
+    }
+    else if (result == TM_SelfModified)
+    {
+        /*
+         * CREATE INDEX might reach this if an expression is silly enough to
+         * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
+         * statements might get here after a heap_update() of the same row, in
+         * the absence of an intervening CommandCounterIncrement().
+         */
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
+    }
+    else if (result == TM_BeingModified)
+    {
+        TransactionId xwait;
+        uint16      infomask;
+
+        xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+        infomask = oldtup.t_data->t_infomask;
+
+        if (infomask & HEAP_XMAX_IS_MULTI)
+        {
+            LockTupleMode lockmode = LockTupleNoKeyExclusive;
+            MultiXactStatus mxact_status = MultiXactStatusNoKeyUpdate;
+            int         remain;
+            bool        current_is_member;
+
+            if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
+                                        lockmode, &current_is_member))
+            {
+                LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+                ret = false;
+                MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
+                                relation, &oldtup.t_self, XLTW_Update,
+                                &remain);
+            }
+            else
+                ret = true;
+        }
+        else if (TransactionIdIsCurrentTransactionId(xwait))
+            ret = true;
+        else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
+            ret = true;
+        else
+        {
+            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+            ret = false;
+            XactLockTableWait(xwait, relation, &oldtup.t_self,
+                              XLTW_Update);
+        }
+    }
+    else
+    {
+        ret = (result == TM_Ok);
+        if (!ret)
+        {
+            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+        }
+    }
+
+    /*
+     * GetCatalogSnapshot() relies on invalidation messages to know when to
+     * take a new snapshot. COMMIT of xwait is responsible for sending the
+     * invalidation. We're not acquiring heavyweight locks sufficient to
+     * block if not yet sent, so we must take a new snapshot to ensure a later
+     * attempt has a fair chance. While we don't need this if xwait aborted,
+     * don't bother optimizing that.
+     */
+    if (!ret)
+        InvalidateCatalogSnapshot();
+    return ret;
+}
+
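To make the caller contract concrete, here is a minimal sketch of the retry loop a caller builds around heap_inplace_lock(). The fetch_tuple() and release_scan() helpers are hypothetical stand-ins for the systable scan machinery; only heap_inplace_lock() itself is part of this patch.

```c
/*
 * Hedged sketch of a heap_inplace_lock() retry loop.  fetch_tuple() and
 * release_scan() are hypothetical placeholders for the systable scan calls.
 */
for (;;)
{
    HeapTuple   oldtup;
    Buffer      buffer;

    /* refetch under the current catalog snapshot */
    oldtup = fetch_tuple(relation, &buffer);    /* hypothetical */
    if (!HeapTupleIsValid(oldtup))
        break;                  /* row is gone; nothing to update */

    if (heap_inplace_lock(relation, oldtup, buffer))
        break;                  /* buffer is now exclusive-locked */

    /*
     * A false return means the buffer lock was already released, the
     * blocking transaction has ended, and the catalog snapshot was
     * invalidated, so the next fetch sees the successor tuple version.
     */
    release_scan();             /* hypothetical */
}
```

After a true return, the caller must eventually reach heap_inplace_update_and_unlock() or heap_inplace_unlock(), per the header comment above.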
+/*
+ * heap_inplace_update_and_unlock - core of systable_inplace_update_finish
  *
- * tuple is an in-memory tuple structure containing the data to be written
- * over the target tuple. Also, tuple->t_self identifies the target tuple.
+ * The tuple cannot change size, and therefore its header fields and null
+ * bitmap (if any) don't change either.
+ */
+void
+heap_inplace_update_and_unlock(Relation relation,
+                               HeapTuple oldtup, HeapTuple tuple,
+                               Buffer buffer)
+{
+    HeapTupleHeader htup = oldtup->t_data;
+    uint32      oldlen;
+    uint32      newlen;
+
+    Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
+    oldlen = oldtup->t_len - htup->t_hoff;
+    newlen = tuple->t_len - tuple->t_data->t_hoff;
+    if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
+        elog(ERROR, "wrong tuple length");
+
+    /* NO EREPORT(ERROR) from here till changes are logged */
+    START_CRIT_SECTION();
+
+    memcpy((char *) htup + htup->t_hoff,
+           (char *) tuple->t_data + tuple->t_data->t_hoff,
+           newlen);
+
+    /*----------
+     * XXX A crash here can allow datfrozenxid to get ahead of relfrozenxid:
+     *
+     * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
+     * ["R" is a VACUUM tbl]
+     * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
+     * D: systable_getnext() returns pg_class tuple of tbl
+     * R: memcpy() into pg_class tuple of tbl
+     * D: raise pg_database.datfrozenxid, XLogInsert(), finish
+     * [crash]
+     * [recovery restores datfrozenxid w/o relfrozenxid]
+     */
+
+    MarkBufferDirty(buffer);
+
+    /* XLOG stuff */
+    if (RelationNeedsWAL(relation))
+    {
+        xl_heap_inplace xlrec;
+        XLogRecPtr  recptr;
+
+        xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
+
+        XLogBeginInsert();
+        XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
+
+        XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+        XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
+
+        /* inplace updates aren't decoded atm, don't log the origin */
+
+        recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
+
+        PageSetLSN(BufferGetPage(buffer), recptr);
+    }
+
+    END_CRIT_SECTION();
+
+    heap_inplace_unlock(relation, oldtup, buffer);
+
+    /*
+     * Send out shared cache inval if necessary. Note that because we only
+     * pass the new version of the tuple, this mustn't be used for any
+     * operations that could change catcache lookup keys. But we aren't
+     * bothering with index updates either, so that's true a fortiori.
+     *
+     * XXX ROLLBACK discards the invalidation. See test inplace-inval.spec.
+     */
+    if (!IsBootstrapProcessingMode())
+        CacheInvalidateHeapTuple(relation, tuple, NULL);
+}
+
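On the success path, the caller mutates a same-size copy of the tuple and hands it back. A sketch under stated assumptions: heap_copytuple() supplies the modifiable copy, and pg_class.relhasindex stands in for whichever fixed-width field is being changed.

```c
/* Sketch: finishing after heap_inplace_lock() returned true. */
HeapTuple   newtup = heap_copytuple(oldtup);    /* same length as oldtup */
Form_pg_class pgcform = (Form_pg_class) GETSTRUCT(newtup);

pgcform->relhasindex = true;    /* mutate fixed-width fields only */

/* overwrites the data portion, WAL-logs it, and drops the buffer lock */
heap_inplace_update_and_unlock(relation, oldtup, newtup, buffer);
```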
+/*
+ * heap_inplace_unlock - reverse of heap_inplace_lock
+ */
+void
+heap_inplace_unlock(Relation relation,
+                    HeapTuple oldtup, Buffer buffer)
+{
+    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+}
+
+/*
+ * heap_inplace_update - deprecated
  *
- * Note that the tuple updated here had better not come directly from the
- * syscache if the relation has a toast relation as this tuple could
- * include toast values that have been expanded, causing a failure here.
+ * This exists only to keep modules working in back branches. Affected
+ * modules should migrate to systable_inplace_update_begin().
  */
 void
 heap_inplace_update(Relation relation, HeapTuple tuple)
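Affected modules would migrate along these lines. This is a sketch against the systable_inplace_update_begin()/_finish() API this commit adds in genam.c; the function name, field choice, and error handling here are illustrative, not part of the commit.

```c
#include "postgres.h"

#include "access/genam.h"
#include "access/htup_details.h"
#include "access/skey.h"
#include "access/stratnum.h"
#include "catalog/pg_class.h"
#include "utils/fmgroids.h"

/* Hypothetical migration example; real callers live in vacuum.c and friends. */
static void
set_relhasindex_inplace(Relation pg_class_rel, Oid relid)
{
    HeapTuple   ctup;
    void       *state;
    ScanKeyData key[1];

    ScanKeyInit(&key[0], Anum_pg_class_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(relid));

    /* returns a locked, modifiable copy of the row (or NULL if it vanished) */
    systable_inplace_update_begin(pg_class_rel, ClassOidIndexId, true, NULL,
                                  1, key, &ctup, &state);
    if (!HeapTupleIsValid(ctup))
        elog(ERROR, "cache lookup failed for relation %u", relid);

    /* mutate only fixed-width fields, then overwrite the tuple in place */
    ((Form_pg_class) GETSTRUCT(ctup))->relhasindex = true;
    systable_inplace_update_finish(state, ctup);

    heap_freetuple(ctup);
}
```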