@@ -5991,23 +5991,245 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
 }
 
 /*
- * heap_inplace_update - update a tuple "in place" (ie, overwrite it)
- *
- * Overwriting violates both MVCC and transactional safety, so the uses
- * of this function in Postgres are extremely limited.  Nonetheless we
- * find some places to use it.
- *
- * The tuple cannot change size, and therefore it's reasonable to assume
- * that its null bitmap (if any) doesn't change either.  So we just
- * overwrite the data portion of the tuple without touching the null
- * bitmap or any of the header fields.
+ * heap_inplace_lock - protect inplace update from concurrent heap_update()
+ *
+ * Evaluate whether the tuple's state is compatible with a no-key update.
+ * Current transaction rowmarks are fine, as is KEY SHARE from any
+ * transaction.  If compatible, return true with the buffer exclusive-locked,
+ * and the caller must release that by calling
+ * heap_inplace_update_and_unlock(), calling heap_inplace_unlock(), or raising
+ * an error.  Otherwise, return false after blocking transactions, if any,
+ * have ended.
+ *
+ * Since this is intended for system catalogs and SERIALIZABLE doesn't cover
+ * DDL, this doesn't guarantee any particular predicate locking.
+ *
+ * One could modify this to return true for tuples with delete in progress:
+ * all inplace updaters take a lock that conflicts with DROP.  If an explicit
+ * "DELETE FROM pg_class" is in progress, we'll wait for it like we would an
+ * update.
+ *
+ * Readers of inplace-updated fields expect changes to those fields are
+ * durable.  For example, vac_truncate_clog() reads datfrozenxid from
+ * pg_database tuples via catalog snapshots.  A future snapshot must not
+ * return a lower datfrozenxid for the same database OID (lower in the
+ * FullTransactionIdPrecedes() sense).  We achieve that since no update of a
+ * tuple can start while we hold a lock on its buffer.  In cases like
+ * BEGIN;GRANT;CREATE INDEX;COMMIT we're inplace-updating a tuple visible only
+ * to this transaction.  ROLLBACK then is one case where it's okay to lose
+ * inplace updates.  (Restoring relhasindex=false on ROLLBACK is fine, since
+ * any concurrent CREATE INDEX would have blocked, then inplace-updated the
+ * committed tuple.)
+ *
+ * In principle, we could avoid waiting by overwriting every tuple in the
+ * updated tuple chain.  Reader expectations permit updating a tuple only if
+ * it's aborted, is the tail of the chain, or we already updated the tuple
+ * referenced in its t_ctid.  Hence, we would need to overwrite the tuples in
+ * order from tail to head.  That would imply either (a) mutating all tuples
+ * in one critical section or (b) accepting a chance of partial completion.
+ * Partial completion of a relfrozenxid update would have the weird
+ * consequence that the table's next VACUUM could see the table's relfrozenxid
+ * move forward between vacuum_get_cutoffs() and finishing.
+ */
+bool
+heap_inplace_lock(Relation relation,
+                  HeapTuple oldtup_ptr, Buffer buffer)
+{
+    HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
+    TM_Result   result;
+    bool        ret;
+
+    Assert(BufferIsValid(buffer));
+
+    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+    /*----------
+     * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
+     *
+     * - wait unconditionally
+     * - no tuple locks
+     * - don't recheck header after wait: simpler to defer to next iteration
+     * - don't try to continue even if the updater aborts: likewise
+     * - no crosscheck
+     */
+    result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
+                                      buffer);
+
+    if (result == TM_Invisible)
+    {
+        /* no known way this can happen */
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg_internal("attempted to overwrite invisible tuple")));
+    }
+    else if (result == TM_SelfModified)
+    {
+        /*
+         * CREATE INDEX might reach this if an expression is silly enough to
+         * call e.g. SELECT ... FROM pg_class FOR SHARE.  C code of other SQL
+         * statements might get here after a heap_update() of the same row, in
+         * the absence of an intervening CommandCounterIncrement().
+         */
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
+    }
+    else if (result == TM_BeingModified)
+    {
+        TransactionId xwait;
+        uint16      infomask;
+
+        xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+        infomask = oldtup.t_data->t_infomask;
+
+        if (infomask & HEAP_XMAX_IS_MULTI)
+        {
+            LockTupleMode lockmode = LockTupleNoKeyExclusive;
+            MultiXactStatus mxact_status = MultiXactStatusNoKeyUpdate;
+            int         remain;
+            bool        current_is_member;
+
+            if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
+                                        lockmode, &current_is_member))
+            {
+                LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+                ret = false;
+                MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
+                                relation, &oldtup.t_self, XLTW_Update,
+                                &remain);
+            }
+            else
+                ret = true;
+        }
+        else if (TransactionIdIsCurrentTransactionId(xwait))
+            ret = true;
+        else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
+            ret = true;
+        else
+        {
+            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+            ret = false;
+            XactLockTableWait(xwait, relation, &oldtup.t_self,
+                              XLTW_Update);
+        }
+    }
+    else
+    {
+        ret = (result == TM_Ok);
+        if (!ret)
+        {
+            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+        }
+    }
+
+    /*
+     * GetCatalogSnapshot() relies on invalidation messages to know when to
+     * take a new snapshot.  COMMIT of xwait is responsible for sending the
+     * invalidation.  We're not acquiring heavyweight locks sufficient to
+     * block if not yet sent, so we must take a new snapshot to ensure a later
+     * attempt has a fair chance.  While we don't need this if xwait aborted,
+     * don't bother optimizing that.
+     */
+    if (!ret)
+        InvalidateCatalogSnapshot();
+    return ret;
+}
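
A caller is expected to loop on this function: a false return means the blocking transactions have ended and the catalog snapshot was invalidated, so the caller re-fetches the tuple and tries again. A minimal sketch of that retry loop, not part of this patch, with a hypothetical fetch_latest_tuple() standing in for the caller's systable or syscache fetch:

/* Sketch of the intended retry loop; fetch_latest_tuple() is hypothetical. */
for (;;)
{
    HeapTupleData oldtup;
    Buffer      buffer;

    /* re-read the row under the (possibly refreshed) catalog snapshot */
    fetch_latest_tuple(relation, &oldtup, &buffer);
    if (heap_inplace_lock(relation, &oldtup, buffer))
        break;                  /* buffer is now exclusive-locked */
    ReleaseBuffer(buffer);      /* lost the race; loop and re-fetch */
}
/* ... mutate a copy of the tuple, then call either
 * heap_inplace_update_and_unlock() or heap_inplace_unlock() ... */
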
+
+/*
+ * heap_inplace_update_and_unlock - core of systable_inplace_update_finish
  *
- * tuple is an in-memory tuple structure containing the data to be written
- * over the target tuple.  Also, tuple->t_self identifies the target tuple.
+ * The tuple cannot change size, and therefore its header fields and null
+ * bitmap (if any) don't change either.
+ */
+void
+heap_inplace_update_and_unlock(Relation relation,
+                               HeapTuple oldtup, HeapTuple tuple,
+                               Buffer buffer)
+{
+    HeapTupleHeader htup = oldtup->t_data;
+    uint32      oldlen;
+    uint32      newlen;
+
+    Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
+    oldlen = oldtup->t_len - htup->t_hoff;
+    newlen = tuple->t_len - tuple->t_data->t_hoff;
+    if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
+        elog(ERROR, "wrong tuple length");
+
+    /* NO EREPORT(ERROR) from here till changes are logged */
+    START_CRIT_SECTION();
+
+    memcpy((char *) htup + htup->t_hoff,
+           (char *) tuple->t_data + tuple->t_data->t_hoff,
+           newlen);
+
+    /*----------
+     * XXX A crash here can allow datfrozenxid to get ahead of relfrozenxid:
+     *
+     * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
+     * ["R" is a VACUUM tbl]
+     * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
+     * D: systable_getnext() returns pg_class tuple of tbl
+     * R: memcpy() into pg_class tuple of tbl
+     * D: raise pg_database.datfrozenxid, XLogInsert(), finish
+     * [crash]
+     * [recovery restores datfrozenxid w/o relfrozenxid]
+     */
+
+    MarkBufferDirty(buffer);
+
+    /* XLOG stuff */
+    if (RelationNeedsWAL(relation))
+    {
+        xl_heap_inplace xlrec;
+        XLogRecPtr  recptr;
+
+        xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
+
+        XLogBeginInsert();
+        XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
+
+        XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+        XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
+
+        /* inplace updates aren't decoded atm, don't log the origin */
+
+        recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
+
+        PageSetLSN(BufferGetPage(buffer), recptr);
+    }
+
+    END_CRIT_SECTION();
+
+    heap_inplace_unlock(relation, oldtup, buffer);
+
+    /*
+     * Send out shared cache inval if necessary.  Note that because we only
+     * pass the new version of the tuple, this mustn't be used for any
+     * operations that could change catcache lookup keys.  But we aren't
+     * bothering with index updates either, so that's true a fortiori.
+     *
+     * XXX ROLLBACK discards the invalidation.  See test inplace-inval.spec.
+     */
+    if (!IsBootstrapProcessingMode())
+        CacheInvalidateHeapTuple(relation, tuple, NULL);
+}
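
The "wrong tuple length" check above is why in-place updates are confined to fixed-width fields. As an illustration (a sketch, not part of this patch), a caller that has already locked a pg_class tuple via heap_inplace_lock() mutates a copy and hands it back; heap_copytuple() preserves t_len and t_hoff, so the check cannot fire for this kind of edit:

/*
 * Sketch: overwrite a fixed-width pg_class field in place.  Assumes
 * oldtup/buffer came from a successful heap_inplace_lock().
 */
HeapTuple   newtup = heap_copytuple(&oldtup);
Form_pg_class relform = (Form_pg_class) GETSTRUCT(newtup);

relform->relhasindex = true;    /* fixed width, so t_len is unchanged */
heap_inplace_update_and_unlock(relation, &oldtup, newtup, buffer);
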
+
+/*
+ * heap_inplace_unlock - reverse of heap_inplace_lock
+ */
+void
+heap_inplace_unlock(Relation relation,
+                    HeapTuple oldtup, Buffer buffer)
+{
+    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+}
+
+/*
+ * heap_inplace_update - deprecated
  *
- * Note that the tuple updated here had better not come directly from the
- * syscache if the relation has a toast relation as this tuple could
- * include toast values that have been expanded, causing a failure here.
+ * This exists only to keep modules working in back branches.  Affected
+ * modules should migrate to systable_inplace_update_begin().
  */
 void
 heap_inplace_update(Relation relation, HeapTuple tuple)
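
Elsewhere in this commit, wrappers in genam.c take over the lock/retry dance for such modules. A rough before/after (a sketch; the exact argument lists of the systable_inplace_update_* functions live in genam.c, not in this hunk, and reloid here is an assumed variable):

/* Before: read a copy, then overwrite, racing concurrent heap_update() */
heap_inplace_update(relation, tuple);

/* After (sketch): fetch-and-lock, mutate a copy, then write-and-unlock */
HeapTuple   oldtup;
HeapTuple   newtup;
void       *state;
ScanKeyData key[1];

ScanKeyInit(&key[0], Anum_pg_class_oid, BTEqualStrategyNumber,
            F_OIDEQ, ObjectIdGetDatum(reloid));
systable_inplace_update_begin(relation, ClassOidIndexId, true, NULL,
                              1, key, &oldtup, &state);
newtup = heap_copytuple(oldtup);
/* ... mutate fixed-width fields of newtup ... */
systable_inplace_update_finish(state, newtup);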