 #include "storage/predicate.h"
 #include "storage/procarray.h"
 #include "utils/datum.h"
-#include "utils/injection_point.h"
 #include "utils/inval.h"
 #include "utils/spccache.h"
@@ -6023,61 +6022,167 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
 }

 /*
- * heap_inplace_update - update a tuple "in place" (ie, overwrite it)
- *
- * Overwriting violates both MVCC and transactional safety, so the uses
- * of this function in Postgres are extremely limited. Nonetheless we
- * find some places to use it.
- *
- * The tuple cannot change size, and therefore it's reasonable to assume
- * that its null bitmap (if any) doesn't change either. So we just
- * overwrite the data portion of the tuple without touching the null
- * bitmap or any of the header fields.
- *
- * tuple is an in-memory tuple structure containing the data to be written
- * over the target tuple. Also, tuple->t_self identifies the target tuple.
- *
- * Note that the tuple updated here had better not come directly from the
- * syscache if the relation has a toast relation as this tuple could
- * include toast values that have been expanded, causing a failure here.
+ * heap_inplace_lock - protect inplace update from concurrent heap_update()
+ *
+ * Evaluate whether the tuple's state is compatible with a no-key update.
+ * Current transaction rowmarks are fine, as is KEY SHARE from any
+ * transaction. If compatible, return true with the buffer exclusive-locked,
+ * and the caller must release that by calling
+ * heap_inplace_update_and_unlock(), calling heap_inplace_unlock(), or raising
+ * an error. Otherwise, return false after blocking transactions, if any,
+ * have ended.
+ *
+ * Since this is intended for system catalogs and SERIALIZABLE doesn't cover
+ * DDL, this doesn't guarantee any particular predicate locking.
+ *
+ * One could modify this to return true for tuples with delete in progress.
+ * All inplace updaters take a lock that conflicts with DROP. If an explicit
+ * "DELETE FROM pg_class" is in progress, we'll wait for it like we would an
+ * update.
+ *
+ * Readers of inplace-updated fields expect that changes to those fields are
+ * durable. For example, vac_truncate_clog() reads datfrozenxid from
+ * pg_database tuples via catalog snapshots. A future snapshot must not
+ * return a lower datfrozenxid for the same database OID (lower in the
+ * FullTransactionIdPrecedes() sense). We achieve that since no update of a
+ * tuple can start while we hold a lock on its buffer. In cases like
+ * BEGIN;GRANT;CREATE INDEX;COMMIT we're inplace-updating a tuple visible only
+ * to this transaction. ROLLBACK then is one case where it's okay to lose
+ * inplace updates. (Restoring relhasindex=false on ROLLBACK is fine, since
+ * any concurrent CREATE INDEX would have blocked, then inplace-updated the
+ * committed tuple.)
+ *
+ * In principle, we could avoid waiting by overwriting every tuple in the
+ * updated tuple chain. Reader expectations permit updating a tuple only if
+ * it's aborted, is the tail of the chain, or we already updated the tuple
+ * referenced in its t_ctid. Hence, we would need to overwrite the tuples in
+ * order from tail to head. That would imply either (a) mutating all tuples
+ * in one critical section or (b) accepting a chance of partial completion.
+ * Partial completion of a relfrozenxid update would have the weird
+ * consequence that the table's next VACUUM could see the table's relfrozenxid
+ * move forward between vacuum_get_cutoffs() and finishing.
  */
-void
-heap_inplace_update(Relation relation, HeapTuple tuple)
+bool
+heap_inplace_lock(Relation relation,
+                  HeapTuple oldtup_ptr, Buffer buffer)
 {
-    Buffer        buffer;
-    Page          page;
-    OffsetNumber  offnum;
-    ItemId        lp = NULL;
-    HeapTupleHeader htup;
-    uint32        oldlen;
-    uint32        newlen;
+    HeapTupleData oldtup = *oldtup_ptr;    /* minimize diff vs. heap_update() */
+    TM_Result     result;
+    bool          ret;

-    /*
-     * For now, we don't allow parallel updates. Unlike a regular update,
-     * this should never create a combo CID, so it might be possible to relax
-     * this restriction, but not without more thought and testing. It's not
-     * clear that it would be useful, anyway.
+    Assert(BufferIsValid(buffer));
+
+    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+    /*----------
+     * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
+     *
+     * - wait unconditionally
+     * - no tuple locks
+     * - don't recheck header after wait: simpler to defer to next iteration
+     * - don't try to continue even if the updater aborts: likewise
+     * - no crosscheck
      */
-    if (IsInParallelMode())
+    result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
+                                      buffer);
+
+    if (result == TM_Invisible)
+    {
+        /* no known way this can happen */
         ereport(ERROR,
-                (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
-                 errmsg("cannot update tuples during a parallel operation")));
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg_internal("attempted to overwrite invisible tuple")));
+    }
+    else if (result == TM_SelfModified)
+    {
+        /*
+         * CREATE INDEX might reach this if an expression is silly enough to
+         * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
+         * statements might get here after a heap_update() of the same row, in
+         * the absence of an intervening CommandCounterIncrement().
+         */
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
+    }
+    else if (result == TM_BeingModified)
+    {
+        TransactionId xwait;
+        uint16        infomask;

-    INJECTION_POINT("inplace-before-pin");
-    buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
-    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-    page = (Page) BufferGetPage(buffer);
+        xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+        infomask = oldtup.t_data->t_infomask;

-    offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
-    if (PageGetMaxOffsetNumber(page) >= offnum)
-        lp = PageGetItemId(page, offnum);
+        if (infomask & HEAP_XMAX_IS_MULTI)
+        {
+            LockTupleMode lockmode = LockTupleNoKeyExclusive;
+            MultiXactStatus mxact_status = MultiXactStatusNoKeyUpdate;
+            int           remain;
+            bool          current_is_member;

-    if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
-        elog(ERROR, "invalid lp");
+            if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
+                                        lockmode, &current_is_member))
+            {
+                LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+                ret = false;
+                MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
+                                relation, &oldtup.t_self, XLTW_Update,
+                                &remain);
+            }
+            else
+                ret = true;
+        }
+        else if (TransactionIdIsCurrentTransactionId(xwait))
+            ret = true;
+        else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
+            ret = true;
+        else
+        {
+            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+            ret = false;
+            XactLockTableWait(xwait, relation, &oldtup.t_self,
+                              XLTW_Update);
+        }
+    }
+    else
+    {
+        ret = (result == TM_Ok);
+        if (!ret)
+        {
+            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+        }
+    }

-    htup = (HeapTupleHeader) PageGetItem(page, lp);
+    /*
+     * GetCatalogSnapshot() relies on invalidation messages to know when to
+     * take a new snapshot. COMMIT of xwait is responsible for sending the
+     * invalidation. We're not acquiring heavyweight locks sufficient to
+     * block if not yet sent, so we must take a new snapshot to ensure a later
+     * attempt has a fair chance. While we don't need this if xwait aborted,
+     * don't bother optimizing that.
+     */
+    if (!ret)
+        InvalidateCatalogSnapshot();
+    return ret;
+}
+
+/*
+ * heap_inplace_update_and_unlock - core of systable_inplace_update_finish
+ *
+ * The tuple cannot change size, and therefore its header fields and null
+ * bitmap (if any) don't change either.
+ */
+void
+heap_inplace_update_and_unlock(Relation relation,
+                               HeapTuple oldtup, HeapTuple tuple,
+                               Buffer buffer)
+{
+    HeapTupleHeader htup = oldtup->t_data;
+    uint32        oldlen;
+    uint32        newlen;

-    oldlen = ItemIdGetLength(lp) - htup->t_hoff;
+    Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
+    oldlen = oldtup->t_len - htup->t_hoff;
     newlen = tuple->t_len - tuple->t_data->t_hoff;
     if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
         elog(ERROR, "wrong tuple length");
@@ -6089,6 +6194,19 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
            (char *) tuple->t_data + tuple->t_data->t_hoff,
            newlen);

+    /*----------
+     * XXX A crash here can allow datfrozenxid to get ahead of relfrozenxid:
+     *
+     * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
+     * ["R" is a VACUUM tbl]
+     * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
+     * D: systable_getnext() returns pg_class tuple of tbl
+     * R: memcpy() into pg_class tuple of tbl
+     * D: raise pg_database.datfrozenxid, XLogInsert(), finish
+     * [crash]
+     * [recovery restores datfrozenxid w/o relfrozenxid]
+     */
+
     MarkBufferDirty(buffer);

     /* XLOG stuff */
@@ -6109,23 +6227,35 @@ heap_inplace_update(Relation relation, HeapTuple tuple)

         recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);

-        PageSetLSN(page, recptr);
+        PageSetLSN(BufferGetPage(buffer), recptr);
     }

     END_CRIT_SECTION();

-    UnlockReleaseBuffer(buffer);
+    heap_inplace_unlock(relation, oldtup, buffer);

     /*
      * Send out shared cache inval if necessary. Note that because we only
      * pass the new version of the tuple, this mustn't be used for any
      * operations that could change catcache lookup keys. But we aren't
      * bothering with index updates either, so that's true a fortiori.
+     *
+     * XXX ROLLBACK discards the invalidation. See test inplace-inval.spec.
      */
     if (!IsBootstrapProcessingMode())
         CacheInvalidateHeapTuple(relation, tuple, NULL);
 }

+/*
+ * heap_inplace_unlock - reverse of heap_inplace_lock
+ */
+void
+heap_inplace_unlock(Relation relation,
+                    HeapTuple oldtup, Buffer buffer)
+{
+    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+}
+
 #define FRM_NOOP                0x0001
 #define FRM_INVALIDATE_XMAX     0x0002
 #define FRM_RETURN_IS_XID       0x0004
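
For orientation, the comment above names systable_inplace_update_finish() as the intended caller of heap_inplace_update_and_unlock(). The expected call pattern is a retry loop: pin and read the target tuple, try heap_inplace_lock(), and refetch if it returns false (the function has already waited out the blocker and invalidated the catalog snapshot). The sketch below only illustrates that contract; fetch_target_tuple() and mutate_tuple_copy() are hypothetical placeholders, not functions from this commit.

/*
 * Illustrative caller-side retry loop for the API added above (a sketch,
 * not code from this commit).  fetch_target_tuple() and mutate_tuple_copy()
 * are hypothetical stand-ins for the catalog scan and the field assignment.
 */
static void
inplace_update_sketch(Relation relation)
{
    for (;;)
    {
        HeapTupleData oldtup;
        HeapTuple   newtup;
        Buffer      buffer;

        /* hypothetical: locate the row and leave its buffer pinned */
        fetch_target_tuple(relation, &oldtup, &buffer);

        if (heap_inplace_lock(relation, &oldtup, buffer))
        {
            /* hypothetical: copy the tuple and overwrite fixed-width fields */
            newtup = mutate_tuple_copy(&oldtup);

            /* writes WAL, sends cache inval, and drops the buffer lock */
            heap_inplace_update_and_unlock(relation, &oldtup, newtup, buffer);
            ReleaseBuffer(buffer);  /* drop the pin; the lock is already gone */
            break;
        }

        /*
         * heap_inplace_lock() returned with the buffer unlocked after the
         * blocking transaction ended; drop the pin and retry from scratch.
         */
        ReleaseBuffer(buffer);
    }
}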