@@ -5800,23 +5800,245 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
 }
 
 /*
- * heap_inplace_update - update a tuple "in place" (ie, overwrite it)
+ * heap_inplace_lock - protect inplace update from concurrent heap_update()
  *
- * Overwriting violates both MVCC and transactional safety, so the uses
- * of this function in Postgres are extremely limited.  Nonetheless we
- * find some places to use it.
+ * Evaluate whether the tuple's state is compatible with a no-key update.
+ * Current transaction rowmarks are fine, as is KEY SHARE from any
+ * transaction.  If compatible, return true with the buffer exclusive-locked,
+ * and the caller must release that by calling
+ * heap_inplace_update_and_unlock(), calling heap_inplace_unlock(), or raising
+ * an error.  Otherwise, return false after blocking transactions, if any,
+ * have ended.
  *
- * The tuple cannot change size, and therefore it's reasonable to assume
- * that its null bitmap (if any) doesn't change either.  So we just
- * overwrite the data portion of the tuple without touching the null
- * bitmap or any of the header fields.
+ * Since this is intended for system catalogs and SERIALIZABLE doesn't cover
+ * DDL, this doesn't guarantee any particular predicate locking.
  *
- * tuple is an in-memory tuple structure containing the data to be written
- * over the target tuple.  Also, tuple->t_self identifies the target tuple.
+ * One could modify this to return true for tuples with delete in progress,
+ * but there's no need: all inplace updaters take a lock that conflicts with
+ * DROP.  If an explicit "DELETE FROM pg_class" is in progress, we'll wait
+ * for it like we would an update.
  *
- * Note that the tuple updated here had better not come directly from the
- * syscache if the relation has a toast relation as this tuple could
- * include toast values that have been expanded, causing a failure here.
+ * Readers of inplace-updated fields expect that changes to those fields are
+ * durable.  For example, vac_truncate_clog() reads datfrozenxid from
+ * pg_database tuples via catalog snapshots.  A future snapshot must not
+ * return a lower datfrozenxid for the same database OID (lower in the
+ * FullTransactionIdPrecedes() sense).  We achieve that since no update of a
+ * tuple can start while we hold a lock on its buffer.  In cases like
+ * BEGIN;GRANT;CREATE INDEX;COMMIT we're inplace-updating a tuple visible only
+ * to this transaction.  ROLLBACK then is one case where it's okay to lose
+ * inplace updates.  (Restoring relhasindex=false on ROLLBACK is fine, since
+ * any concurrent CREATE INDEX would have blocked, then inplace-updated the
+ * committed tuple.)
+ *
+ * In principle, we could avoid waiting by overwriting every tuple in the
+ * updated tuple chain.  Reader expectations permit updating a tuple only if
+ * it's aborted, is the tail of the chain, or we already updated the tuple
+ * referenced in its t_ctid.  Hence, we would need to overwrite the tuples in
+ * order from tail to head.  That would imply either (a) mutating all tuples
+ * in one critical section or (b) accepting a chance of partial completion.
+ * Partial completion of a relfrozenxid update would have the weird
+ * consequence that the table's next VACUUM could see the table's relfrozenxid
+ * move forward between vacuum_get_cutoffs() and finishing.
+ */
+bool
+heap_inplace_lock(Relation relation,
+                  HeapTuple oldtup_ptr, Buffer buffer)
+{
+    HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
+    TM_Result   result;
+    bool        ret;
+
+    Assert(BufferIsValid(buffer));
+
+    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+    /*----------
+     * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
+     *
+     * - wait unconditionally
+     * - no tuple locks
+     * - don't recheck header after wait: simpler to defer to next iteration
+     * - don't try to continue even if the updater aborts: likewise
+     * - no crosscheck
+     */
+    result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
+                                      buffer);
+
+    if (result == TM_Invisible)
+    {
+        /* no known way this can happen */
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg_internal("attempted to overwrite invisible tuple")));
+    }
+    else if (result == TM_SelfModified)
+    {
+        /*
+         * CREATE INDEX might reach this if an expression is silly enough to
+         * call e.g. SELECT ... FROM pg_class FOR SHARE.  C code of other SQL
+         * statements might get here after a heap_update() of the same row, in
+         * the absence of an intervening CommandCounterIncrement().
+         */
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
+    }
+    else if (result == TM_BeingModified)
+    {
+        TransactionId xwait;
+        uint16      infomask;
+
+        xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+        infomask = oldtup.t_data->t_infomask;
+
+        if (infomask & HEAP_XMAX_IS_MULTI)
+        {
+            LockTupleMode lockmode = LockTupleNoKeyExclusive;
+            MultiXactStatus mxact_status = MultiXactStatusNoKeyUpdate;
+            int         remain;
+            bool        current_is_member;
+
+            if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
+                                        lockmode, &current_is_member))
+            {
+                LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+                ret = false;
+                MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
+                                relation, &oldtup.t_self, XLTW_Update,
+                                &remain);
+            }
+            else
+                ret = true;
+        }
+        else if (TransactionIdIsCurrentTransactionId(xwait))
+            ret = true;
+        else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
+            ret = true;
+        else
+        {
+            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+            ret = false;
+            XactLockTableWait(xwait, relation, &oldtup.t_self,
+                              XLTW_Update);
+        }
+    }
+    else
+    {
+        ret = (result == TM_Ok);
+        if (!ret)
+        {
+            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+        }
+    }
+
+    /*
+     * GetCatalogSnapshot() relies on invalidation messages to know when to
+     * take a new snapshot.  COMMIT of xwait is responsible for sending the
+     * invalidation.  We're not acquiring heavyweight locks sufficient to
+     * block if not yet sent, so we must take a new snapshot to ensure a later
+     * attempt has a fair chance.  While we don't need this if xwait aborted,
+     * don't bother optimizing that.
+     */
+    if (!ret)
+        InvalidateCatalogSnapshot();
+    return ret;
+}
+
+/*
+ * heap_inplace_update_and_unlock - core of systable_inplace_update_finish
+ *
+ * The tuple cannot change size, and therefore its header fields and null
+ * bitmap (if any) don't change either.
+ */
+void
+heap_inplace_update_and_unlock(Relation relation,
+                               HeapTuple oldtup, HeapTuple tuple,
+                               Buffer buffer)
+{
+    HeapTupleHeader htup = oldtup->t_data;
+    uint32      oldlen;
+    uint32      newlen;
+
+    Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
+    oldlen = oldtup->t_len - htup->t_hoff;
+    newlen = tuple->t_len - tuple->t_data->t_hoff;
+    if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
+        elog(ERROR, "wrong tuple length");
+
+    /* NO EREPORT(ERROR) from here till changes are logged */
+    START_CRIT_SECTION();
+
+    memcpy((char *) htup + htup->t_hoff,
+           (char *) tuple->t_data + tuple->t_data->t_hoff,
+           newlen);
+
+    /*----------
+     * XXX A crash here can allow datfrozenxid to get ahead of relfrozenxid:
+     *
+     * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
+     * ["R" is a VACUUM tbl]
+     * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
+     * D: systable_getnext() returns pg_class tuple of tbl
+     * R: memcpy() into pg_class tuple of tbl
+     * D: raise pg_database.datfrozenxid, XLogInsert(), finish
+     * [crash]
+     * [recovery restores datfrozenxid w/o relfrozenxid]
+     */
+
+    MarkBufferDirty(buffer);
+
+    /* XLOG stuff */
+    if (RelationNeedsWAL(relation))
+    {
+        xl_heap_inplace xlrec;
+        XLogRecPtr  recptr;
+
+        xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
+
+        XLogBeginInsert();
+        XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
+
+        XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+        XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
+
+        /* inplace updates aren't decoded atm, don't log the origin */
+
+        recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
+
+        PageSetLSN(BufferGetPage(buffer), recptr);
+    }
+
+    END_CRIT_SECTION();
+
+    heap_inplace_unlock(relation, oldtup, buffer);
+
+    /*
+     * Send out shared cache inval if necessary.  Note that because we only
+     * pass the new version of the tuple, this mustn't be used for any
+     * operations that could change catcache lookup keys.  But we aren't
+     * bothering with index updates either, so that's true a fortiori.
+     *
+     * XXX ROLLBACK discards the invalidation.  See test inplace-inval.spec.
+     */
+    if (!IsBootstrapProcessingMode())
+        CacheInvalidateHeapTuple(relation, tuple, NULL);
+}
+
+/*
+ * heap_inplace_unlock - reverse of heap_inplace_lock
+ */
+void
+heap_inplace_unlock(Relation relation,
+                    HeapTuple oldtup, Buffer buffer)
+{
+    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+}
+
+/*
+ * heap_inplace_update - deprecated
+ *
+ * This exists only to keep modules working in back branches.  Affected
+ * modules should migrate to systable_inplace_update_begin().
  */
 void
 heap_inplace_update(Relation relation, HeapTuple tuple)
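
For reference, a minimal sketch of how a caller might drive the new lock/update/unlock API introduced by this hunk. This is not part of the patch: real callers go through the systable_inplace_update_begin()/systable_inplace_update_finish() wrappers the commit adds elsewhere, and fetch_tuple_for_inplace() below is a hypothetical helper standing in for that tuple-fetching machinery.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"

/*
 * Hypothetical helper (assumption, not in the patch): locate the target
 * catalog tuple, copy it into *oldtup, and return its buffer with only a
 * pin held.  systable_inplace_update_begin() plays this role in practice.
 */
extern void fetch_tuple_for_inplace(Relation rel, HeapTuple oldtup,
                                    Buffer *buffer);

static void
inplace_update_sketch(Relation rel, HeapTuple newtup)
{
    for (;;)
    {
        HeapTupleData oldtup;
        Buffer      buffer;

        fetch_tuple_for_inplace(rel, &oldtup, &buffer);

        if (heap_inplace_lock(rel, &oldtup, buffer))
        {
            /* lock granted: overwrite in place, WAL-log, and unlock */
            newtup->t_self = oldtup.t_self; /* same length, same TID */
            heap_inplace_update_and_unlock(rel, &oldtup, newtup, buffer);
            ReleaseBuffer(buffer);
            return;
        }

        /*
         * A conflicting updater existed; heap_inplace_lock() already waited
         * for it to end, released the buffer lock, and invalidated the
         * catalog snapshot, so drop the pin and retry with a fresh copy.
         */
        ReleaseBuffer(buffer);
    }
}

The retry loop mirrors the contract in the heap_inplace_lock() header comment: a false return means the conflicting transaction has already ended, so the next fetch under a new catalog snapshot has a fair chance of succeeding.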