42
42
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
43
43
* Portions Copyright (c) 1994, Regents of the University of California
44
44
*
45
- * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.9 2005/10/15 02:49:09 momjian Exp $
45
+ * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.10 2005/10/28 17:27:29 tgl Exp $
46
46
*
47
47
*-------------------------------------------------------------------------
48
48
*/
@@ -631,7 +631,16 @@ CreateMultiXactId(int nxids, TransactionId *xids)
631
631
}
632
632
633
633
/*
634
- * OK, assign the MXID and offsets range to use
634
+ * Critical section from here until we've written the data; we don't
635
+ * want to error out with a partly written MultiXact structure.
636
+ * (In particular, failing to write our start offset after advancing
637
+ * nextMXact would effectively corrupt the previous MultiXact.)
638
+ */
639
+ START_CRIT_SECTION ();
640
+
641
+ /*
642
+ * Assign the MXID and offsets range to use, and make sure there is
643
+ * space in the OFFSETs and MEMBERs files.
635
644
*/
636
645
multi = GetNewMultiXactId (nxids , & offset );
637
646
@@ -668,6 +677,9 @@ CreateMultiXactId(int nxids, TransactionId *xids)
668
677
/* Now enter the information into the OFFSETs and MEMBERs logs */
669
678
RecordNewMultiXact (multi , offset , nxids , xids );
670
679
680
+ /* Done with critical section */
681
+ END_CRIT_SECTION ();
682
+
671
683
/* Store the new MultiXactId in the local cache, too */
672
684
mXactCachePut (multi , nxids , xids );
673
685
@@ -761,6 +773,7 @@ static MultiXactId
761
773
GetNewMultiXactId (int nxids , MultiXactOffset * offset )
762
774
{
763
775
MultiXactId result ;
776
+ MultiXactOffset nextOffset ;
764
777
765
778
debug_elog3 (DEBUG2 , "GetNew: for %d xids" , nxids );
766
779
@@ -784,19 +797,28 @@ GetNewMultiXactId(int nxids, MultiXactOffset *offset)
784
797
* Advance counter. As in GetNewTransactionId(), this must not happen
785
798
* until after ExtendMultiXactOffset has succeeded!
786
799
*
787
- * We don't care about MultiXactId wraparound here; it will be handled by the
788
- * next iteration. But note that nextMXact may be InvalidMultiXactId
800
+ * We don't care about MultiXactId wraparound here; it will be handled by
801
+ * the next iteration. But note that nextMXact may be InvalidMultiXactId
789
802
* after this routine exits, so anyone else looking at the variable must
790
803
* be prepared to deal with that.
791
804
*/
792
805
(MultiXactState -> nextMXact )++ ;
793
806
794
807
/*
795
- * Reserve the members space. Same considerations as above.
808
+ * Reserve the members space. Same considerations as above. Also, be
809
+ * careful not to return zero as the starting offset for any multixact.
810
+ * See GetMultiXactIdMembers() for motivation.
796
811
*/
797
- * offset = MultiXactState -> nextOffset ;
812
+ nextOffset = MultiXactState -> nextOffset ;
813
+ if (nextOffset == 0 )
814
+ {
815
+ * offset = 1 ;
816
+ nxids ++ ; /* allocate member slot 0 too */
817
+ }
818
+ else
819
+ * offset = nextOffset ;
798
820
799
- ExtendMultiXactMember (* offset , nxids );
821
+ ExtendMultiXactMember (nextOffset , nxids );
800
822
801
823
MultiXactState -> nextOffset += nxids ;
802
824
@@ -824,6 +846,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
824
846
MultiXactOffset * offptr ;
825
847
MultiXactOffset offset ;
826
848
int length ;
849
+ int truelength ;
827
850
int i ;
828
851
MultiXactId nextMXact ;
829
852
MultiXactId tmpMXact ;
@@ -849,13 +872,13 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
849
872
/*
850
873
* We check known limits on MultiXact before resorting to the SLRU area.
851
874
*
852
- * An ID older than our OldestVisibleMXactId[] entry can't possibly still be
853
- * running, and we'd run the risk of trying to read already-truncated SLRU
854
- * data if we did try to examine it.
875
+ * An ID older than our OldestVisibleMXactId[] entry can't possibly still
876
+ * be running, and we'd run the risk of trying to read already-truncated
877
+ * SLRU data if we did try to examine it.
855
878
*
856
- * Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is seen,
857
- * it implies undetected ID wraparound has occurred. We just silently
858
- * assume that such an ID is no longer running.
879
+ * Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is
880
+ * seen, it implies undetected ID wraparound has occurred. We just
881
+ * silently assume that such an ID is no longer running.
859
882
*
860
883
* Shared lock is enough here since we aren't modifying any global state.
861
884
* Also, we can examine our own OldestVisibleMXactId without the lock,
@@ -868,27 +891,58 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
868
891
return -1 ;
869
892
}
870
893
894
+ /*
895
+ * Acquire the shared lock just long enough to grab the current counter
896
+ * values. We may need both nextMXact and nextOffset; see below.
897
+ */
871
898
LWLockAcquire (MultiXactGenLock , LW_SHARED );
872
899
873
- if (!MultiXactIdPrecedes (multi , MultiXactState -> nextMXact ))
900
+ nextMXact = MultiXactState -> nextMXact ;
901
+ nextOffset = MultiXactState -> nextOffset ;
902
+
903
+ LWLockRelease (MultiXactGenLock );
904
+
905
+ if (!MultiXactIdPrecedes (multi , nextMXact ))
874
906
{
875
- LWLockRelease (MultiXactGenLock );
876
907
debug_elog2 (DEBUG2 , "GetMembers: it's too new!" );
877
908
* xids = NULL ;
878
909
return -1 ;
879
910
}
880
911
881
912
/*
882
- * Before releasing the lock, save the current counter values, because the
883
- * target MultiXactId may be just one less than nextMXact. We will need
884
- * to use nextOffset as the endpoint if so.
913
+ * Find out the offset at which we need to start reading MultiXactMembers
914
+ * and the number of members in the multixact. We determine the latter
915
+ * as the difference between this multixact's starting offset and the
916
+ * next one's. However, there are some corner cases to worry about:
917
+ *
918
+ * 1. This multixact may be the latest one created, in which case there
919
+ * is no next one to look at. In this case the nextOffset value we just
920
+ * saved is the correct endpoint.
921
+ *
922
+ * 2. The next multixact may still be in process of being filled in:
923
+ * that is, another process may have done GetNewMultiXactId but not yet
924
+ * written the offset entry for that ID. In that scenario, it is
925
+ * guaranteed that the offset entry for that multixact exists (because
926
+ * GetNewMultiXactId won't release MultiXactGenLock until it does)
927
+ * but contains zero (because we are careful to pre-zero offset pages).
928
+ * Because GetNewMultiXactId will never return zero as the starting offset
929
+ * for a multixact, when we read zero as the next multixact's offset, we
930
+ * know we have this case. We sleep for a bit and try again.
931
+ *
932
+ * 3. Because GetNewMultiXactId increments offset zero to offset one
933
+ * to handle case #2, there is an ambiguity near the point of offset
934
+ * wraparound. If we see next multixact's offset is one, is that our
935
+ * multixact's actual endpoint, or did it end at zero with a subsequent
936
+ * increment? We handle this using the knowledge that if the zero'th
937
+ * member slot wasn't filled, it'll contain zero, and zero isn't a valid
938
+ * transaction ID so it can't be a multixact member. Therefore, if we
939
+ * read a zero from the members array, just ignore it.
940
+ *
941
+ * This is all pretty messy, but the mess occurs only in infrequent corner
942
+ * cases, so it seems better than holding the MultiXactGenLock for a long
943
+ * time on every multixact creation.
885
944
*/
886
- nextMXact = MultiXactState -> nextMXact ;
887
- nextOffset = MultiXactState -> nextOffset ;
888
-
889
- LWLockRelease (MultiXactGenLock );
890
-
891
- /* Get the offset at which we need to start reading MultiXactMembers */
945
+ retry :
892
946
LWLockAcquire (MultiXactOffsetControlLock , LW_EXCLUSIVE );
893
947
894
948
pageno = MultiXactIdToOffsetPage (multi );
@@ -899,20 +953,23 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
899
953
offptr += entryno ;
900
954
offset = * offptr ;
901
955
956
+ Assert (offset != 0 );
957
+
902
958
/*
903
- * How many members do we need to read? If we are at the end of the
904
- * assigned MultiXactIds, use the offset just saved above. Else we need
905
- * to check the MultiXactId following ours.
906
- *
907
- * Use the same increment rule as GetNewMultiXactId(), that is, don't handle
908
- * wraparound explicitly until needed.
959
+ * Use the same increment rule as GetNewMultiXactId(), that is, don't
960
+ * handle wraparound explicitly until needed.
909
961
*/
910
962
tmpMXact = multi + 1 ;
911
963
912
964
if (nextMXact == tmpMXact )
965
+ {
966
+ /* Corner case 1: there is no next multixact */
913
967
length = nextOffset - offset ;
968
+ }
914
969
else
915
970
{
971
+ MultiXactOffset nextMXOffset ;
972
+
916
973
/* handle wraparound if needed */
917
974
if (tmpMXact < FirstMultiXactId )
918
975
tmpMXact = FirstMultiXactId ;
@@ -927,7 +984,17 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
927
984
928
985
offptr = (MultiXactOffset * ) MultiXactOffsetCtl -> shared -> page_buffer [slotno ];
929
986
offptr += entryno ;
930
- length = * offptr - offset ;
987
+ nextMXOffset = * offptr ;
988
+
989
+ if (nextMXOffset == 0 )
990
+ {
991
+ /* Corner case 2: next multixact is still being filled in */
992
+ LWLockRelease (MultiXactOffsetControlLock );
993
+ pg_usleep (1000L );
994
+ goto retry ;
995
+ }
996
+
997
+ length = nextMXOffset - offset ;
931
998
}
932
999
933
1000
LWLockRelease (MultiXactOffsetControlLock );
@@ -938,6 +1005,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
938
1005
/* Now get the members themselves. */
939
1006
LWLockAcquire (MultiXactMemberControlLock , LW_EXCLUSIVE );
940
1007
1008
+ truelength = 0 ;
941
1009
prev_pageno = -1 ;
942
1010
for (i = 0 ; i < length ; i ++ , offset ++ )
943
1011
{
@@ -956,19 +1024,26 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
956
1024
MultiXactMemberCtl -> shared -> page_buffer [slotno ];
957
1025
xactptr += entryno ;
958
1026
959
- ptr [i ] = * xactptr ;
1027
+ if (!TransactionIdIsValid (* xactptr ))
1028
+ {
1029
+ /* Corner case 3: we must be looking at unused slot zero */
1030
+ Assert (offset == 0 );
1031
+ continue ;
1032
+ }
1033
+
1034
+ ptr [truelength ++ ] = * xactptr ;
960
1035
}
961
1036
962
1037
LWLockRelease (MultiXactMemberControlLock );
963
1038
964
1039
/*
965
1040
* Copy the result into the local cache.
966
1041
*/
967
- mXactCachePut (multi , length , ptr );
1042
+ mXactCachePut (multi , truelength , ptr );
968
1043
969
1044
debug_elog3 (DEBUG2 , "GetMembers: no cache for %s" ,
970
- mxid_to_string (multi , length , ptr ));
971
- return length ;
1045
+ mxid_to_string (multi , truelength , ptr ));
1046
+ return truelength ;
972
1047
}
973
1048
974
1049
/*
0 commit comments