Skip to content

Commit cf97080

Browse files
committed
TOAST needs to do at least minimal time-qual checking in order not to
mess up after an aborted VACUUM FULL, per today's pghackers discussion. Add a suitable HeapTupleSatisfiesToast routine. Remove useless special-case test in HeapTupleSatisfiesVisibility macro for xmax = BootstrapTransactionId; perhaps that was needed at one time, but it's a waste of cycles now, not to mention actively wrong for SnapshotAny. Along the way, add some much-needed comments to tqual.c, and simplify toast_fetch_datum, which no longer needs to assume it may see chunks out-of-order.
1 parent 0f2d949 commit cf97080

File tree

3 files changed

+158
-59
lines changed

3 files changed

+158
-59
lines changed

src/backend/access/heap/tuptoaster.c

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
* Support routines for external and compressed storage of
55
* variable size attributes.
66
*
7-
* Copyright (c) 2000, PostgreSQL Global Development Group
7+
* Copyright (c) 2000-2002, PostgreSQL Global Development Group
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.26 2001/11/05 17:46:23 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.27 2002/01/16 20:29:01 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -921,7 +921,7 @@ toast_delete_datum(Relation rel, Datum value)
921921
while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
922922
{
923923
toasttup.t_self = indexRes->heap_iptr;
924-
heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer, toastscan);
924+
heap_fetch(toastrel, SnapshotToast, &toasttup, &buffer, toastscan);
925925
pfree(indexRes);
926926

927927
if (!toasttup.t_data)
@@ -963,26 +963,18 @@ toast_fetch_datum(varattrib *attr)
963963
TupleDesc toasttupDesc;
964964
RetrieveIndexResult indexRes;
965965
Buffer buffer;
966-
967966
varattrib *result;
968967
int32 ressize;
969-
int32 residx;
970-
int numchunks;
968+
int32 residx,
969+
nextidx;
970+
int32 numchunks;
971971
Pointer chunk;
972972
bool isnull;
973973
int32 chunksize;
974974

975-
char *chunks_found;
976-
char *chunks_expected;
977-
978975
ressize = attr->va_content.va_external.va_extsize;
979976
numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
980977

981-
chunks_found = palloc(numchunks);
982-
chunks_expected = palloc(numchunks);
983-
memset(chunks_found, 0, numchunks);
984-
memset(chunks_expected, 1, numchunks);
985-
986978
result = (varattrib *) palloc(ressize + VARHDRSZ);
987979
VARATT_SIZEP(result) = ressize + VARHDRSZ;
988980
if (VARATT_IS_COMPRESSED(attr))
@@ -1008,13 +1000,17 @@ toast_fetch_datum(varattrib *attr)
10081000
/*
10091001
* Read the chunks by index
10101002
*
1011-
* Note we will not necessarily see the chunks in sequence-number order.
1003+
* Note that because the index is actually on (valueid, chunkidx)
1004+
* we will see the chunks in chunkidx order, even though we didn't
1005+
* explicitly ask for it.
10121006
*/
1007+
nextidx = 0;
1008+
10131009
toastscan = index_beginscan(toastidx, false, 1, &toastkey);
10141010
while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
10151011
{
10161012
toasttup.t_self = indexRes->heap_iptr;
1017-
heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer, toastscan);
1013+
heap_fetch(toastrel, SnapshotToast, &toasttup, &buffer, toastscan);
10181014
pfree(indexRes);
10191015

10201016
if (toasttup.t_data == NULL)
@@ -1033,9 +1029,9 @@ toast_fetch_datum(varattrib *attr)
10331029
/*
10341030
* Some checks on the data we've found
10351031
*/
1036-
if (residx < 0 || residx >= numchunks)
1037-
elog(ERROR, "unexpected chunk number %d for toast value %u",
1038-
residx,
1032+
if (residx != nextidx)
1033+
elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
1034+
residx, nextidx,
10391035
attr->va_content.va_external.va_valueid);
10401036
if (residx < numchunks - 1)
10411037
{
@@ -1044,15 +1040,15 @@ toast_fetch_datum(varattrib *attr)
10441040
chunksize, residx,
10451041
attr->va_content.va_external.va_valueid);
10461042
}
1047-
else
1043+
else if (residx < numchunks)
10481044
{
10491045
if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
10501046
elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
10511047
chunksize, residx,
10521048
attr->va_content.va_external.va_valueid);
10531049
}
1054-
if (chunks_found[residx]++ > 0)
1055-
elog(ERROR, "chunk %d for toast value %u appears multiple times",
1050+
else
1051+
elog(ERROR, "unexpected chunk number %d for toast value %u",
10561052
residx,
10571053
attr->va_content.va_external.va_valueid);
10581054

@@ -1064,16 +1060,16 @@ toast_fetch_datum(varattrib *attr)
10641060
chunksize);
10651061

10661062
ReleaseBuffer(buffer);
1063+
nextidx++;
10671064
}
10681065

10691066
/*
10701067
* Final checks that we successfully fetched the datum
10711068
*/
1072-
if (memcmp(chunks_found, chunks_expected, numchunks) != 0)
1073-
elog(ERROR, "not all toast chunks found for value %u",
1069+
if (nextidx != numchunks)
1070+
elog(ERROR, "missing chunk number %d for toast value %u",
1071+
nextidx,
10741072
attr->va_content.va_external.va_valueid);
1075-
pfree(chunks_expected);
1076-
pfree(chunks_found);
10771073

10781074
/*
10791075
* End scan and close relations

src/backend/utils/time/tqual.c

Lines changed: 115 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,22 @@
11
/*-------------------------------------------------------------------------
22
*
33
* tqual.c
4-
* POSTGRES "time" qualification code.
4+
* POSTGRES "time" qualification code, ie, tuple visibility rules.
5+
*
6+
* NOTE: all the HeapTupleSatisfies routines will update the tuple's
7+
* "hint" status bits if we see that the inserting or deleting transaction
8+
* has now committed or aborted. The caller is responsible for noticing any
9+
* change in t_infomask and scheduling a disk write if so. Note that the
10+
* caller must hold at least a shared buffer context lock on the buffer
11+
* containing the tuple. (VACUUM FULL assumes it's sufficient to have
12+
* exclusive lock on the containing relation, instead.)
13+
*
514
*
615
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
716
* Portions Copyright (c) 1994, Regents of the University of California
817
*
9-
*
1018
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.46 2002/01/11 20:07:03 tgl Exp $
19+
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.47 2002/01/16 20:29:02 tgl Exp $
1220
*
1321
*-------------------------------------------------------------------------
1422
*/
@@ -36,8 +44,7 @@ bool ReferentialIntegritySnapshotOverride = false;
3644
*
3745
* Note:
3846
* Assumes heap tuple is valid.
39-
*/
40-
/*
47+
*
4148
* The satisfaction of "itself" requires the following:
4249
*
4350
* ((Xmin == my-transaction && the row was updated by the current transaction, and
@@ -153,8 +160,7 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple)
153160
*
154161
* Note:
155162
* Assumes heap tuple is valid.
156-
*/
157-
/*
163+
*
158164
* The satisfaction of "now" requires the following:
159165
*
160166
* ((Xmin == my-transaction && changed by the current transaction
@@ -288,6 +294,71 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple)
288294
return false;
289295
}
290296

297+
/*
298+
* HeapTupleSatisfiesToast
299+
* True iff heap tuple is valid for TOAST usage.
300+
*
301+
* This is a simplified version that only checks for VACUUM moving conditions.
302+
* It's appropriate for TOAST usage because TOAST really doesn't want to do
303+
* its own time qual checks; if you can see the main-table row that contains
304+
* a TOAST reference, you should be able to see the TOASTed value. However,
305+
* vacuuming a TOAST table is independent of the main table, and in case such
306+
* a vacuum fails partway through, we'd better do this much checking.
307+
*
308+
* Among other things, this means you can't do UPDATEs of rows in a TOAST
309+
* table.
310+
*/
311+
bool
312+
HeapTupleSatisfiesToast(HeapTupleHeader tuple)
313+
{
314+
/*
 * Only when the HEAP_XMIN_COMMITTED hint bit is not yet set do we need
 * to look at the remaining status bits; otherwise fall straight through
 * to "valid".
 */
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
315+
{
316+
/* Already hinted as invalid: not valid for TOAST. */
if (tuple->t_infomask & HEAP_XMIN_INVALID)
317+
return false;
318+
319+
/*
 * HEAP_MOVED_OFF case.  t_cmin is cast to a TransactionId here and
 * tested as such (presumably the xid of the VACUUM that moved the
 * tuple -- confirm against the VACUUM code).
 */
if (tuple->t_infomask & HEAP_MOVED_OFF)
320+
{
321+
/* Moved off by our own transaction: reject. */
if (TransactionIdIsCurrentTransactionId((TransactionId) tuple->t_cmin))
322+
return false;
323+
/*
 * If that transaction is still in progress we set no hint bit and
 * fall through to "valid".
 */
if (!TransactionIdIsInProgress((TransactionId) tuple->t_cmin))
324+
{
325+
/* It committed: hint the tuple invalid and reject it. */
if (TransactionIdDidCommit((TransactionId) tuple->t_cmin))
326+
{
327+
tuple->t_infomask |= HEAP_XMIN_INVALID;
328+
return false;
329+
}
330+
/* It did not commit: hint the tuple as committed (still valid). */
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
331+
}
332+
}
333+
/*
 * HEAP_MOVED_IN case: the mirror image of the above, again keyed on
 * t_cmin taken as a transaction id.
 */
else if (tuple->t_infomask & HEAP_MOVED_IN)
334+
{
335+
/* If it is our own transaction, fall through to "valid". */
if (!TransactionIdIsCurrentTransactionId((TransactionId) tuple->t_cmin))
336+
{
337+
/* Still in progress in another transaction: reject for now. */
if (TransactionIdIsInProgress((TransactionId) tuple->t_cmin))
338+
return false;
339+
/* Committed: hint the tuple as committed. */
if (TransactionIdDidCommit((TransactionId) tuple->t_cmin))
340+
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
341+
else
342+
{
343+
/* Did not commit: hint the tuple invalid and reject it. */
tuple->t_infomask |= HEAP_XMIN_INVALID;
344+
return false;
345+
}
346+
}
347+
}
348+
}
349+
350+
/* otherwise assume the tuple is valid for TOAST. */
351+
return true;
352+
}
353+
354+
/*
355+
* HeapTupleSatisfiesUpdate
356+
* Check whether a tuple can be updated.
357+
*
358+
* This applies exactly the same checks as HeapTupleSatisfiesNow,
359+
* but returns a more-detailed result code, since UPDATE needs to know
360+
* more than "is it visible?"
361+
*/
291362
int
292363
HeapTupleSatisfiesUpdate(HeapTuple htuple)
293364
{
@@ -404,6 +475,18 @@ HeapTupleSatisfiesUpdate(HeapTuple htuple)
404475
return HeapTupleUpdated; /* updated by other */
405476
}
406477

478+
/*
479+
* HeapTupleSatisfiesDirty
480+
* True iff heap tuple is valid, including effects of concurrent xacts.
481+
*
482+
* This is essentially like HeapTupleSatisfiesItself as far as effects of
483+
* the current transaction and committed/aborted xacts are concerned.
484+
* However, we also include the effects of other xacts still in progress.
485+
*
486+
* Returns extra information in the global variable SnapshotDirty, namely
487+
* xids of concurrent xacts that affected the tuple. Also, the tuple's
488+
* t_ctid (forward link) is returned if it's being updated.
489+
*/
407490
bool
408491
HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
409492
{
@@ -516,6 +599,18 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
516599
return false; /* updated by other */
517600
}
518601

602+
/*
603+
* HeapTupleSatisfiesSnapshot
604+
* True iff heap tuple is valid for the given snapshot.
605+
*
606+
* This is the same as HeapTupleSatisfiesNow, except that transactions that
607+
* were in progress or as yet unstarted when the snapshot was taken will
608+
* be treated as uncommitted, even if they really have committed by now.
609+
*
610+
* (Notice, however, that the tuple status hint bits will be updated on the
611+
* basis of the true state of the transaction, even if we then pretend we
612+
* can't see it.)
613+
*/
519614
bool
520615
HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
521616
{
@@ -658,11 +753,6 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
658753
* deleted by XIDs >= OldestXmin are deemed "recently dead"; they might
659754
* still be visible to some open transaction, so we can't remove them,
660755
* even if we see that the deleting transaction has committed.
661-
*
662-
* As with the other HeapTupleSatisfies routines, we may update the tuple's
663-
* "hint" status bits if we see that the inserting or deleting transaction
664-
* has now committed or aborted. The caller is responsible for noticing any
665-
* change in t_infomask and scheduling a disk write if so.
666756
*/
667757
HTSV_Result
668758
HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin)
@@ -808,13 +898,21 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin)
808898
}
809899

810900

901+
/*
902+
* SetQuerySnapshot
903+
* Initialize query snapshot for a new query
904+
*
905+
* The SerializableSnapshot is the first one taken in a transaction.
906+
* In serializable mode we just use that one throughout the transaction.
907+
* In read-committed mode, we take a new snapshot at the start of each query.
908+
*/
811909
void
812910
SetQuerySnapshot(void)
813911
{
814912
/* Initialize snapshot overriding to false */
815913
ReferentialIntegritySnapshotOverride = false;
816914

817-
/* 1st call in xaction */
915+
/* 1st call in xaction? */
818916
if (SerializableSnapshot == NULL)
819917
{
820918
SerializableSnapshot = GetSnapshotData(true);
@@ -837,6 +935,10 @@ SetQuerySnapshot(void)
837935
Assert(QuerySnapshot != NULL);
838936
}
839937

938+
/*
939+
* FreeXactSnapshot
940+
* Free snapshot(s) at end of transaction.
941+
*/
840942
void
841943
FreeXactSnapshot(void)
842944
{

0 commit comments

Comments
 (0)