Skip to content

Commit 4e02f88

Browse files
committed
Fix timing issue with ALTER TABLE's validate constraint
An ALTER TABLE to validate a foreign key in which another subcommand already caused a pending table rewrite could fail due to ALTER TABLE attempting to validate the foreign key before the actual table rewrite takes place. This situation could result in an error such as:

ERROR: could not read block 0 in file "base/nnnnn/nnnnn": read only 0 of 8192 bytes

The failure here was due to the SPI call which validates the foreign key trying to access an index which is yet to be rebuilt.

Similarly, we also incorrectly tried to validate CHECK constraints before the heap had been rewritten.

The fix for both is to delay constraint validation until phase 3, after the table has been rewritten. For CHECK constraints this means a slight behavioral change. Previously, ALTER TABLE VALIDATE CONSTRAINT on inheritance tables validated constraints from the bottom up. This was different from the order of evaluation when a new CHECK constraint was added. The changes made here align the VALIDATE CONSTRAINT evaluation order for inheritance tables to be the same as ADD CONSTRAINT, which is generally top-down.

Reported-by: Nazli Ugur Koyluoglu, using SQLancer
Discussion: https://postgr.es/m/CAApHDvp%3DZXv8wiRyk_0rWr00skhGkt8vXDrHJYXRMft3TjkxCA%40mail.gmail.com
Backpatch-through: 9.5 (all supported versions)
1 parent 6443cd2 commit 4e02f88

File tree

3 files changed

+95
-111
lines changed

3 files changed

+95
-111
lines changed

src/backend/commands/tablecmds.c

Lines changed: 52 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,9 @@ static void AlterSeqNamespaces(Relation classRel, Relation rel,
313313
LOCKMODE lockmode);
314314
static ObjectAddress ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
315315
bool recurse, bool recursing, LOCKMODE lockmode);
316-
static ObjectAddress ATExecValidateConstraint(Relation rel, char *constrName,
317-
bool recurse, bool recursing, LOCKMODE lockmode);
316+
static ObjectAddress ATExecValidateConstraint(List **wqueue, Relation rel,
317+
char *constrName, bool recurse, bool recursing,
318+
LOCKMODE lockmode);
318319
static int transformColumnNameList(Oid relId, List *colList,
319320
int16 *attnums, Oid *atttypids);
320321
static int transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid,
@@ -327,7 +328,6 @@ static Oid transformFkeyCheckAttrs(Relation pkrel,
327328
static void checkFkeyPermissions(Relation rel, int16 *attnums, int natts);
328329
static CoercionPathType findFkeyCast(Oid targetTypeId, Oid sourceTypeId,
329330
Oid *funcid);
330-
static void validateCheckConstraint(Relation rel, HeapTuple constrtup);
331331
static void validateForeignKeyConstraint(char *conname,
332332
Relation rel, Relation pkrel,
333333
Oid pkindOid, Oid constraintOid);
@@ -3951,13 +3951,13 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
39513951
address = ATExecAlterConstraint(rel, cmd, false, false, lockmode);
39523952
break;
39533953
case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */
3954-
address = ATExecValidateConstraint(rel, cmd->name, false, false,
3955-
lockmode);
3954+
address = ATExecValidateConstraint(wqueue, rel, cmd->name, false,
3955+
false, lockmode);
39563956
break;
39573957
case AT_ValidateConstraintRecurse: /* VALIDATE CONSTRAINT with
39583958
* recursion */
3959-
address = ATExecValidateConstraint(rel, cmd->name, true, false,
3960-
lockmode);
3959+
address = ATExecValidateConstraint(wqueue, rel, cmd->name, true,
3960+
false, lockmode);
39613961
break;
39623962
case AT_DropConstraint: /* DROP CONSTRAINT */
39633963
ATExecDropConstraint(rel, cmd->name, cmd->behavior,
@@ -7660,8 +7660,8 @@ ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
76607660
* was already validated, InvalidObjectAddress is returned.
76617661
*/
76627662
static ObjectAddress
7663-
ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
7664-
bool recursing, LOCKMODE lockmode)
7663+
ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
7664+
bool recurse, bool recursing, LOCKMODE lockmode)
76657665
{
76667666
Relation conrel;
76677667
SysScanDesc scan;
@@ -7708,27 +7708,31 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
77087708

77097709
if (!con->convalidated)
77107710
{
7711+
AlteredTableInfo *tab;
77117712
HeapTuple copyTuple;
77127713
Form_pg_constraint copy_con;
77137714

77147715
if (con->contype == CONSTRAINT_FOREIGN)
77157716
{
7716-
Relation refrel;
7717+
NewConstraint *newcon;
7718+
Constraint *fkconstraint;
77177719

7718-
/*
7719-
* Triggers are already in place on both tables, so a concurrent
7720-
* write that alters the result here is not possible. Normally we
7721-
* can run a query here to do the validation, which would only
7722-
* require AccessShareLock. In some cases, it is possible that we
7723-
* might need to fire triggers to perform the check, so we take a
7724-
* lock at RowShareLock level just in case.
7725-
*/
7726-
refrel = heap_open(con->confrelid, RowShareLock);
7720+
/* Queue validation for phase 3 */
7721+
fkconstraint = makeNode(Constraint);
7722+
/* for now this is all we need */
7723+
fkconstraint->conname = constrName;
77277724

7728-
validateForeignKeyConstraint(constrName, rel, refrel,
7729-
con->conindid,
7730-
HeapTupleGetOid(tuple));
7731-
heap_close(refrel, NoLock);
7725+
newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
7726+
newcon->name = constrName;
7727+
newcon->contype = CONSTR_FOREIGN;
7728+
newcon->refrelid = con->confrelid;
7729+
newcon->refindid = con->conindid;
7730+
newcon->conid = HeapTupleGetOid(tuple);
7731+
newcon->qual = (Node *) fkconstraint;
7732+
7733+
/* Find or create work queue entry for this table */
7734+
tab = ATGetQueueEntry(wqueue, rel);
7735+
tab->constraints = lappend(tab->constraints, newcon);
77327736

77337737
/*
77347738
* Foreign keys do not inherit, so we purposely ignore the
@@ -7739,6 +7743,10 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
77397743
{
77407744
List *children = NIL;
77417745
ListCell *child;
7746+
NewConstraint *newcon;
7747+
bool isnull;
7748+
Datum val;
7749+
char *conbin;
77427750

77437751
/*
77447752
* If we're recursing, the parent has already done this, so skip
@@ -7778,12 +7786,31 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
77787786
/* find_all_inheritors already got lock */
77797787
childrel = heap_open(childoid, NoLock);
77807788

7781-
ATExecValidateConstraint(childrel, constrName, false,
7789+
ATExecValidateConstraint(wqueue, childrel, constrName, false,
77827790
true, lockmode);
77837791
heap_close(childrel, NoLock);
77847792
}
77857793

7786-
validateCheckConstraint(rel, tuple);
7794+
/* Queue validation for phase 3 */
7795+
newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
7796+
newcon->name = constrName;
7797+
newcon->contype = CONSTR_CHECK;
7798+
newcon->refrelid = InvalidOid;
7799+
newcon->refindid = InvalidOid;
7800+
newcon->conid = HeapTupleGetOid(tuple);
7801+
7802+
val = SysCacheGetAttr(CONSTROID, tuple,
7803+
Anum_pg_constraint_conbin, &isnull);
7804+
if (isnull)
7805+
elog(ERROR, "null conbin for constraint %u",
7806+
HeapTupleGetOid(tuple));
7807+
7808+
conbin = TextDatumGetCString(val);
7809+
newcon->qual = (Node *) stringToNode(conbin);
7810+
7811+
/* Find or create work queue entry for this table */
7812+
tab = ATGetQueueEntry(wqueue, rel);
7813+
tab->constraints = lappend(tab->constraints, newcon);
77877814

77887815
/*
77897816
* Invalidate relcache so that others see the new validated
@@ -8159,91 +8186,6 @@ checkFkeyPermissions(Relation rel, int16 *attnums, int natts)
81598186
}
81608187
}
81618188

8162-
/*
8163-
* Scan the existing rows in a table to verify they meet a proposed
8164-
* CHECK constraint.
8165-
*
8166-
* The caller must have opened and locked the relation appropriately.
8167-
*/
8168-
static void
8169-
validateCheckConstraint(Relation rel, HeapTuple constrtup)
8170-
{
8171-
EState *estate;
8172-
Datum val;
8173-
char *conbin;
8174-
Expr *origexpr;
8175-
ExprState *exprstate;
8176-
TupleDesc tupdesc;
8177-
HeapScanDesc scan;
8178-
HeapTuple tuple;
8179-
ExprContext *econtext;
8180-
MemoryContext oldcxt;
8181-
TupleTableSlot *slot;
8182-
Form_pg_constraint constrForm;
8183-
bool isnull;
8184-
Snapshot snapshot;
8185-
8186-
/*
8187-
* VALIDATE CONSTRAINT is a no-op for foreign tables and partitioned
8188-
* tables.
8189-
*/
8190-
if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
8191-
rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
8192-
return;
8193-
8194-
constrForm = (Form_pg_constraint) GETSTRUCT(constrtup);
8195-
8196-
estate = CreateExecutorState();
8197-
8198-
/*
8199-
* XXX this tuple doesn't really come from a syscache, but this doesn't
8200-
* matter to SysCacheGetAttr, because it only wants to be able to fetch
8201-
* the tupdesc
8202-
*/
8203-
val = SysCacheGetAttr(CONSTROID, constrtup, Anum_pg_constraint_conbin,
8204-
&isnull);
8205-
if (isnull)
8206-
elog(ERROR, "null conbin for constraint %u",
8207-
HeapTupleGetOid(constrtup));
8208-
conbin = TextDatumGetCString(val);
8209-
origexpr = (Expr *) stringToNode(conbin);
8210-
exprstate = ExecPrepareExpr(origexpr, estate);
8211-
8212-
econtext = GetPerTupleExprContext(estate);
8213-
tupdesc = RelationGetDescr(rel);
8214-
slot = MakeSingleTupleTableSlot(tupdesc);
8215-
econtext->ecxt_scantuple = slot;
8216-
8217-
snapshot = RegisterSnapshot(GetLatestSnapshot());
8218-
scan = heap_beginscan(rel, snapshot, 0, NULL);
8219-
8220-
/*
8221-
* Switch to per-tuple memory context and reset it for each tuple
8222-
* produced, so we don't leak memory.
8223-
*/
8224-
oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
8225-
8226-
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
8227-
{
8228-
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
8229-
8230-
if (!ExecCheck(exprstate, econtext))
8231-
ereport(ERROR,
8232-
(errcode(ERRCODE_CHECK_VIOLATION),
8233-
errmsg("check constraint \"%s\" is violated by some row",
8234-
NameStr(constrForm->conname)),
8235-
errtableconstraint(rel, NameStr(constrForm->conname))));
8236-
8237-
ResetExprContext(econtext);
8238-
}
8239-
8240-
MemoryContextSwitchTo(oldcxt);
8241-
heap_endscan(scan);
8242-
UnregisterSnapshot(snapshot);
8243-
ExecDropSingleTupleTableSlot(slot);
8244-
FreeExecutorState(estate);
8245-
}
8246-
82478189
/*
82488190
* Scan the existing rows in a table to verify they meet a proposed FK
82498191
* constraint.

src/test/regress/expected/alter_table.out

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,8 @@ NOTICE: boo: 18
436436
ALTER TABLE tmp3 ADD CONSTRAINT IDENTITY check (b = boo(b)) NOT VALID;
437437
NOTICE: merging constraint "identity" with inherited definition
438438
ALTER TABLE tmp3 VALIDATE CONSTRAINT identity;
439-
NOTICE: boo: 16
440439
NOTICE: boo: 20
440+
NOTICE: boo: 16
441441
-- A NO INHERIT constraint should not be looked for in children during VALIDATE CONSTRAINT
442442
create table parent_noinh_convalid (a int);
443443
create table child_noinh_convalid () inherits (parent_noinh_convalid);
@@ -941,6 +941,26 @@ ERROR: column "test2" contains null values
941941
-- now add a primary key column with a default (succeeds).
942942
alter table atacc1 add column test2 int default 0 primary key;
943943
drop table atacc1;
944+
-- additionally, we've seen issues with foreign key validation not being
945+
-- properly delayed until after a table rewrite. Check that works ok.
946+
create table atacc1 (a int primary key);
947+
alter table atacc1 add constraint atacc1_fkey foreign key (a) references atacc1 (a) not valid;
948+
alter table atacc1 validate constraint atacc1_fkey, alter a type bigint;
949+
drop table atacc1;
950+
-- we've also seen issues with check constraints being validated at the wrong
951+
-- time when there's a pending table rewrite.
952+
create table atacc1 (a bigint, b int);
953+
insert into atacc1 values(1,1);
954+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
955+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
956+
drop table atacc1;
957+
-- same as above, but ensure the constraint violation is detected
958+
create table atacc1 (a bigint, b int);
959+
insert into atacc1 values(1,2);
960+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
961+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
962+
ERROR: check constraint "atacc1_chk" is violated by some row
963+
drop table atacc1;
944964
-- something a little more complicated
945965
create table atacc1 ( test int, test2 int);
946966
-- add a primary key constraint

src/test/regress/sql/alter_table.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,28 @@ alter table atacc1 add column test2 int primary key;
713713
alter table atacc1 add column test2 int default 0 primary key;
714714
drop table atacc1;
715715

716+
-- additionally, we've seen issues with foreign key validation not being
717+
-- properly delayed until after a table rewrite. Check that works ok.
718+
create table atacc1 (a int primary key);
719+
alter table atacc1 add constraint atacc1_fkey foreign key (a) references atacc1 (a) not valid;
720+
alter table atacc1 validate constraint atacc1_fkey, alter a type bigint;
721+
drop table atacc1;
722+
723+
-- we've also seen issues with check constraints being validated at the wrong
724+
-- time when there's a pending table rewrite.
725+
create table atacc1 (a bigint, b int);
726+
insert into atacc1 values(1,1);
727+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
728+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
729+
drop table atacc1;
730+
731+
-- same as above, but ensure the constraint violation is detected
732+
create table atacc1 (a bigint, b int);
733+
insert into atacc1 values(1,2);
734+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
735+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
736+
drop table atacc1;
737+
716738
-- something a little more complicated
717739
create table atacc1 ( test int, test2 int);
718740
-- add a primary key constraint

0 commit comments

Comments
 (0)