Skip to content

Commit c7b96ba

Browse files
committed
Fix logical decoding error when system table w/ toast is repeatedly rewritten.
Repeatedly rewriting a mapped catalog table with VACUUM FULL or CLUSTER could cause logical decoding to fail with: ERROR, "could not map filenode \"%s\" to relation OID". To trigger the problem the rewritten catalog had to have live tuples with toasted columns. The problem was triggered because, during catalog table rewrites, the heap_insert() check that prevents logical decoding information from being emitted for system catalogs failed to treat the new heap's toast table as a system catalog (because the new heap is not recognized as a catalog table via RelationIsLogicallyLogged()). The relmapper, in contrast to the normal catalog contents, does not contain historical information. After a single rewrite of a mapped table the new relation is known to the relmapper, but if the table is rewritten twice before logical decoding occurs, the relfilenode cannot be mapped to a relation anymore, which then leads us to error out. This only happens for toast tables, because the main table contents aren't re-inserted with heap_insert(). The fix is simple: add a new heap_insert() flag that prevents logical decoding information from being emitted, and accept during decoding that there might not be tuple data for toast tables. Unfortunately that does not fix pre-existing logical decoding errors. Doing so would require not throwing an error when a filenode cannot be mapped to a relation during decoding, and that seems too likely to hide bugs. If it's crucial to fix decoding for an existing slot, temporarily changing the ERROR in ReorderBufferCommit() to a WARNING appears to be the best fix. Author: Andres Freund Discussion: https://postgr.es/m/20180914021046.oi7dm4ra3ot2g2kt@alap3.anarazel.de Backpatch: 9.4-, where logical decoding was introduced
1 parent 26cc275 commit c7b96ba

File tree

6 files changed

+171
-10
lines changed

6 files changed

+171
-10
lines changed

contrib/test_decoding/expected/rewrite.out

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,61 @@
11
-- predictability
22
SET synchronous_commit = on;
33
DROP TABLE IF EXISTS replication_example;
4+
-- Ensure there's tables with toast datums. To do so, we dynamically
5+
-- create a function returning a large textblob. We want tables of
6+
-- different kinds: mapped catalog table, unmapped catalog table,
7+
-- shared catalog table and usertable.
8+
CREATE FUNCTION exec(text) returns void language plpgsql volatile
9+
AS $f$
10+
BEGIN
11+
EXECUTE $1;
12+
END;
13+
$f$;
14+
CREATE ROLE justforcomments NOLOGIN;
15+
SELECT exec(
16+
format($outer$CREATE FUNCTION iamalongfunction() RETURNS TEXT IMMUTABLE LANGUAGE SQL AS $f$SELECT text %L$f$$outer$,
17+
(SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i))));
18+
exec
19+
------
20+
21+
(1 row)
22+
23+
SELECT exec(
24+
format($outer$COMMENT ON FUNCTION iamalongfunction() IS %L$outer$,
25+
iamalongfunction()));
26+
exec
27+
------
28+
29+
(1 row)
30+
31+
SELECT exec(
32+
format($outer$COMMENT ON ROLE JUSTFORCOMMENTS IS %L$outer$,
33+
iamalongfunction()));
34+
exec
35+
------
36+
37+
(1 row)
38+
39+
CREATE TABLE iamalargetable AS SELECT iamalongfunction() longfunctionoutput;
40+
-- verify toast usage
41+
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_proc'::regclass)) > 0;
42+
?column?
43+
----------
44+
t
45+
(1 row)
46+
47+
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_description'::regclass)) > 0;
48+
?column?
49+
----------
50+
t
51+
(1 row)
52+
53+
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_shdescription'::regclass)) > 0;
54+
?column?
55+
----------
56+
t
57+
(1 row)
58+
459
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
560
?column?
661
----------
@@ -76,10 +131,30 @@ SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'inc
76131
COMMIT
77132
(15 rows)
78133

134+
-- trigger repeated rewrites of a system catalog with a toast table,
135+
-- that previously was buggy: 20180914021046.oi7dm4ra3ot2g2kt@alap3.anarazel.de
136+
VACUUM FULL pg_proc; VACUUM FULL pg_description; VACUUM FULL pg_shdescription; VACUUM FULL iamalargetable;
137+
INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (8, 6, 1);
138+
VACUUM FULL pg_proc; VACUUM FULL pg_description; VACUUM FULL pg_shdescription; VACUUM FULL iamalargetable;
139+
INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (9, 7, 1);
140+
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
141+
data
142+
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
143+
BEGIN
144+
table public.replication_example: INSERT: id[integer]:9 somedata[integer]:8 text[character varying]:null testcolumn1[integer]:6 testcolumn2[integer]:null testcolumn3[integer]:1
145+
COMMIT
146+
BEGIN
147+
table public.replication_example: INSERT: id[integer]:10 somedata[integer]:9 text[character varying]:null testcolumn1[integer]:7 testcolumn2[integer]:null testcolumn3[integer]:1
148+
COMMIT
149+
(6 rows)
150+
79151
SELECT pg_drop_replication_slot('regression_slot');
80152
pg_drop_replication_slot
81153
--------------------------
82154

83155
(1 row)
84156

85157
DROP TABLE IF EXISTS replication_example;
158+
DROP FUNCTION iamalongfunction();
159+
DROP FUNCTION exec(text);
160+
DROP ROLE justforcomments;

contrib/test_decoding/sql/rewrite.sql

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,35 @@ SET synchronous_commit = on;
33

44
DROP TABLE IF EXISTS replication_example;
55

6+
-- Ensure there's tables with toast datums. To do so, we dynamically
7+
-- create a function returning a large textblob. We want tables of
8+
-- different kinds: mapped catalog table, unmapped catalog table,
9+
-- shared catalog table and usertable.
10+
CREATE FUNCTION exec(text) returns void language plpgsql volatile
11+
AS $f$
12+
BEGIN
13+
EXECUTE $1;
14+
END;
15+
$f$;
16+
CREATE ROLE justforcomments NOLOGIN;
17+
18+
SELECT exec(
19+
format($outer$CREATE FUNCTION iamalongfunction() RETURNS TEXT IMMUTABLE LANGUAGE SQL AS $f$SELECT text %L$f$$outer$,
20+
(SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i))));
21+
SELECT exec(
22+
format($outer$COMMENT ON FUNCTION iamalongfunction() IS %L$outer$,
23+
iamalongfunction()));
24+
SELECT exec(
25+
format($outer$COMMENT ON ROLE JUSTFORCOMMENTS IS %L$outer$,
26+
iamalongfunction()));
27+
CREATE TABLE iamalargetable AS SELECT iamalongfunction() longfunctionoutput;
28+
29+
-- verify toast usage
30+
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_proc'::regclass)) > 0;
31+
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_description'::regclass)) > 0;
32+
SELECT pg_relation_size((SELECT reltoastrelid FROM pg_class WHERE oid = 'pg_shdescription'::regclass)) > 0;
33+
34+
635
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
736
CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
837
INSERT INTO replication_example(somedata) VALUES (1);
@@ -57,6 +86,17 @@ COMMIT;
5786
CHECKPOINT;
5887

5988
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
60-
SELECT pg_drop_replication_slot('regression_slot');
6189

90+
-- trigger repeated rewrites of a system catalog with a toast table,
91+
-- that previously was buggy: 20180914021046.oi7dm4ra3ot2g2kt@alap3.anarazel.de
92+
VACUUM FULL pg_proc; VACUUM FULL pg_description; VACUUM FULL pg_shdescription; VACUUM FULL iamalargetable;
93+
INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (8, 6, 1);
94+
VACUUM FULL pg_proc; VACUUM FULL pg_description; VACUUM FULL pg_shdescription; VACUUM FULL iamalargetable;
95+
INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (9, 7, 1);
96+
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
97+
98+
SELECT pg_drop_replication_slot('regression_slot');
6299
DROP TABLE IF EXISTS replication_example;
100+
DROP FUNCTION iamalongfunction();
101+
DROP FUNCTION exec(text);
102+
DROP ROLE justforcomments;

src/backend/access/heap/heapam.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2036,6 +2036,18 @@ FreeBulkInsertState(BulkInsertState bistate)
20362036
* This causes rows to be frozen, which is an MVCC violation and
20372037
* requires explicit options chosen by user.
20382038
*
2039+
* HEAP_INSERT_SPECULATIVE is used on so-called "speculative insertions",
2040+
* which can be backed out afterwards without aborting the whole transaction.
2041+
* Other sessions can wait for the speculative insertion to be confirmed,
2042+
* turning it into a regular tuple, or aborted, as if it never existed.
2043+
* Speculatively inserted tuples behave as "value locks" of short duration,
2044+
* used to implement INSERT .. ON CONFLICT.
2045+
*
2046+
* HEAP_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
2047+
* information for the tuple. This should solely be used during table rewrites
2048+
* where RelationIsLogicallyLogged(relation) is not yet accurate for the new
2049+
* relation.
2050+
*
20392051
* Note that these options will be applied when inserting into the heap's
20402052
* TOAST table, too, if the tuple requires any out-of-line data.
20412053
*
@@ -2138,7 +2150,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
21382150
* Also, if this is a catalog, we need to transmit combocids to
21392151
* properly decode, so log that as well.
21402152
*/
2141-
need_tuple_data = RelationIsLogicallyLogged(relation);
2153+
need_tuple_data = RelationIsLogicallyLogged(relation) &&
2154+
!(options & HEAP_INSERT_NO_LOGICAL);
21422155
if (RelationIsAccessibleInLogicalDecoding(relation))
21432156
log_heap_new_cid(relation, heaptup);
21442157

@@ -2325,6 +2338,9 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
23252338
bool need_tuple_data = RelationIsLogicallyLogged(relation);
23262339
bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
23272340

2341+
/* currently not needed (thus unsupported) for heap_multi_insert() */
2342+
AssertArg(!(options & HEAP_INSERT_NO_LOGICAL));
2343+
23282344
needwal = !(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation);
23292345
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
23302346
HEAP_DEFAULT_FILLFACTOR);

src/backend/access/heap/rewriteheap.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -651,10 +651,23 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
651651
heaptup = tup;
652652
}
653653
else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
654+
{
655+
int options = HEAP_INSERT_SKIP_FSM;
656+
657+
if (!state->rs_use_wal)
658+
options |= HEAP_INSERT_SKIP_WAL;
659+
660+
/*
661+
* The new relfilenode's relcache entry doesn't have the necessary
662+
* information to determine whether a relation should emit data for
663+
* logical decoding. Force it to off if necessary.
664+
*/
665+
if (!RelationIsLogicallyLogged(state->rs_old_rel))
666+
options |= HEAP_INSERT_NO_LOGICAL;
667+
654668
heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
655-
HEAP_INSERT_SKIP_FSM |
656-
(state->rs_use_wal ?
657-
0 : HEAP_INSERT_SKIP_WAL));
669+
options);
670+
}
658671
else
659672
heaptup = tup;
660673

src/backend/replication/logical/reorderbuffer.c

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,8 +1509,16 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
15091509
change->data.tp.relnode.relNode);
15101510

15111511
/*
1512-
* Catalog tuple without data, emitted while catalog was
1513-
* in the process of being rewritten.
1512+
* Mapped catalog tuple without data, emitted while
1513+
* catalog table was in the process of being rewritten. We
1514+
* can fail to look up the relfilenode, because the
1515+
* relmapper has no "historic" view, in contrast to
1516+
* the normal catalog during decoding. Thus repeated
1517+
* rewrites can cause a lookup failure. That's OK because
1518+
* we do not decode catalog changes anyway. Normally such
1519+
* tuples would be skipped over below, but we can't
1520+
* identify whether the table should be logically logged
1521+
* without mapping the relfilenode to the oid.
15141522
*/
15151523
if (reloid == InvalidOid &&
15161524
change->data.tp.newtuple == NULL &&
@@ -1564,10 +1572,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
15641572
* transaction's changes. Otherwise it will get
15651573
* freed/reused while restoring spooled data from
15661574
* disk.
1575+
*
1576+
* But skip doing so if there's no
1577+
* tuple-data. That happens if a non-mapped system
1578+
* catalog with a toast table is rewritten.
15671579
*/
1568-
dlist_delete(&change->node);
1569-
ReorderBufferToastAppendChunk(rb, txn, relation,
1570-
change);
1580+
if (change->data.tp.newtuple != NULL)
1581+
{
1582+
dlist_delete(&change->node);
1583+
ReorderBufferToastAppendChunk(rb, txn, relation,
1584+
change);
1585+
}
15711586
}
15721587

15731588
}

src/include/access/heapam.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
#define HEAP_INSERT_SKIP_WAL 0x0001
2828
#define HEAP_INSERT_SKIP_FSM 0x0002
2929
#define HEAP_INSERT_FROZEN 0x0004
30+
/* gap, to keep NO_LOGICAL in sync w/ newer branches */
31+
#define HEAP_INSERT_NO_LOGICAL 0x0010
3032

3133
typedef struct BulkInsertStateData *BulkInsertState;
3234

0 commit comments

Comments
 (0)