Skip to content

Commit fe5d44a

Browse files
committed
Fix CREATE INDEX CONCURRENTLY for the newest prepared transactions.
The purpose of commit 8a54e12 was to fix this, and it sufficed when the PREPARE TRANSACTION completed before the CIC looked for lock conflicts. Otherwise, things still broke. As before, in a cluster having used CIC while having enabled prepared transactions, queries that use the resulting index can silently fail to find rows. It may be necessary to reindex to recover from past occurrences; REINDEX CONCURRENTLY suffices. Fix this for future index builds by making CIC wait for arbitrarily-recent prepared transactions and for ordinary transactions that may yet PREPARE TRANSACTION. As part of that, have PREPARE TRANSACTION transfer locks to its dummy PGPROC before it calls ProcArrayClearTransaction(). Back-patch to 9.6 (all supported versions). Andrey Borodin, reviewed (in earlier versions) by Andres Freund. Discussion: https://postgr.es/m/01824242-AA92-4FE9-9BA7-AEBAFFEA3D0C@yandex-team.ru
1 parent 0869e53 commit fe5d44a

File tree

9 files changed

+452
-36
lines changed

9 files changed

+452
-36
lines changed

contrib/amcheck/t/003_cic_2pc.pl

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
2+
# Copyright (c) 2021, PostgreSQL Global Development Group
3+
4+
# Test CREATE INDEX CONCURRENTLY with concurrent prepared-xact modifications
5+
use strict;
6+
use warnings;
7+
8+
use Config;
9+
use PostgresNode;
10+
use TestLib;
11+
12+
use Test::More tests => 6;
13+
14+
my ($node, $result);
15+
16+
#
17+
# Test set-up
18+
#
19+
$node = get_new_node('CIC_2PC_test');
20+
$node->init;
21+
$node->append_conf('postgresql.conf', 'max_prepared_transactions = 10');
22+
$node->append_conf('postgresql.conf', 'lock_timeout = 180000');
23+
$node->start;
24+
$node->safe_psql('postgres', q(CREATE EXTENSION amcheck));
25+
$node->safe_psql('postgres', q(CREATE TABLE tbl(i int)));
26+
27+
28+
#
29+
# Run 3 overlapping 2PC transactions with CIC
30+
#
31+
# We have two concurrent background psql processes: $main_h for INSERTs and
32+
# $cic_h for CIC. Also, we use non-background psql for some COMMIT PREPARED
33+
# statements.
34+
#
35+
36+
my $main_in = '';
37+
my $main_out = '';
38+
my $main_timer = IPC::Run::timeout(180);
39+
40+
my $main_h =
41+
$node->background_psql('postgres', \$main_in, \$main_out,
42+
$main_timer, on_error_stop => 1);
43+
$main_in .= q(
44+
BEGIN;
45+
INSERT INTO tbl VALUES(0);
46+
\echo syncpoint1
47+
);
48+
pump $main_h until $main_out =~ /syncpoint1/ || $main_timer->is_expired;
49+
50+
my $cic_in = '';
51+
my $cic_out = '';
52+
my $cic_timer = IPC::Run::timeout(180);
53+
my $cic_h =
54+
$node->background_psql('postgres', \$cic_in, \$cic_out,
55+
$cic_timer, on_error_stop => 1);
56+
$cic_in .= q(
57+
\echo start
58+
CREATE INDEX CONCURRENTLY idx ON tbl(i);
59+
);
60+
pump $cic_h until $cic_out =~ /start/ || $cic_timer->is_expired;
61+
62+
$main_in .= q(
63+
PREPARE TRANSACTION 'a';
64+
);
65+
66+
$main_in .= q(
67+
BEGIN;
68+
INSERT INTO tbl VALUES(0);
69+
\echo syncpoint2
70+
);
71+
pump $main_h until $main_out =~ /syncpoint2/ || $main_timer->is_expired;
72+
73+
$node->safe_psql('postgres', q(COMMIT PREPARED 'a';));
74+
75+
$main_in .= q(
76+
PREPARE TRANSACTION 'b';
77+
BEGIN;
78+
INSERT INTO tbl VALUES(0);
79+
\echo syncpoint3
80+
);
81+
pump $main_h until $main_out =~ /syncpoint3/ || $main_timer->is_expired;
82+
83+
$node->safe_psql('postgres', q(COMMIT PREPARED 'b';));
84+
85+
$main_in .= q(
86+
PREPARE TRANSACTION 'c';
87+
COMMIT PREPARED 'c';
88+
);
89+
$main_h->pump_nb;
90+
91+
$main_h->finish;
92+
$cic_h->finish;
93+
94+
$result = $node->psql('postgres', q(SELECT bt_index_check('idx',true)));
95+
is($result, '0', 'bt_index_check after overlapping 2PC');
96+
97+
98+
#
99+
# Server restart shall not change whether prepared xact blocks CIC
100+
#
101+
102+
$node->safe_psql(
103+
'postgres', q(
104+
BEGIN;
105+
INSERT INTO tbl VALUES(0);
106+
PREPARE TRANSACTION 'spans_restart';
107+
BEGIN;
108+
CREATE TABLE unused ();
109+
PREPARE TRANSACTION 'persists_forever';
110+
));
111+
$node->restart;
112+
113+
my $reindex_in = '';
114+
my $reindex_out = '';
115+
my $reindex_timer = IPC::Run::timeout(180);
116+
my $reindex_h =
117+
$node->background_psql('postgres', \$reindex_in, \$reindex_out,
118+
$reindex_timer, on_error_stop => 1);
119+
$reindex_in .= q(
120+
\echo start
121+
DROP INDEX CONCURRENTLY idx;
122+
CREATE INDEX CONCURRENTLY idx ON tbl(i);
123+
);
124+
pump $reindex_h until $reindex_out =~ /start/ || $reindex_timer->is_expired;
125+
126+
$node->safe_psql('postgres', "COMMIT PREPARED 'spans_restart'");
127+
$reindex_h->finish;
128+
$result = $node->psql('postgres', q(SELECT bt_index_check('idx',true)));
129+
is($result, '0', 'bt_index_check after 2PC and restart');
130+
131+
132+
#
133+
# Stress CIC+2PC with pgbench
134+
#
135+
136+
# Fix broken index first
137+
$node->safe_psql('postgres', q(REINDEX TABLE tbl;));
138+
139+
# Run background pgbench with CIC. We cannot mix-in this script into single
140+
# pgbench: CIC will deadlock with itself occasionally.
141+
my $pgbench_out = '';
142+
my $pgbench_timer = IPC::Run::timeout(180);
143+
my $pgbench_h = $node->background_pgbench(
144+
'--no-vacuum --client=1 --transactions=100',
145+
{
146+
'002_pgbench_concurrent_cic' => q(
147+
DROP INDEX CONCURRENTLY idx;
148+
CREATE INDEX CONCURRENTLY idx ON tbl(i);
149+
SELECT bt_index_check('idx',true);
150+
)
151+
},
152+
\$pgbench_out,
153+
$pgbench_timer);
154+
155+
# Run pgbench.
156+
$node->pgbench(
157+
'--no-vacuum --client=5 --transactions=100',
158+
0,
159+
[qr{actually processed}],
160+
[qr{^$}],
161+
'concurrent INSERTs w/ 2PC',
162+
{
163+
'002_pgbench_concurrent_2pc' => q(
164+
BEGIN;
165+
INSERT INTO tbl VALUES(0);
166+
PREPARE TRANSACTION 'c:client_id';
167+
COMMIT PREPARED 'c:client_id';
168+
),
169+
'002_pgbench_concurrent_2pc_savepoint' => q(
170+
BEGIN;
171+
SAVEPOINT s1;
172+
INSERT INTO tbl VALUES(0);
173+
PREPARE TRANSACTION 'c:client_id';
174+
COMMIT PREPARED 'c:client_id';
175+
)
176+
});
177+
178+
$pgbench_h->pump_nb;
179+
$pgbench_h->finish();
180+
$result =
181+
($Config{osname} eq "MSWin32")
182+
? ($pgbench_h->full_results)[0]
183+
: $pgbench_h->result(0);
184+
is($result, 0, "pgbench with CIC works");
185+
186+
# done
187+
$node->stop;
188+
done_testing();

src/backend/access/transam/twophase.c

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -462,14 +462,24 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
462462
proc->pgprocno = gxact->pgprocno;
463463
SHMQueueElemInit(&(proc->links));
464464
proc->waitStatus = STATUS_OK;
465-
/* We set up the gxact's VXID as InvalidBackendId/XID */
466-
proc->lxid = (LocalTransactionId) xid;
465+
if (LocalTransactionIdIsValid(MyProc->lxid))
466+
{
467+
/* clone VXID, for TwoPhaseGetXidByVirtualXID() to find */
468+
proc->lxid = MyProc->lxid;
469+
proc->backendId = MyBackendId;
470+
}
471+
else
472+
{
473+
Assert(AmStartupProcess() || !IsPostmasterEnvironment);
474+
/* GetLockConflicts() uses this to specify a wait on the XID */
475+
proc->lxid = xid;
476+
proc->backendId = InvalidBackendId;
477+
}
467478
pgxact->xid = xid;
468479
pgxact->xmin = InvalidTransactionId;
469480
pgxact->delayChkpt = false;
470481
pgxact->vacuumFlags = 0;
471482
proc->pid = 0;
472-
proc->backendId = InvalidBackendId;
473483
proc->databaseId = databaseid;
474484
proc->roleId = owner;
475485
proc->tempNamespaceId = InvalidOid;
@@ -851,6 +861,53 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held)
851861
return result;
852862
}
853863

864+
/*
865+
* TwoPhaseGetXidByVirtualXID
866+
* Lookup VXID among xacts prepared since last startup.
867+
*
868+
* (This won't find recovered xacts.) If more than one matches, return any
869+
* and set "have_more" to true. To witness multiple matches, a single
870+
* BackendId must consume 2^32 LXIDs, with no intervening database restart.
871+
*/
872+
TransactionId
873+
TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid,
874+
bool *have_more)
875+
{
876+
int i;
877+
TransactionId result = InvalidTransactionId;
878+
879+
Assert(VirtualTransactionIdIsValid(vxid));
880+
LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
881+
882+
for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
883+
{
884+
GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
885+
PGPROC *proc;
886+
VirtualTransactionId proc_vxid;
887+
888+
if (!gxact->valid)
889+
continue;
890+
proc = &ProcGlobal->allProcs[gxact->pgprocno];
891+
GET_VXID_FROM_PGPROC(proc_vxid, *proc);
892+
if (VirtualTransactionIdEquals(vxid, proc_vxid))
893+
{
894+
/* Startup process sets proc->backendId to InvalidBackendId. */
895+
Assert(!gxact->inredo);
896+
897+
if (result != InvalidTransactionId)
898+
{
899+
*have_more = true;
900+
break;
901+
}
902+
result = gxact->xid;
903+
}
904+
}
905+
906+
LWLockRelease(TwoPhaseStateLock);
907+
908+
return result;
909+
}
910+
854911
/*
855912
* TwoPhaseGetDummyBackendId
856913
* Get the dummy backend ID for prepared transaction specified by XID

src/backend/access/transam/xact.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2462,6 +2462,13 @@ PrepareTransaction(void)
24622462
/* Reset XactLastRecEnd until the next transaction writes something */
24632463
XactLastRecEnd = 0;
24642464

2465+
/*
2466+
* Transfer our locks to a dummy PGPROC. This has to be done before
2467+
* ProcArrayClearTransaction(). Otherwise, a GetLockConflicts() would
2468+
* conclude "xact already committed or aborted" for our locks.
2469+
*/
2470+
PostPrepare_Locks(xid);
2471+
24652472
/*
24662473
* Let others know about no transaction in progress by me. This has to be
24672474
* done *after* the prepared transaction has been marked valid, else
@@ -2501,7 +2508,6 @@ PrepareTransaction(void)
25012508

25022509
PostPrepare_MultiXact(xid);
25032510

2504-
PostPrepare_Locks(xid);
25052511
PostPrepare_PredicateLocks(xid);
25062512

25072513
ResourceOwnerRelease(TopTransactionResourceOwner,

src/backend/storage/lmgr/lmgr.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -870,9 +870,10 @@ XactLockTableWaitErrorCb(void *arg)
870870
* To do this, obtain the current list of lockers, and wait on their VXIDs
871871
* until they are finished.
872872
*
873-
* Note we don't try to acquire the locks on the given locktags, only the VXIDs
874-
* of its lock holders; if somebody grabs a conflicting lock on the objects
875-
* after we obtained our initial list of lockers, we will not wait for them.
873+
* Note we don't try to acquire the locks on the given locktags, only the
874+
* VXIDs and XIDs of their lock holders; if somebody grabs a conflicting lock
875+
* on the objects after we obtained our initial list of lockers, we will not
876+
* wait for them.
876877
*/
877878
void
878879
WaitForLockersMultiple(List *locktags, LOCKMODE lockmode, bool progress)

0 commit comments

Comments
 (0)