Skip to content

Commit 3cd9c3b

Browse files
committed
Fix CREATE INDEX CONCURRENTLY for the newest prepared transactions.
The purpose of commit 8a54e12 was to fix this, and it sufficed when the PREPARE TRANSACTION completed before the CIC looked for lock conflicts. Otherwise, things still broke. As before, in a cluster having used CIC while having enabled prepared transactions, queries that use the resulting index can silently fail to find rows. It may be necessary to reindex to recover from past occurrences; REINDEX CONCURRENTLY suffices. Fix this for future index builds by making CIC wait for arbitrarily-recent prepared transactions and for ordinary transactions that may yet PREPARE TRANSACTION. As part of that, have PREPARE TRANSACTION transfer locks to its dummy PGPROC before it calls ProcArrayClearTransaction(). Back-patch to 9.6 (all supported versions). Andrey Borodin, reviewed (in earlier versions) by Andres Freund. Discussion: https://postgr.es/m/01824242-AA92-4FE9-9BA7-AEBAFFEA3D0C@yandex-team.ru
1 parent fdd965d commit 3cd9c3b

File tree

8 files changed

+362
-36
lines changed

8 files changed

+362
-36
lines changed

contrib/amcheck/t/003_cic_2pc.pl

+188
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
2+
# Copyright (c) 2021, PostgreSQL Global Development Group
3+
4+
# Test CREATE INDEX CONCURRENTLY with concurrent prepared-xact modifications
5+
use strict;
6+
use warnings;
7+
8+
use Config;
9+
use PostgresNode;
10+
use TestLib;
11+
12+
use Test::More tests => 6;
13+
14+
my ($node, $result);
15+
16+
#
17+
# Test set-up
18+
#
19+
$node = PostgresNode->new('CIC_2PC_test');
20+
$node->init;
21+
$node->append_conf('postgresql.conf', 'max_prepared_transactions = 10');
22+
$node->append_conf('postgresql.conf', 'lock_timeout = 180000');
23+
$node->start;
24+
$node->safe_psql('postgres', q(CREATE EXTENSION amcheck));
25+
$node->safe_psql('postgres', q(CREATE TABLE tbl(i int)));
26+
27+
28+
#
29+
# Run 3 overlapping 2PC transactions with CIC
30+
#
31+
# We have two concurrent background psql processes: $main_h for INSERTs and
32+
# $cic_h for CIC. Also, we use non-background psql for some COMMIT PREPARED
33+
# statements.
34+
#
35+
36+
my $main_in = '';
37+
my $main_out = '';
38+
my $main_timer = IPC::Run::timeout(180);
39+
40+
my $main_h =
41+
$node->background_psql('postgres', \$main_in, \$main_out,
42+
$main_timer, on_error_stop => 1);
43+
$main_in .= q(
44+
BEGIN;
45+
INSERT INTO tbl VALUES(0);
46+
\echo syncpoint1
47+
);
48+
pump $main_h until $main_out =~ /syncpoint1/ || $main_timer->is_expired;
49+
50+
my $cic_in = '';
51+
my $cic_out = '';
52+
my $cic_timer = IPC::Run::timeout(180);
53+
my $cic_h =
54+
$node->background_psql('postgres', \$cic_in, \$cic_out,
55+
$cic_timer, on_error_stop => 1);
56+
$cic_in .= q(
57+
\echo start
58+
CREATE INDEX CONCURRENTLY idx ON tbl(i);
59+
);
60+
pump $cic_h until $cic_out =~ /start/ || $cic_timer->is_expired;
61+
62+
$main_in .= q(
63+
PREPARE TRANSACTION 'a';
64+
);
65+
66+
$main_in .= q(
67+
BEGIN;
68+
INSERT INTO tbl VALUES(0);
69+
\echo syncpoint2
70+
);
71+
pump $main_h until $main_out =~ /syncpoint2/ || $main_timer->is_expired;
72+
73+
$node->safe_psql('postgres', q(COMMIT PREPARED 'a';));
74+
75+
$main_in .= q(
76+
PREPARE TRANSACTION 'b';
77+
BEGIN;
78+
INSERT INTO tbl VALUES(0);
79+
\echo syncpoint3
80+
);
81+
pump $main_h until $main_out =~ /syncpoint3/ || $main_timer->is_expired;
82+
83+
$node->safe_psql('postgres', q(COMMIT PREPARED 'b';));
84+
85+
$main_in .= q(
86+
PREPARE TRANSACTION 'c';
87+
COMMIT PREPARED 'c';
88+
);
89+
$main_h->pump_nb;
90+
91+
$main_h->finish;
92+
$cic_h->finish;
93+
94+
$result = $node->psql('postgres', q(SELECT bt_index_check('idx',true)));
95+
is($result, '0', 'bt_index_check after overlapping 2PC');
96+
97+
98+
#
99+
# Server restart shall not change whether prepared xact blocks CIC
100+
#
101+
102+
$node->safe_psql(
103+
'postgres', q(
104+
BEGIN;
105+
INSERT INTO tbl VALUES(0);
106+
PREPARE TRANSACTION 'spans_restart';
107+
BEGIN;
108+
CREATE TABLE unused ();
109+
PREPARE TRANSACTION 'persists_forever';
110+
));
111+
$node->restart;
112+
113+
my $reindex_in = '';
114+
my $reindex_out = '';
115+
my $reindex_timer = IPC::Run::timeout(180);
116+
my $reindex_h =
117+
$node->background_psql('postgres', \$reindex_in, \$reindex_out,
118+
$reindex_timer, on_error_stop => 1);
119+
$reindex_in .= q(
120+
\echo start
121+
DROP INDEX CONCURRENTLY idx;
122+
CREATE INDEX CONCURRENTLY idx ON tbl(i);
123+
);
124+
pump $reindex_h until $reindex_out =~ /start/ || $reindex_timer->is_expired;
125+
126+
$node->safe_psql('postgres', "COMMIT PREPARED 'spans_restart'");
127+
$reindex_h->finish;
128+
$result = $node->psql('postgres', q(SELECT bt_index_check('idx',true)));
129+
is($result, '0', 'bt_index_check after 2PC and restart');
130+
131+
132+
#
133+
# Stress CIC+2PC with pgbench
134+
#
135+
136+
# Fix broken index first
137+
$node->safe_psql('postgres', q(REINDEX TABLE tbl;));
138+
139+
# Run background pgbench with CIC. We cannot mix-in this script into single
140+
# pgbench: CIC will deadlock with itself occasionally.
141+
my $pgbench_out = '';
142+
my $pgbench_timer = IPC::Run::timeout(180);
143+
my $pgbench_h = $node->background_pgbench(
144+
'--no-vacuum --client=1 --transactions=100',
145+
{
146+
'002_pgbench_concurrent_cic' => q(
147+
DROP INDEX CONCURRENTLY idx;
148+
CREATE INDEX CONCURRENTLY idx ON tbl(i);
149+
SELECT bt_index_check('idx',true);
150+
)
151+
},
152+
\$pgbench_out,
153+
$pgbench_timer);
154+
155+
# Run pgbench.
156+
$node->pgbench(
157+
'--no-vacuum --client=5 --transactions=100',
158+
0,
159+
[qr{actually processed}],
160+
[qr{^$}],
161+
'concurrent INSERTs w/ 2PC',
162+
{
163+
'002_pgbench_concurrent_2pc' => q(
164+
BEGIN;
165+
INSERT INTO tbl VALUES(0);
166+
PREPARE TRANSACTION 'c:client_id';
167+
COMMIT PREPARED 'c:client_id';
168+
),
169+
'002_pgbench_concurrent_2pc_savepoint' => q(
170+
BEGIN;
171+
SAVEPOINT s1;
172+
INSERT INTO tbl VALUES(0);
173+
PREPARE TRANSACTION 'c:client_id';
174+
COMMIT PREPARED 'c:client_id';
175+
)
176+
});
177+
178+
$pgbench_h->pump_nb;
179+
$pgbench_h->finish();
180+
$result =
181+
($Config{osname} eq "MSWin32")
182+
? ($pgbench_h->full_results)[0]
183+
: $pgbench_h->result(0);
184+
is($result, 0, "pgbench with CIC works");
185+
186+
# done
187+
$node->stop;
188+
done_testing();

src/backend/access/transam/twophase.c

+60-3
Original file line numberDiff line numberDiff line change
@@ -459,14 +459,24 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
459459
proc->pgprocno = gxact->pgprocno;
460460
SHMQueueElemInit(&(proc->links));
461461
proc->waitStatus = PROC_WAIT_STATUS_OK;
462-
/* We set up the gxact's VXID as InvalidBackendId/XID */
463-
proc->lxid = (LocalTransactionId) xid;
462+
if (LocalTransactionIdIsValid(MyProc->lxid))
463+
{
464+
/* clone VXID, for TwoPhaseGetXidByVirtualXID() to find */
465+
proc->lxid = MyProc->lxid;
466+
proc->backendId = MyBackendId;
467+
}
468+
else
469+
{
470+
Assert(AmStartupProcess() || !IsPostmasterEnvironment);
471+
/* GetLockConflicts() uses this to specify a wait on the XID */
472+
proc->lxid = xid;
473+
proc->backendId = InvalidBackendId;
474+
}
464475
proc->xid = xid;
465476
Assert(proc->xmin == InvalidTransactionId);
466477
proc->delayChkpt = false;
467478
proc->statusFlags = 0;
468479
proc->pid = 0;
469-
proc->backendId = InvalidBackendId;
470480
proc->databaseId = databaseid;
471481
proc->roleId = owner;
472482
proc->tempNamespaceId = InvalidOid;
@@ -846,6 +856,53 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held)
846856
return result;
847857
}
848858

859+
/*
860+
* TwoPhaseGetXidByVirtualXID
861+
* Lookup VXID among xacts prepared since last startup.
862+
*
863+
* (This won't find recovered xacts.) If more than one matches, return any
864+
* and set "have_more" to true. To witness multiple matches, a single
865+
* BackendId must consume 2^32 LXIDs, with no intervening database restart.
866+
*/
867+
TransactionId
868+
TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid,
869+
bool *have_more)
870+
{
871+
int i;
872+
TransactionId result = InvalidTransactionId;
873+
874+
Assert(VirtualTransactionIdIsValid(vxid));
875+
LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
876+
877+
for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
878+
{
879+
GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
880+
PGPROC *proc;
881+
VirtualTransactionId proc_vxid;
882+
883+
if (!gxact->valid)
884+
continue;
885+
proc = &ProcGlobal->allProcs[gxact->pgprocno];
886+
GET_VXID_FROM_PGPROC(proc_vxid, *proc);
887+
if (VirtualTransactionIdEquals(vxid, proc_vxid))
888+
{
889+
/* Startup process sets proc->backendId to InvalidBackendId. */
890+
Assert(!gxact->inredo);
891+
892+
if (result != InvalidTransactionId)
893+
{
894+
*have_more = true;
895+
break;
896+
}
897+
result = gxact->xid;
898+
}
899+
}
900+
901+
LWLockRelease(TwoPhaseStateLock);
902+
903+
return result;
904+
}
905+
849906
/*
850907
* TwoPhaseGetDummyBackendId
851908
* Get the dummy backend ID for prepared transaction specified by XID

src/backend/access/transam/xact.c

+7-1
Original file line numberDiff line numberDiff line change
@@ -2506,6 +2506,13 @@ PrepareTransaction(void)
25062506
/* Reset XactLastRecEnd until the next transaction writes something */
25072507
XactLastRecEnd = 0;
25082508

2509+
/*
2510+
* Transfer our locks to a dummy PGPROC. This has to be done before
2511+
* ProcArrayClearTransaction(). Otherwise, a GetLockConflicts() would
2512+
* conclude "xact already committed or aborted" for our locks.
2513+
*/
2514+
PostPrepare_Locks(xid);
2515+
25092516
/*
25102517
* Let others know about no transaction in progress by me. This has to be
25112518
* done *after* the prepared transaction has been marked valid, else
@@ -2545,7 +2552,6 @@ PrepareTransaction(void)
25452552

25462553
PostPrepare_MultiXact(xid);
25472554

2548-
PostPrepare_Locks(xid);
25492555
PostPrepare_PredicateLocks(xid);
25502556

25512557
ResourceOwnerRelease(TopTransactionResourceOwner,

src/backend/storage/lmgr/lmgr.c

+4-3
Original file line numberDiff line numberDiff line change
@@ -871,9 +871,10 @@ XactLockTableWaitErrorCb(void *arg)
871871
* To do this, obtain the current list of lockers, and wait on their VXIDs
872872
* until they are finished.
873873
*
874-
* Note we don't try to acquire the locks on the given locktags, only the VXIDs
875-
* of its lock holders; if somebody grabs a conflicting lock on the objects
876-
* after we obtained our initial list of lockers, we will not wait for them.
874+
* Note we don't try to acquire the locks on the given locktags, only the
875+
* VXIDs and XIDs of their lock holders; if somebody grabs a conflicting lock
876+
* on the objects after we obtained our initial list of lockers, we will not
877+
* wait for them.
877878
*/
878879
void
879880
WaitForLockersMultiple(List *locktags, LOCKMODE lockmode, bool progress)

0 commit comments

Comments
 (0)