Skip to content

Commit c431986

Browse files
committed
postgres_fdw: re-issue cancel requests a few times if necessary.
Despite the best efforts of commit 0e5c823, we're still seeing occasional failures of postgres_fdw's query_cancel test in the buildfarm. Investigation suggests that its 100ms timeout is still not enough to reliably ensure that the remote side starts the query before receiving the cancel request --- and if it hasn't, it will just discard the request because it's idle. We discussed allowing a cancel request to kill the next-received query, but that would have wide and perhaps unpleasant side-effects. What seems safer is to make postgres_fdw do what a human user would likely do, which is issue another cancel request if the first one didn't seem to do anything. We'll keep the same overall 30 second grace period before concluding things are broken, but issue additional cancel requests after 1 second, then 2 more seconds, then 4, then 8. (The next one in series is 16 seconds, but we'll hit the 30 second timeout before that.) Having done that, revert the timeout in query_cancel.sql to 10 ms. That will still be enough on most machines, most of the time, for the remote query to start; but now we're intentionally risking the race condition occurring sometimes in the buildfarm, so that the repeat-cancel code path will get some testing. As before, back-patch to v17. We might eventually contemplate back-patching this further, and/or adding similar logic to dblink. But given the lack of field complaints to date, this feels like mostly an exercise in test case stabilization, so v17 is enough. Discussion: https://postgr.es/m/colnv3lzzmc53iu5qoawynr6qq7etn47lmggqr65ddtpjliq5d@glkveb4m6nop
1 parent 1585ff7 commit c431986

File tree

3 files changed

+84
-24
lines changed

3 files changed

+84
-24
lines changed

contrib/postgres_fdw/connection.c

+76-20
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,13 @@ static uint32 pgfdw_we_get_result = 0;
9595
*/
9696
#define CONNECTION_CLEANUP_TIMEOUT 30000
9797

98+
/*
99+
* Milliseconds to wait before issuing another cancel request. This covers
100+
* the race condition where the remote session ignored our cancel request
101+
* because it arrived while idle.
102+
*/
103+
#define RETRY_CANCEL_TIMEOUT 1000
104+
98105
/* Macro for constructing abort command to be sent */
99106
#define CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel) \
100107
do { \
@@ -145,6 +152,7 @@ static void pgfdw_reset_xact_state(ConnCacheEntry *entry, bool toplevel);
145152
static bool pgfdw_cancel_query(PGconn *conn);
146153
static bool pgfdw_cancel_query_begin(PGconn *conn, TimestampTz endtime);
147154
static bool pgfdw_cancel_query_end(PGconn *conn, TimestampTz endtime,
155+
TimestampTz retrycanceltime,
148156
bool consume_input);
149157
static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query,
150158
bool ignore_errors);
@@ -154,6 +162,7 @@ static bool pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query,
154162
bool consume_input,
155163
bool ignore_errors);
156164
static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime,
165+
TimestampTz retrycanceltime,
157166
PGresult **result, bool *timed_out);
158167
static void pgfdw_abort_cleanup(ConnCacheEntry *entry, bool toplevel);
159168
static bool pgfdw_abort_cleanup_begin(ConnCacheEntry *entry, bool toplevel,
@@ -1322,18 +1331,25 @@ pgfdw_reset_xact_state(ConnCacheEntry *entry, bool toplevel)
13221331
static bool
13231332
pgfdw_cancel_query(PGconn *conn)
13241333
{
1334+
TimestampTz now = GetCurrentTimestamp();
13251335
TimestampTz endtime;
1336+
TimestampTz retrycanceltime;
13261337

13271338
/*
13281339
* If it takes too long to cancel the query and discard the result, assume
13291340
* the connection is dead.
13301341
*/
1331-
endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
1332-
CONNECTION_CLEANUP_TIMEOUT);
1342+
endtime = TimestampTzPlusMilliseconds(now, CONNECTION_CLEANUP_TIMEOUT);
1343+
1344+
/*
1345+
* Also, lose patience and re-issue the cancel request after a little bit.
1346+
* (This serves to close some race conditions.)
1347+
*/
1348+
retrycanceltime = TimestampTzPlusMilliseconds(now, RETRY_CANCEL_TIMEOUT);
13331349

13341350
if (!pgfdw_cancel_query_begin(conn, endtime))
13351351
return false;
1336-
return pgfdw_cancel_query_end(conn, endtime, false);
1352+
return pgfdw_cancel_query_end(conn, endtime, retrycanceltime, false);
13371353
}
13381354

13391355
/*
@@ -1359,9 +1375,10 @@ pgfdw_cancel_query_begin(PGconn *conn, TimestampTz endtime)
13591375
}
13601376

13611377
static bool
1362-
pgfdw_cancel_query_end(PGconn *conn, TimestampTz endtime, bool consume_input)
1378+
pgfdw_cancel_query_end(PGconn *conn, TimestampTz endtime,
1379+
TimestampTz retrycanceltime, bool consume_input)
13631380
{
1364-
PGresult *result = NULL;
1381+
PGresult *result;
13651382
bool timed_out;
13661383

13671384
/*
@@ -1380,7 +1397,8 @@ pgfdw_cancel_query_end(PGconn *conn, TimestampTz endtime, bool consume_input)
13801397
}
13811398

13821399
/* Get and discard the result of the query. */
1383-
if (pgfdw_get_cleanup_result(conn, endtime, &result, &timed_out))
1400+
if (pgfdw_get_cleanup_result(conn, endtime, retrycanceltime,
1401+
&result, &timed_out))
13841402
{
13851403
if (timed_out)
13861404
ereport(WARNING,
@@ -1453,7 +1471,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query,
14531471
TimestampTz endtime, bool consume_input,
14541472
bool ignore_errors)
14551473
{
1456-
PGresult *result = NULL;
1474+
PGresult *result;
14571475
bool timed_out;
14581476

14591477
Assert(query != NULL);
@@ -1471,7 +1489,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query,
14711489
}
14721490

14731491
/* Get the result of the query. */
1474-
if (pgfdw_get_cleanup_result(conn, endtime, &result, &timed_out))
1492+
if (pgfdw_get_cleanup_result(conn, endtime, endtime, &result, &timed_out))
14751493
{
14761494
if (timed_out)
14771495
ereport(WARNING,
@@ -1495,28 +1513,36 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query,
14951513
}
14961514

14971515
/*
1498-
* Get, during abort cleanup, the result of a query that is in progress. This
1499-
* might be a query that is being interrupted by transaction abort, or it might
1500-
* be a query that was initiated as part of transaction abort to get the remote
1501-
* side back to the appropriate state.
1516+
* Get, during abort cleanup, the result of a query that is in progress.
1517+
* This might be a query that is being interrupted by a cancel request or by
1518+
* transaction abort, or it might be a query that was initiated as part of
1519+
* transaction abort to get the remote side back to the appropriate state.
1520+
*
1521+
* endtime is the time at which we should give up and assume the remote side
1522+
* is dead. retrycanceltime is the time at which we should issue a fresh
1523+
* cancel request (pass the same value as endtime if this is not wanted).
15021524
*
1503-
* endtime is the time at which we should give up and assume the remote
1504-
* side is dead. Returns true if the timeout expired or connection trouble
1505-
* occurred, false otherwise. Sets *result except in case of a timeout.
1506-
* Sets timed_out to true only when the timeout expired.
1525+
* Returns true if the timeout expired or connection trouble occurred,
1526+
* false otherwise. Sets *result except in case of a true result.
1527+
* Sets *timed_out to true only when the timeout expired.
15071528
*/
15081529
static bool
1509-
pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result,
1530+
pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime,
1531+
TimestampTz retrycanceltime,
1532+
PGresult **result,
15101533
bool *timed_out)
15111534
{
15121535
volatile bool failed = false;
15131536
PGresult *volatile last_res = NULL;
15141537

1538+
*result = NULL;
15151539
*timed_out = false;
15161540

15171541
/* In what follows, do not leak any PGresults on an error. */
15181542
PG_TRY();
15191543
{
1544+
int canceldelta = RETRY_CANCEL_TIMEOUT * 2;
1545+
15201546
for (;;)
15211547
{
15221548
PGresult *res;
@@ -1527,8 +1553,33 @@ pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result,
15271553
TimestampTz now = GetCurrentTimestamp();
15281554
long cur_timeout;
15291555

1556+
/* If timeout has expired, give up. */
1557+
if (now >= endtime)
1558+
{
1559+
*timed_out = true;
1560+
failed = true;
1561+
goto exit;
1562+
}
1563+
1564+
/* If we need to re-issue the cancel request, do that. */
1565+
if (now >= retrycanceltime)
1566+
{
1567+
/* We ignore failure to issue the repeated request. */
1568+
(void) libpqsrv_cancel(conn, endtime);
1569+
1570+
/* Recompute "now" in case that took measurable time. */
1571+
now = GetCurrentTimestamp();
1572+
1573+
/* Adjust re-cancel timeout in increasing steps. */
1574+
retrycanceltime = TimestampTzPlusMilliseconds(now,
1575+
canceldelta);
1576+
canceldelta += canceldelta;
1577+
}
1578+
15301579
/* If timeout has expired, give up, else get sleep time. */
1531-
cur_timeout = TimestampDifferenceMilliseconds(now, endtime);
1580+
cur_timeout = TimestampDifferenceMilliseconds(now,
1581+
Min(endtime,
1582+
retrycanceltime));
15321583
if (cur_timeout <= 0)
15331584
{
15341585
*timed_out = true;
@@ -1849,7 +1900,9 @@ pgfdw_finish_abort_cleanup(List *pending_entries, List *cancel_requested,
18491900
foreach(lc, cancel_requested)
18501901
{
18511902
ConnCacheEntry *entry = (ConnCacheEntry *) lfirst(lc);
1903+
TimestampTz now = GetCurrentTimestamp();
18521904
TimestampTz endtime;
1905+
TimestampTz retrycanceltime;
18531906
char sql[100];
18541907

18551908
Assert(entry->changing_xact_state);
@@ -1863,10 +1916,13 @@ pgfdw_finish_abort_cleanup(List *pending_entries, List *cancel_requested,
18631916
* remaining entries in the list, leading to slamming that entry's
18641917
* connection shut.
18651918
*/
1866-
endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
1919+
endtime = TimestampTzPlusMilliseconds(now,
18671920
CONNECTION_CLEANUP_TIMEOUT);
1921+
retrycanceltime = TimestampTzPlusMilliseconds(now,
1922+
RETRY_CANCEL_TIMEOUT);
18681923

1869-
if (!pgfdw_cancel_query_end(entry->conn, endtime, true))
1924+
if (!pgfdw_cancel_query_end(entry->conn, endtime,
1925+
retrycanceltime, true))
18701926
{
18711927
/* Unable to cancel running query */
18721928
pgfdw_reset_xact_state(entry, toplevel);

contrib/postgres_fdw/expected/query_cancel.out

+4-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@ SELECT count(*) FROM ft1 a;
2424
822
2525
(1 row)
2626

27-
-- Timeout needs to be long enough to be sure that we've sent the slow query.
28-
SET LOCAL statement_timeout = '100ms';
27+
-- On most machines, 10ms will be enough to be sure that we've sent the slow
28+
-- query. We may sometimes exercise the race condition where we send cancel
29+
-- before the remote side starts the query, but that's fine too.
30+
SET LOCAL statement_timeout = '10ms';
2931
-- This would take very long if not canceled:
3032
SELECT count(*) FROM ft1 a CROSS JOIN ft1 b CROSS JOIN ft1 c CROSS JOIN ft1 d;
3133
ERROR: canceling statement due to statement timeout

contrib/postgres_fdw/sql/query_cancel.sql

+4-2
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ SELECT count(*) FROM ft1 a CROSS JOIN ft1 b CROSS JOIN ft1 c CROSS JOIN ft1 d;
1313
BEGIN;
1414
-- Make sure that connection is open and set up.
1515
SELECT count(*) FROM ft1 a;
16-
-- Timeout needs to be long enough to be sure that we've sent the slow query.
17-
SET LOCAL statement_timeout = '100ms';
16+
-- On most machines, 10ms will be enough to be sure that we've sent the slow
17+
-- query. We may sometimes exercise the race condition where we send cancel
18+
-- before the remote side starts the query, but that's fine too.
19+
SET LOCAL statement_timeout = '10ms';
1820
-- This would take very long if not canceled:
1921
SELECT count(*) FROM ft1 a CROSS JOIN ft1 b CROSS JOIN ft1 c CROSS JOIN ft1 d;
2022
COMMIT;

0 commit comments

Comments
 (0)