Skip to content

Commit 3e31166

Browse files
committed
oauth: Track total call count during a client flow
Tracking down the bugs that led to the addition of comb_multiplexer() and drain_timer_events() was difficult, because an inefficient flow is not visibly different from one that is working properly. To help maintainers notice when something has gone wrong, track the number of calls into the flow as part of debug mode, and print the total when the flow finishes. A new test makes sure the total count is less than 100. (We expect something on the order of 10.) This isn't foolproof, but it is able to catch several regressions in the logic of the prior two commits, and future work to add TLS support to the oauth_validator test server should strengthen it as well. Backpatch-through: 18 Discussion: https://postgr.es/m/CAOYmi+nDZxJHaWj9_jRSyf8uMToCADAmOfJEggsKW-kY7aUwHA@mail.gmail.com
1 parent 1749a12 commit 3e31166

File tree

2 files changed

+52
-1
lines changed

2 files changed

+52
-1
lines changed

src/interfaces/libpq-oauth/oauth-curl.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ struct async_ctx
278278
bool user_prompted; /* have we already sent the authz prompt? */
279279
bool used_basic_auth; /* did we send a client secret? */
280280
bool debugging; /* can we give unsafe developer assistance? */
281+
int dbg_num_calls; /* (debug mode) how many times were we called? */
281282
};
282283

283284
/*
@@ -3021,6 +3022,8 @@ PostgresPollingStatusType
30213022
pg_fe_run_oauth_flow(PGconn *conn)
30223023
{
30233024
PostgresPollingStatusType result;
3025+
fe_oauth_state *state = conn_sasl_state(conn);
3026+
struct async_ctx *actx;
30243027
#ifndef WIN32
30253028
sigset_t osigset;
30263029
bool sigpipe_pending;
@@ -3049,6 +3052,25 @@ pg_fe_run_oauth_flow(PGconn *conn)
30493052

30503053
result = pg_fe_run_oauth_flow_impl(conn);
30513054

3055+
/*
3056+
* To assist with finding bugs in comb_multiplexer() and
3057+
* drain_timer_events(), when we're in debug mode, track the total number
3058+
* of calls to this function and print that at the end of the flow.
3059+
*
3060+
* Be careful that state->async_ctx could be NULL if early initialization
3061+
* fails during the first call.
3062+
*/
3063+
actx = state->async_ctx;
3064+
Assert(actx || result == PGRES_POLLING_FAILED);
3065+
3066+
if (actx && actx->debugging)
3067+
{
3068+
actx->dbg_num_calls++;
3069+
if (result == PGRES_POLLING_OK || result == PGRES_POLLING_FAILED)
3070+
fprintf(stderr, "[libpq] total number of polls: %d\n",
3071+
actx->dbg_num_calls);
3072+
}
3073+
30523074
#ifndef WIN32
30533075
if (masked)
30543076
{

src/test/modules/oauth_validator/t/001_server.pl

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,35 @@ sub connstr
418418
qr/failed to obtain access token: mutual TLS required for client \(invalid_client\)/
419419
);
420420

421+
# Count the number of calls to the internal flow when multiple retries are
422+
# triggered. The exact number depends on many things -- the TCP stack, the
423+
# version of Curl in use, random chance -- but a ridiculously high number
424+
# suggests something is wrong with our ability to clear multiplexer events after
425+
# they're no longer applicable.
426+
my ($ret, $stdout, $stderr) = $node->psql(
427+
'postgres',
428+
"SELECT 'connected for call count'",
429+
extra_params => ['-w'],
430+
connstr => connstr(stage => 'token', retries => 2),
431+
on_error_stop => 0);
432+
433+
is($ret, 0, "call count connection succeeds");
434+
like(
435+
$stderr,
436+
qr@Visit https://example\.com/ and enter the code: postgresuser@,
437+
"call count: stderr matches");
438+
439+
my $count_pattern = qr/\[libpq\] total number of polls: (\d+)/;
440+
if (like($stderr, $count_pattern, "call count: count is printed"))
441+
{
442+
# For reference, a typical flow with two retries might take between 5-15
443+
# calls to the client implementation. And while this will probably continue
444+
# to change across OSes and Curl updates, we're likely in trouble if we see
445+
# hundreds or thousands of calls.
446+
$stderr =~ $count_pattern;
447+
cmp_ok($1, '<', 100, "call count is reasonably small");
448+
}
449+
421450
# Stress test: make sure our builtin flow operates correctly even if the client
422451
# application isn't respecting PGRES_POLLING_READING/WRITING signals returned
423452
# from PQconnectPoll().
@@ -428,7 +457,7 @@ sub connstr
428457
connstr(stage => 'all', retries => 1, interval => 1));
429458

430459
note "running '" . join("' '", @cmd) . "'";
431-
my ($stdout, $stderr) = run_command(\@cmd);
460+
($stdout, $stderr) = run_command(\@cmd);
432461

433462
like($stdout, qr/connection succeeded/, "stress-async: stdout matches");
434463
unlike(

0 commit comments

Comments
 (0)