Skip to content

Commit baa78ff

Browse files
committed
Prevent port collisions between concurrent TAP tests
Currently there is a race condition: if concurrent TAP tests both check that they can open a port, each will assume that it is free and use it, causing one of them to fail. To prevent this we record a reservation using an exclusive lock, and any TAP test that discovers a reservation checks to see if the reserving process is still alive, and looks for another free port if it is. Ports are reserved in a directory set by the environment setting PG_TEST_PORT_DIR or, if that isn't set, in a subdirectory of the top build directory as set by Makefile.global, or failing that in the test's own tmp_check directory. The prove_check recipe in Makefile.global.in is extended to export top_builddir to the TAP tests. This was already exported by the prove_installcheck recipes. Per complaint from Andres Freund. Backpatched from 9b4eafc to all live branches. Discussion: https://postgr.es/m/20221002164931.d57hlutrcz4d2zi7@awork3.anarazel.de
1 parent e9c8907 commit baa78ff

File tree

2 files changed

+60
-5
lines changed

2 files changed

+60
-5
lines changed

src/Makefile.global.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ rm -rf '$(CURDIR)'/tmp_check
455455
$(MKDIR_P) '$(CURDIR)'/tmp_check
456456
cd $(srcdir) && \
457457
TESTDIR='$(CURDIR)' $(with_temp_install) PGPORT='6$(DEF_PGPORT)' \
458+
top_builddir='$(CURDIR)/$(top_builddir)' \
458459
PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' \
459460
$(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
460461
endef

src/test/perl/PostgresNode.pm

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ use Carp;
9090
use Config;
9191
use Cwd;
9292
use Exporter 'import';
93-
use Fcntl qw(:mode);
93+
use Fcntl qw(:mode :flock :seek :DEFAULT);
9494
use File::Basename;
95-
use File::Path qw(rmtree);
95+
use File::Path qw(rmtree mkpath);
9696
use File::Spec;
9797
use File::stat qw(stat);
9898
use File::Temp ();
@@ -110,7 +110,10 @@ our @EXPORT = qw(
110110
);
111111

112112
our ($use_tcp, $test_localhost, $test_pghost, $last_host_assigned,
113-
$last_port_assigned, @all_nodes, $died);
113+
$last_port_assigned, @all_nodes, $died, $portdir);
114+
115+
# list of file reservations made by get_free_port
116+
my @port_reservation_files;
114117

115118
INIT
116119
{
@@ -126,6 +129,20 @@ INIT
126129

127130
# Tracking of last port value assigned to accelerate free port lookup.
128131
$last_port_assigned = int(rand() * 16384) + 49152;
132+
133+
# Set the port lock directory
134+
135+
# If we're told to use a directory (e.g. from a buildfarm client)
136+
# explicitly, use that
137+
$portdir = $ENV{PG_TEST_PORT_DIR};
138+
# Otherwise, try to use a directory at the top of the build tree
139+
# or as a last resort use the tmp_check directory
140+
my $build_dir = $ENV{top_builddir}
141+
|| $TestLib::tmp_check ;
142+
$portdir ||= "$build_dir/portlock";
143+
$portdir =~ s!\\!/!g;
144+
# Make sure the directory exists
145+
mkpath($portdir) unless -d $portdir;
129146
}
130147

131148
=pod
@@ -1182,8 +1199,8 @@ by test cases that need to start other, non-Postgres servers.
11821199
Ports assigned to existing PostgresNode objects are automatically
11831200
excluded, even if those servers are not currently running.
11841201
1185-
XXX A port available now may become unavailable by the time we start
1186-
the desired service.
1202+
The port number is reserved so that other concurrent test programs will not
1203+
try to use the same port.
11871204
11881205
=cut
11891206

@@ -1232,6 +1249,7 @@ sub get_free_port
12321249
last;
12331250
}
12341251
}
1252+
$found = _reserve_port($port) if $found;
12351253
}
12361254
}
12371255

@@ -1262,6 +1280,40 @@ sub can_bind
12621280
return $ret;
12631281
}
12641282

1283+
# Internal routine to reserve a port number.
#
# The reservation is a file named "$portdir/$port.rsv" that holds the PID of
# the reserving process.  The file is read and rewritten while holding an
# exclusive flock, so concurrent test scripts cannot both claim the same port.
# Successful reservations are recorded in @port_reservation_files.
#
# Argument: the port number to reserve.
# Returns 1 if successful, 0 if port is already reserved.
# Dies if the reservation file cannot be opened or locked.
sub _reserve_port
{
	my $port = shift;

	# open in rw mode so we don't have to reopen it and lose the lock
	my $filename = "$portdir/$port.rsv";
	sysopen(my $portfile, $filename, O_RDWR | O_CREAT)
	  || die "opening port file $filename: $!";

	# take an exclusive lock to avoid concurrent access
	flock($portfile, LOCK_EX) || die "locking port file $filename: $!";

	# See if someone else has or had a reservation of this port.  A file we
	# have just created is empty, so the read returns undef; treat that as
	# "no PID recorded" instead of provoking uninitialized-value warnings in
	# chomp and in the numeric comparison below.
	my $pid = <$portfile>;
	$pid = '0' unless defined $pid;
	chomp $pid;
	if ($pid + 0 > 0)
	{
		# kill 0 sends no signal; it only reports whether we could signal
		# the process.
		if (kill 0, $pid)
		{
			# process exists and is owned by us, so we can't reserve this port
			flock($portfile, LOCK_UN);
			close($portfile);
			return 0;
		}
	}

	# All good, go ahead and reserve the port.  Rewind first so that our PID
	# overwrites whatever stale PID was recorded.
	seek($portfile, 0, SEEK_SET);

	# print the pid with a fixed width so we don't leave any trailing junk
	print $portfile sprintf("%10d\n", $$);
	flock($portfile, LOCK_UN);
	close($portfile);

	# remember the file so it can be removed later
	push(@port_reservation_files, $filename);
	return 1;
}
1316+
12651317
# Automatically shut down any still-running nodes when the test script exits.
12661318
# Note that this just stops the postmasters (in the same order the nodes were
12671319
# created in). Any temporary directories are deleted, in an unspecified
@@ -1283,6 +1335,8 @@ END
12831335
$node->clean_node if $exit_code == 0 && TestLib::all_tests_passing();
12841336
}
12851337

1338+
unlink @port_reservation_files;
1339+
12861340
$? = $exit_code;
12871341
}
12881342

0 commit comments

Comments
 (0)