Skip to content

Commit a0854f1

Browse files
committed
Avoid parsing catalog data twice during BKI file construction.
In the wake of commit 5602265, we were doing duplicate-OID detection quite inefficiently, by invoking duplicate_oids which does all the same parsing of catalog headers and .dat files as genbki.pl does. That adds under half a second on modern machines, but quite a bit more on slow buildfarm critters, so it seems worth avoiding. Let's just extend genbki.pl a little so it can also detect duplicate OIDs, and remove the duplicate_oids call from the build process. (This also means that duplicate OID detection will happen during Windows builds, which AFAICS it didn't before.)

This makes the use-case for duplicate_oids a bit dubious, but it's possible that people will still want to run that check without doing a whole build run, so let's keep that script.

In passing, move down genbki.pl's creation of its temp output files so that it doesn't happen until after we've done parsing and validation of the input. This avoids leaving a lot of clutter around after a failure.

John Naylor and Tom Lane

Discussion: https://postgr.es/m/37D774E4-FE1F-437E-B3D2-593F314B7505@postgrespro.ru
1 parent dd4cc9d commit a0854f1

File tree

4 files changed

+60
-20
lines changed

4 files changed

+60
-20
lines changed

doc/src/sgml/bki.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -382,8 +382,8 @@
382382
through the catalog headers and <filename>.dat</filename> files
383383
to see which ones do not appear. You can also use
384384
the <filename>duplicate_oids</filename> script to check for mistakes.
385-
(That script is run automatically at compile time, and will stop the
386-
build if a duplicate is found.)
385+
(<filename>genbki.pl</filename> will also detect duplicate OIDs
386+
at compile time.)
387387
</para>
388388

389389
<para>

src/backend/catalog/Catalog.pm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,8 @@ sub FindDefinedSymbolFromData
386386

387387
# Extract an array of all the OIDs assigned in the specified catalog headers
388388
# and their associated data files (if any).
389+
# Caution: genbki.pl contains equivalent logic; change it too if you need to
390+
# touch this.
389391
sub FindAllOidsFromHeaders
390392
{
391393
my @input_files = @_;

src/backend/catalog/Makefile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,7 @@ generated-header-symlinks: $(top_builddir)/src/include/catalog/header-stamp
8484
# configure run, even in distribution tarballs. So depending on configure.in
8585
# instead is cheating a bit, but it will achieve the goal of updating the
8686
# version number when it changes.
87-
bki-stamp: genbki.pl Catalog.pm $(POSTGRES_BKI_SRCS) $(POSTGRES_BKI_DATA) $(top_srcdir)/configure.in $(top_srcdir)/src/include/catalog/duplicate_oids
88-
cd $(top_srcdir)/src/include/catalog && $(PERL) ./duplicate_oids
87+
bki-stamp: genbki.pl Catalog.pm $(POSTGRES_BKI_SRCS) $(POSTGRES_BKI_DATA) $(top_srcdir)/configure.in
8988
$(PERL) -I $(catalogdir) $< --set-version=$(MAJORVERSION) $(POSTGRES_BKI_SRCS)
9089
touch $@
9190

src/backend/catalog/genbki.pl

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -57,27 +57,14 @@
5757
$output_path .= '/';
5858
}
5959

60-
# Open temp files
61-
my $tmpext = ".tmp$$";
62-
my $bkifile = $output_path . 'postgres.bki';
63-
open my $bki, '>', $bkifile . $tmpext
64-
or die "can't open $bkifile$tmpext: $!";
65-
my $schemafile = $output_path . 'schemapg.h';
66-
open my $schemapg, '>', $schemafile . $tmpext
67-
or die "can't open $schemafile$tmpext: $!";
68-
my $descrfile = $output_path . 'postgres.description';
69-
open my $descr, '>', $descrfile . $tmpext
70-
or die "can't open $descrfile$tmpext: $!";
71-
my $shdescrfile = $output_path . 'postgres.shdescription';
72-
open my $shdescr, '>', $shdescrfile . $tmpext
73-
or die "can't open $shdescrfile$tmpext: $!";
74-
7560
# Read all the files into internal data structures.
7661
my @catnames;
7762
my %catalogs;
7863
my %catalog_data;
7964
my @toast_decls;
8065
my @index_decls;
66+
my %oidcounts;
67+
8168
foreach my $header (@input_files)
8269
{
8370
$header =~ /(.+)\.h$/
@@ -94,10 +81,30 @@
9481
$catalogs{$catname} = $catalog;
9582
}
9683

84+
# While checking for duplicated OIDs, we ignore the pg_class OID and
85+
# rowtype OID of bootstrap catalogs, as those are expected to appear
86+
# in the initial data for pg_class and pg_type. For regular catalogs,
87+
# include these OIDs. (See also Catalog::FindAllOidsFromHeaders
88+
# if you change this logic.)
89+
if (!$catalog->{bootstrap})
90+
{
91+
$oidcounts{ $catalog->{relation_oid} }++
92+
if ($catalog->{relation_oid});
93+
$oidcounts{ $catalog->{rowtype_oid} }++
94+
if ($catalog->{rowtype_oid});
95+
}
96+
9797
# Not all catalogs have a data file.
9898
if (-e $datfile)
9999
{
100-
$catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 0);
100+
my $data = Catalog::ParseData($datfile, $schema, 0);
101+
$catalog_data{$catname} = $data;
102+
103+
# Check for duplicated OIDs while we're at it.
104+
foreach my $row (@$data)
105+
{
106+
$oidcounts{ $row->{oid} }++ if defined $row->{oid};
107+
}
101108
}
102109

103110
# If the header file contained toast or index info, build BKI
@@ -108,6 +115,8 @@
108115
sprintf "declare toast %s %s on %s\n",
109116
$toast->{toast_oid}, $toast->{toast_index_oid},
110117
$toast->{parent_table};
118+
$oidcounts{ $toast->{toast_oid} }++;
119+
$oidcounts{ $toast->{toast_index_oid} }++;
111120
}
112121
foreach my $index (@{ $catalog->{indexing} })
113122
{
@@ -116,9 +125,24 @@
116125
$index->{is_unique} ? 'unique ' : '',
117126
$index->{index_name}, $index->{index_oid},
118127
$index->{index_decl};
128+
$oidcounts{ $index->{index_oid} }++;
119129
}
120130
}
121131

132+
# Complain and exit if we found any duplicate OIDs.
133+
# While duplicate OIDs would only cause a failure if they appear in
134+
# the same catalog, our project policy is that manually assigned OIDs
135+
# should be globally unique, to avoid confusion.
136+
my $found = 0;
137+
foreach my $oid (keys %oidcounts)
138+
{
139+
next unless $oidcounts{$oid} > 1;
140+
print "Duplicate oids detected:\n" if !$found;
141+
print "$oid\n";
142+
$found++;
143+
}
144+
die "found $found duplicate OID(s) in catalog data\n" if $found;
145+
122146
# Fetch some special data that we will substitute into the output file.
123147
# CAUTION: be wary about what symbols you substitute into the .bki file here!
124148
# It's okay to substitute things that are expected to be really constant
@@ -224,6 +248,21 @@
224248
pg_type => \%typeoids);
225249

226250

251+
# Open temp files
252+
my $tmpext = ".tmp$$";
253+
my $bkifile = $output_path . 'postgres.bki';
254+
open my $bki, '>', $bkifile . $tmpext
255+
or die "can't open $bkifile$tmpext: $!";
256+
my $schemafile = $output_path . 'schemapg.h';
257+
open my $schemapg, '>', $schemafile . $tmpext
258+
or die "can't open $schemafile$tmpext: $!";
259+
my $descrfile = $output_path . 'postgres.description';
260+
open my $descr, '>', $descrfile . $tmpext
261+
or die "can't open $descrfile$tmpext: $!";
262+
my $shdescrfile = $output_path . 'postgres.shdescription';
263+
open my $shdescr, '>', $shdescrfile . $tmpext
264+
or die "can't open $shdescrfile$tmpext: $!";
265+
227266
# Generate postgres.bki, postgres.description, postgres.shdescription,
228267
# and pg_*_d.h headers.
229268
print "Generating BKI files and symbol definition headers...\n";

0 commit comments

Comments
 (0)