Skip to content

Commit 5e39f06

Browse files
committed
Move bootstrap-time lookup of regproc OIDs into genbki.pl.
Formerly, the bootstrap backend looked up the OIDs corresponding to names in regproc catalog entries using brute-force searches of pg_proc. It was somewhat remarkable that that worked at all, since it was used while populating other pretty-fundamental catalogs like pg_operator. And it was also quite slow, and getting slower as pg_proc gets bigger. This patch moves the lookup work into genbki.pl, so that the values in postgres.bki for regproc columns are always numeric OIDs, an option that regprocin() already supported. Perl isn't the world's speediest language, so this about doubles the time needed to run genbki.pl (from 0.3 to 0.6 sec on my machine). But we only do that at most once per build. The time needed to run initdb drops significantly --- on my machine, initdb --no-sync goes from 1.8 to 1.3 seconds. So this is a small net win even for just one initdb per build, and it becomes quite a nice win for test sequences requiring many initdb runs. Strip out the now-dead code for brute-force catalog searching in regprocin. We'd also cargo-culted similar logic into regoperin and some (not all) of the other reg*in functions. That is all dead code too since we currently have no need to load such values during bootstrap. I removed it all, reasoning that if we ever need such functionality it'd be much better to do it in a similar way to this patch. There might be some simplifications possible in the backend now that regprocin doesn't require doing catalog reads so early in bootstrap. I've not looked into that, though. Andreas Karlsson, with some small adjustments by me Discussion: https://postgr.es/m/30896.1492006367@sss.pgh.pa.us
1 parent a9254e6 commit 5e39f06

File tree

4 files changed

+120
-219
lines changed

4 files changed

+120
-219
lines changed

src/backend/catalog/Catalog.pm

+28-7
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use warnings;
1919
require Exporter;
2020
our @ISA = qw(Exporter);
2121
our @EXPORT = ();
22-
our @EXPORT_OK = qw(Catalogs RenameTempFile);
22+
our @EXPORT_OK = qw(Catalogs SplitDataLine RenameTempFile);
2323

2424
# Call this function with an array of names of header files to parse.
2525
# Returns a nested data structure describing the data in the headers.
@@ -216,6 +216,28 @@ sub Catalogs
216216
return \%catalogs;
217217
}
218218

219+
# Split a DATA line into fields.
220+
# Call this on the bki_values element of a DATA item returned by Catalogs();
221+
# it returns a list of field values. We don't strip quoting from the fields.
222+
# Note: it should be safe to assign the result to a list of length equal to
223+
# the nominal number of catalog fields, because check_natts already checked
224+
# the number of fields.
225+
sub SplitDataLine
226+
{
227+
my $bki_values = shift;
228+
229+
# This handling of quoted strings might look too simplistic, but it
230+
# matches what bootscanner.l does: that has no provision for quote marks
231+
# inside quoted strings, either. If we don't have a quoted string, just
232+
# snarf everything till next whitespace. That will accept some things
233+
# that bootscanner.l will see as erroneous tokens; but it seems wiser
234+
# to do that and let bootscanner.l complain than to silently drop
235+
# non-whitespace characters.
236+
my @result = $bki_values =~ /"[^"]*"|\S+/g;
237+
238+
return @result;
239+
}
240+
219241
# Rename temporary files to final names.
220242
# Call this function with the final file name and the .tmp extension
221243
# Note: recommended extension is ".tmp$$", so that parallel make steps
@@ -229,21 +251,20 @@ sub RenameTempFile
229251
rename($temp_name, $final_name) || die "rename: $temp_name: $!";
230252
}
231253

232-
# verify the number of fields in the passed-in bki structure
254+
# verify the number of fields in the passed-in DATA line
233255
sub check_natts
234256
{
235257
my ($catname, $natts, $bki_val, $file, $line) = @_;
258+
236259
die "Could not find definition for Natts_${catname} before start of DATA() in $file\n"
237260
unless defined $natts;
238261

239-
# we're working with a copy and need to count the fields only, so collapse
240-
$bki_val =~ s/"[^"]*?"/xxx/g;
241-
my @atts = split /\s+/, $bki_val;
262+
my $nfields = scalar(SplitDataLine($bki_val));
242263

243264
die sprintf
244265
"Wrong number of attributes in DATA() entry at %s:%d (expected %d but got %d)\n",
245-
$file, $line, $natts, scalar @atts
246-
unless $natts == @atts;
266+
$file, $line, $natts, $nfields
267+
unless $natts == $nfields;
247268
}
248269

249270
1;

src/backend/catalog/genbki.pl

+42-8
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
# vars to hold data needed for schemapg.h
103103
my %schemapg_entries;
104104
my @tables_needing_macros;
105+
my %regprocoids;
105106
our @types;
106107

107108
# produce output, one catalog at a time
@@ -160,24 +161,57 @@
160161
foreach my $row (@{ $catalog->{data} })
161162
{
162163

163-
# substitute constant values we acquired above
164-
$row->{bki_values} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g;
165-
$row->{bki_values} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g;
164+
# Split line into tokens without interpreting their meaning.
165+
my %bki_values;
166+
@bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
167+
168+
# Perform required substitutions on fields
169+
foreach my $att (keys %bki_values)
170+
{
171+
# Substitute constant values we acquired above.
172+
# (It's intentional that this can apply to parts of a field).
173+
$bki_values{$att} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g;
174+
$bki_values{$att} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g;
175+
176+
# Replace regproc columns' values with OIDs.
177+
# If we don't have a unique value to substitute,
178+
# just do nothing (regprocin will complain).
179+
if ($bki_attr{$att}->{type} eq 'regproc')
180+
{
181+
my $procoid = $regprocoids{$bki_values{$att}};
182+
$bki_values{$att} = $procoid
183+
if defined($procoid) && $procoid ne 'MULTIPLE';
184+
}
185+
}
186+
187+
# Save pg_proc oids for use in later regproc substitutions.
188+
# This relies on the order we process the files in!
189+
if ($catname eq 'pg_proc')
190+
{
191+
if (defined($regprocoids{$bki_values{proname}}))
192+
{
193+
$regprocoids{$bki_values{proname}} = 'MULTIPLE';
194+
}
195+
else
196+
{
197+
$regprocoids{$bki_values{proname}} = $row->{oid};
198+
}
199+
}
166200

167201
# Save pg_type info for pg_attribute processing below
168202
if ($catname eq 'pg_type')
169203
{
170-
my %type;
204+
my %type = %bki_values;
171205
$type{oid} = $row->{oid};
172-
@type{@attnames} = split /\s+/, $row->{bki_values};
173206
push @types, \%type;
174207
}
175208

176209
# Write to postgres.bki
177210
my $oid = $row->{oid} ? "OID = $row->{oid} " : '';
178-
printf $bki "insert %s( %s)\n", $oid, $row->{bki_values};
211+
printf $bki "insert %s( %s )\n", $oid,
212+
join(' ', @bki_values{@attnames});
179213

180-
# Write comments to postgres.description and postgres.shdescription
214+
# Write comments to postgres.description and postgres.shdescription
181215
if (defined $row->{descr})
182216
{
183217
printf $descr "%s\t%s\t0\t%s\n", $row->{oid}, $catname,
@@ -426,7 +460,7 @@ sub bki_insert
426460
my @attnames = @_;
427461
my $oid = $row->{oid} ? "OID = $row->{oid} " : '';
428462
my $bki_values = join ' ', map { $_ eq '' ? '""' : $_ } map $row->{$_}, @attnames;
429-
printf $bki "insert %s( %s)\n", $oid, $bki_values;
463+
printf $bki "insert %s( %s )\n", $oid, $bki_values;
430464
}
431465

432466
# The field values of a Schema_pg_xxx declaration are similar, but not

src/backend/utils/Gen_fmgrtab.pl

+8-18
Original file line numberDiff line numberDiff line change
@@ -58,30 +58,20 @@
5858
my $data = $catalogs->{pg_proc}->{data};
5959
foreach my $row (@$data)
6060
{
61-
62-
# To construct fmgroids.h and fmgrtab.c, we need to inspect some
63-
# of the individual data fields. Just splitting on whitespace
64-
# won't work, because some quoted fields might contain internal
65-
# whitespace. We handle this by folding them all to a simple
66-
# "xxx". Fortunately, this script doesn't need to look at any
67-
# fields that might need quoting, so this simple hack is
68-
# sufficient.
69-
$row->{bki_values} =~ s/"[^"]*"/"xxx"/g;
70-
@{$row}{@attnames} = split /\s+/, $row->{bki_values};
61+
# Split line into tokens without interpreting their meaning.
62+
my %bki_values;
63+
@bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
7164

7265
# Select out just the rows for internal-language procedures.
7366
# Note assumption here that INTERNALlanguageId is 12.
74-
next if $row->{prolang} ne '12';
67+
next if $bki_values{prolang} ne '12';
7568

7669
push @fmgr,
7770
{ oid => $row->{oid},
78-
strict => $row->{proisstrict},
79-
retset => $row->{proretset},
80-
nargs => $row->{pronargs},
81-
prosrc => $row->{prosrc}, };
82-
83-
# Hack to work around memory leak in some versions of Perl
84-
$row = undef;
71+
strict => $bki_values{proisstrict},
72+
retset => $bki_values{proretset},
73+
nargs => $bki_values{pronargs},
74+
prosrc => $bki_values{prosrc}, };
8575
}
8676

8777
# Emit headers for both files

0 commit comments

Comments
 (0)