Skip to content

Commit faf18a9

Browse files
committed
Speed up ruleutils' name de-duplication code, and fix overlength-name case.
Since commit 11e1318, ruleutils.c has attempted to ensure that each RTE in a query or plan tree has a unique alias name. However, the code that was added for this could be quite slow, even as bad as O(N^3) if N identical RTE names must be replaced, as noted by Jeff Janes. Improve matters by building a transient hash table within set_rtable_names. The hash table in itself reduces the cost of detecting a duplicate from O(N) to O(1), and we can save another factor of N by storing the number of de-duplicated names already created for each entry, so that we don't have to re-try names already created. This way is probably a bit slower overall for small range tables, but almost by definition, such cases should not be a performance problem. In principle the same problem applies to the column-name-de-duplication code; but in practice that seems to be less of a problem, first because N is limited since we don't support extremely wide tables, and second because duplicate column names within an RTE are fairly rare, so that in practice the cost is more like O(N^2) not O(N^3). It would be very much messier to fix the column-name code, so for now I've left that alone. An independent problem in the same area was that the de-duplication code paid no attention to the identifier length limit, and would happily produce identifiers that were longer than NAMEDATALEN and wouldn't be unique after truncation to NAMEDATALEN. This could result in dump/reload failures, or perhaps even views that silently behaved differently than before. We can fix that by shortening the base name as needed. Fix it for both the relation and column name cases. In passing, check for interrupts in set_rtable_names, just in case it's still slow enough to be an issue. Back-patch to 9.3 where this code was introduced.
1 parent 7d0e872 commit faf18a9

File tree

3 files changed

+161
-46
lines changed

3 files changed

+161
-46
lines changed

src/backend/utils/adt/ruleutils.c

Lines changed: 125 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "commands/tablespace.h"
3737
#include "executor/spi.h"
3838
#include "funcapi.h"
39+
#include "mb/pg_wchar.h"
3940
#include "miscadmin.h"
4041
#include "nodes/makefuncs.h"
4142
#include "nodes/nodeFuncs.h"
@@ -52,6 +53,7 @@
5253
#include "utils/array.h"
5354
#include "utils/builtins.h"
5455
#include "utils/fmgroids.h"
56+
#include "utils/hsearch.h"
5557
#include "utils/lsyscache.h"
5658
#include "utils/rel.h"
5759
#include "utils/syscache.h"
@@ -260,6 +262,15 @@ typedef struct
260262
#define deparse_columns_fetch(rangetable_index, dpns) \
261263
((deparse_columns *) list_nth((dpns)->rtable_columns, (rangetable_index)-1))
262264

265+
/*
266+
* Entry in set_rtable_names' hash table
267+
*/
268+
typedef struct
269+
{
270+
char name[NAMEDATALEN]; /* Hash key --- must be first */
271+
int counter; /* Largest addition used so far for name */
272+
} NameHashEntry;
273+
263274

264275
/* ----------
265276
* Global data
@@ -304,8 +315,6 @@ static int print_function_arguments(StringInfo buf, HeapTuple proctup,
304315
static void print_function_rettype(StringInfo buf, HeapTuple proctup);
305316
static void set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
306317
Bitmapset *rels_used);
307-
static bool refname_is_unique(char *refname, deparse_namespace *dpns,
308-
List *parent_namespaces);
309318
static void set_deparse_for_query(deparse_namespace *dpns, Query *query,
310319
List *parent_namespaces);
311320
static void set_simple_column_names(deparse_namespace *dpns);
@@ -2491,15 +2500,61 @@ static void
24912500
set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
24922501
Bitmapset *rels_used)
24932502
{
2503+
HASHCTL hash_ctl;
2504+
HTAB *names_hash;
2505+
NameHashEntry *hentry;
2506+
bool found;
2507+
int rtindex;
24942508
ListCell *lc;
2495-
int rtindex = 1;
24962509

24972510
dpns->rtable_names = NIL;
2511+
/* nothing more to do if empty rtable */
2512+
if (dpns->rtable == NIL)
2513+
return;
2514+
2515+
/*
2516+
* We use a hash table to hold known names, so that this process is O(N)
2517+
* not O(N^2) for N names.
2518+
*/
2519+
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
2520+
hash_ctl.keysize = NAMEDATALEN;
2521+
hash_ctl.entrysize = sizeof(NameHashEntry);
2522+
hash_ctl.hcxt = CurrentMemoryContext;
2523+
names_hash = hash_create("set_rtable_names names",
2524+
list_length(dpns->rtable),
2525+
&hash_ctl,
2526+
HASH_ELEM | HASH_CONTEXT);
2527+
/* Preload the hash table with names appearing in parent_namespaces */
2528+
foreach(lc, parent_namespaces)
2529+
{
2530+
deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
2531+
ListCell *lc2;
2532+
2533+
foreach(lc2, olddpns->rtable_names)
2534+
{
2535+
char *oldname = (char *) lfirst(lc2);
2536+
2537+
if (oldname == NULL)
2538+
continue;
2539+
hentry = (NameHashEntry *) hash_search(names_hash,
2540+
oldname,
2541+
HASH_ENTER,
2542+
&found);
2543+
/* we do not complain about duplicate names in parent namespaces */
2544+
hentry->counter = 0;
2545+
}
2546+
}
2547+
2548+
/* Now we can scan the rtable */
2549+
rtindex = 1;
24982550
foreach(lc, dpns->rtable)
24992551
{
25002552
RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
25012553
char *refname;
25022554

2555+
/* Just in case this takes an unreasonable amount of time ... */
2556+
CHECK_FOR_INTERRUPTS();
2557+
25032558
if (rels_used && !bms_is_member(rtindex, rels_used))
25042559
{
25052560
/* Ignore unreferenced RTE */
@@ -2527,56 +2582,62 @@ set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
25272582
}
25282583

25292584
/*
2530-
* If the selected name isn't unique, append digits to make it so
2585+
* If the selected name isn't unique, append digits to make it so, and
2586+
* make a new hash entry for it once we've got a unique name. For a
2587+
* very long input name, we might have to truncate to stay within
2588+
* NAMEDATALEN.
25312589
*/
2532-
if (refname &&
2533-
!refname_is_unique(refname, dpns, parent_namespaces))
2590+
if (refname)
25342591
{
2535-
char *modname = (char *) palloc(strlen(refname) + 32);
2536-
int i = 0;
2592+
hentry = (NameHashEntry *) hash_search(names_hash,
2593+
refname,
2594+
HASH_ENTER,
2595+
&found);
2596+
if (found)
2597+
{
2598+
/* Name already in use, must choose a new one */
2599+
int refnamelen = strlen(refname);
2600+
char *modname = (char *) palloc(refnamelen + 16);
2601+
NameHashEntry *hentry2;
25372602

2538-
do
2603+
do
2604+
{
2605+
hentry->counter++;
2606+
for (;;)
2607+
{
2608+
/*
2609+
* We avoid using %.*s here because it can misbehave
2610+
* if the data is not valid in what libc thinks is the
2611+
* prevailing encoding.
2612+
*/
2613+
memcpy(modname, refname, refnamelen);
2614+
sprintf(modname + refnamelen, "_%d", hentry->counter);
2615+
if (strlen(modname) < NAMEDATALEN)
2616+
break;
2617+
/* drop chars from refname to keep all the digits */
2618+
refnamelen = pg_mbcliplen(refname, refnamelen,
2619+
refnamelen - 1);
2620+
}
2621+
hentry2 = (NameHashEntry *) hash_search(names_hash,
2622+
modname,
2623+
HASH_ENTER,
2624+
&found);
2625+
} while (found);
2626+
hentry2->counter = 0; /* init new hash entry */
2627+
refname = modname;
2628+
}
2629+
else
25392630
{
2540-
sprintf(modname, "%s_%d", refname, ++i);
2541-
} while (!refname_is_unique(modname, dpns, parent_namespaces));
2542-
refname = modname;
2631+
/* Name not previously used, need only initialize hentry */
2632+
hentry->counter = 0;
2633+
}
25432634
}
25442635

25452636
dpns->rtable_names = lappend(dpns->rtable_names, refname);
25462637
rtindex++;
25472638
}
2548-
}
2549-
2550-
/*
2551-
* refname_is_unique: is refname distinct from all already-chosen RTE names?
2552-
*/
2553-
static bool
2554-
refname_is_unique(char *refname, deparse_namespace *dpns,
2555-
List *parent_namespaces)
2556-
{
2557-
ListCell *lc;
25582639

2559-
foreach(lc, dpns->rtable_names)
2560-
{
2561-
char *oldname = (char *) lfirst(lc);
2562-
2563-
if (oldname && strcmp(oldname, refname) == 0)
2564-
return false;
2565-
}
2566-
foreach(lc, parent_namespaces)
2567-
{
2568-
deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
2569-
ListCell *lc2;
2570-
2571-
foreach(lc2, olddpns->rtable_names)
2572-
{
2573-
char *oldname = (char *) lfirst(lc2);
2574-
2575-
if (oldname && strcmp(oldname, refname) == 0)
2576-
return false;
2577-
}
2578-
}
2579-
return true;
2640+
hash_destroy(names_hash);
25802641
}
25812642

25822643
/*
@@ -3404,16 +3465,34 @@ make_colname_unique(char *colname, deparse_namespace *dpns,
34043465
deparse_columns *colinfo)
34053466
{
34063467
/*
3407-
* If the selected name isn't unique, append digits to make it so
3468+
* If the selected name isn't unique, append digits to make it so. For a
3469+
* very long input name, we might have to truncate to stay within
3470+
* NAMEDATALEN.
34083471
*/
34093472
if (!colname_is_unique(colname, dpns, colinfo))
34103473
{
3411-
char *modname = (char *) palloc(strlen(colname) + 32);
3474+
int colnamelen = strlen(colname);
3475+
char *modname = (char *) palloc(colnamelen + 16);
34123476
int i = 0;
34133477

34143478
do
34153479
{
3416-
sprintf(modname, "%s_%d", colname, ++i);
3480+
i++;
3481+
for (;;)
3482+
{
3483+
/*
3484+
* We avoid using %.*s here because it can misbehave if the
3485+
* data is not valid in what libc thinks is the prevailing
3486+
* encoding.
3487+
*/
3488+
memcpy(modname, colname, colnamelen);
3489+
sprintf(modname + colnamelen, "_%d", i);
3490+
if (strlen(modname) < NAMEDATALEN)
3491+
break;
3492+
/* drop chars from colname to keep all the digits */
3493+
colnamelen = pg_mbcliplen(colname, colnamelen,
3494+
colnamelen - 1);
3495+
}
34173496
} while (!colname_is_unique(modname, dpns, colinfo));
34183497
colname = modname;
34193498
}

src/test/regress/expected/create_view.out

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,6 +1475,33 @@ select * from int8_tbl i where i.* in (values(i.*::int8_tbl));
14751475
4567890123456789 | -4567890123456789
14761476
(5 rows)
14771477

1478+
-- check unique-ification of overlength names
1479+
create view tt18v as
1480+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy
1481+
union all
1482+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz;
1483+
NOTICE: identifier "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy" will be truncated to "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1484+
NOTICE: identifier "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz" will be truncated to "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1485+
select pg_get_viewdef('tt18v', true);
1486+
pg_get_viewdef
1487+
-----------------------------------------------------------------------------------
1488+
SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, +
1489+
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q2 +
1490+
FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +
1491+
UNION ALL +
1492+
SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, +
1493+
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q2 +
1494+
FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
1495+
(1 row)
1496+
1497+
explain (costs off) select * from tt18v;
1498+
QUERY PLAN
1499+
--------------------------------------------------------------------------------------------
1500+
Append
1501+
-> Seq Scan on int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1502+
-> Seq Scan on int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx_1
1503+
(3 rows)
1504+
14781505
-- clean up all the random objects we made above
14791506
set client_min_messages = warning;
14801507
DROP SCHEMA temp_view_test CASCADE;

src/test/regress/sql/create_view.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,15 @@ select * from tt17v;
487487
select pg_get_viewdef('tt17v', true);
488488
select * from int8_tbl i where i.* in (values(i.*::int8_tbl));
489489

490+
-- check unique-ification of overlength names
491+
492+
create view tt18v as
493+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy
494+
union all
495+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz;
496+
select pg_get_viewdef('tt18v', true);
497+
explain (costs off) select * from tt18v;
498+
490499
-- clean up all the random objects we made above
491500
set client_min_messages = warning;
492501
DROP SCHEMA temp_view_test CASCADE;

0 commit comments

Comments
 (0)