Skip to content

Commit a6c4c07

Browse files
committed
Speed up ruleutils' name de-duplication code, and fix overlength-name case.
Since commit 11e1318, ruleutils.c has attempted to ensure that each RTE in a query or plan tree has a unique alias name. However, the code that was added for this could be quite slow, even as bad as O(N^3) if N identical RTE names must be replaced, as noted by Jeff Janes. Improve matters by building a transient hash table within set_rtable_names. The hash table in itself reduces the cost of detecting a duplicate from O(N) to O(1), and we can save another factor of N by storing the number of de-duplicated names already created for each entry, so that we don't have to re-try names already created. This way is probably a bit slower overall for small range tables, but almost by definition, such cases should not be a performance problem. In principle the same problem applies to the column-name-de-duplication code; but in practice that seems to be less of a problem, first because N is limited since we don't support extremely wide tables, and second because duplicate column names within an RTE are fairly rare, so that in practice the cost is more like O(N^2) not O(N^3). It would be very much messier to fix the column-name code, so for now I've left that alone. An independent problem in the same area was that the de-duplication code paid no attention to the identifier length limit, and would happily produce identifiers that were longer than NAMEDATALEN and wouldn't be unique after truncation to NAMEDATALEN. This could result in dump/reload failures, or perhaps even views that silently behaved differently than before. We can fix that by shortening the base name as needed. Fix it for both the relation and column name cases. In passing, check for interrupts in set_rtable_names, just in case it's still slow enough to be an issue. Back-patch to 9.3 where this code was introduced.
1 parent d33ab56 commit a6c4c07

File tree

3 files changed

+161
-46
lines changed

3 files changed

+161
-46
lines changed

src/backend/utils/adt/ruleutils.c

+125-46
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "commands/tablespace.h"
3838
#include "executor/spi.h"
3939
#include "funcapi.h"
40+
#include "mb/pg_wchar.h"
4041
#include "miscadmin.h"
4142
#include "nodes/makefuncs.h"
4243
#include "nodes/nodeFuncs.h"
@@ -53,6 +54,7 @@
5354
#include "utils/array.h"
5455
#include "utils/builtins.h"
5556
#include "utils/fmgroids.h"
57+
#include "utils/hsearch.h"
5658
#include "utils/lsyscache.h"
5759
#include "utils/rel.h"
5860
#include "utils/snapmgr.h"
@@ -262,6 +264,15 @@ typedef struct
262264
#define deparse_columns_fetch(rangetable_index, dpns) \
263265
((deparse_columns *) list_nth((dpns)->rtable_columns, (rangetable_index)-1))
264266

267+
/*
268+
* Entry in set_rtable_names' hash table
269+
*/
270+
typedef struct
271+
{
272+
char name[NAMEDATALEN]; /* Hash key --- must be first */
273+
int counter; /* Largest addition used so far for name */
274+
} NameHashEntry;
275+
265276

266277
/* ----------
267278
* Global data
@@ -306,8 +317,6 @@ static int print_function_arguments(StringInfo buf, HeapTuple proctup,
306317
static void print_function_rettype(StringInfo buf, HeapTuple proctup);
307318
static void set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
308319
Bitmapset *rels_used);
309-
static bool refname_is_unique(char *refname, deparse_namespace *dpns,
310-
List *parent_namespaces);
311320
static void set_deparse_for_query(deparse_namespace *dpns, Query *query,
312321
List *parent_namespaces);
313322
static void set_simple_column_names(deparse_namespace *dpns);
@@ -2640,15 +2649,61 @@ static void
26402649
set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
26412650
Bitmapset *rels_used)
26422651
{
2652+
HASHCTL hash_ctl;
2653+
HTAB *names_hash;
2654+
NameHashEntry *hentry;
2655+
bool found;
2656+
int rtindex;
26432657
ListCell *lc;
2644-
int rtindex = 1;
26452658

26462659
dpns->rtable_names = NIL;
2660+
/* nothing more to do if empty rtable */
2661+
if (dpns->rtable == NIL)
2662+
return;
2663+
2664+
/*
2665+
* We use a hash table to hold known names, so that this process is O(N)
2666+
* not O(N^2) for N names.
2667+
*/
2668+
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
2669+
hash_ctl.keysize = NAMEDATALEN;
2670+
hash_ctl.entrysize = sizeof(NameHashEntry);
2671+
hash_ctl.hcxt = CurrentMemoryContext;
2672+
names_hash = hash_create("set_rtable_names names",
2673+
list_length(dpns->rtable),
2674+
&hash_ctl,
2675+
HASH_ELEM | HASH_CONTEXT);
2676+
/* Preload the hash table with names appearing in parent_namespaces */
2677+
foreach(lc, parent_namespaces)
2678+
{
2679+
deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
2680+
ListCell *lc2;
2681+
2682+
foreach(lc2, olddpns->rtable_names)
2683+
{
2684+
char *oldname = (char *) lfirst(lc2);
2685+
2686+
if (oldname == NULL)
2687+
continue;
2688+
hentry = (NameHashEntry *) hash_search(names_hash,
2689+
oldname,
2690+
HASH_ENTER,
2691+
&found);
2692+
/* we do not complain about duplicate names in parent namespaces */
2693+
hentry->counter = 0;
2694+
}
2695+
}
2696+
2697+
/* Now we can scan the rtable */
2698+
rtindex = 1;
26472699
foreach(lc, dpns->rtable)
26482700
{
26492701
RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
26502702
char *refname;
26512703

2704+
/* Just in case this takes an unreasonable amount of time ... */
2705+
CHECK_FOR_INTERRUPTS();
2706+
26522707
if (rels_used && !bms_is_member(rtindex, rels_used))
26532708
{
26542709
/* Ignore unreferenced RTE */
@@ -2676,56 +2731,62 @@ set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
26762731
}
26772732

26782733
/*
2679-
* If the selected name isn't unique, append digits to make it so
2734+
* If the selected name isn't unique, append digits to make it so, and
2735+
* make a new hash entry for it once we've got a unique name. For a
2736+
* very long input name, we might have to truncate to stay within
2737+
* NAMEDATALEN.
26802738
*/
2681-
if (refname &&
2682-
!refname_is_unique(refname, dpns, parent_namespaces))
2739+
if (refname)
26832740
{
2684-
char *modname = (char *) palloc(strlen(refname) + 32);
2685-
int i = 0;
2741+
hentry = (NameHashEntry *) hash_search(names_hash,
2742+
refname,
2743+
HASH_ENTER,
2744+
&found);
2745+
if (found)
2746+
{
2747+
/* Name already in use, must choose a new one */
2748+
int refnamelen = strlen(refname);
2749+
char *modname = (char *) palloc(refnamelen + 16);
2750+
NameHashEntry *hentry2;
26862751

2687-
do
2752+
do
2753+
{
2754+
hentry->counter++;
2755+
for (;;)
2756+
{
2757+
/*
2758+
* We avoid using %.*s here because it can misbehave
2759+
* if the data is not valid in what libc thinks is the
2760+
* prevailing encoding.
2761+
*/
2762+
memcpy(modname, refname, refnamelen);
2763+
sprintf(modname + refnamelen, "_%d", hentry->counter);
2764+
if (strlen(modname) < NAMEDATALEN)
2765+
break;
2766+
/* drop chars from refname to keep all the digits */
2767+
refnamelen = pg_mbcliplen(refname, refnamelen,
2768+
refnamelen - 1);
2769+
}
2770+
hentry2 = (NameHashEntry *) hash_search(names_hash,
2771+
modname,
2772+
HASH_ENTER,
2773+
&found);
2774+
} while (found);
2775+
hentry2->counter = 0; /* init new hash entry */
2776+
refname = modname;
2777+
}
2778+
else
26882779
{
2689-
sprintf(modname, "%s_%d", refname, ++i);
2690-
} while (!refname_is_unique(modname, dpns, parent_namespaces));
2691-
refname = modname;
2780+
/* Name not previously used, need only initialize hentry */
2781+
hentry->counter = 0;
2782+
}
26922783
}
26932784

26942785
dpns->rtable_names = lappend(dpns->rtable_names, refname);
26952786
rtindex++;
26962787
}
2697-
}
2698-
2699-
/*
2700-
* refname_is_unique: is refname distinct from all already-chosen RTE names?
2701-
*/
2702-
static bool
2703-
refname_is_unique(char *refname, deparse_namespace *dpns,
2704-
List *parent_namespaces)
2705-
{
2706-
ListCell *lc;
27072788

2708-
foreach(lc, dpns->rtable_names)
2709-
{
2710-
char *oldname = (char *) lfirst(lc);
2711-
2712-
if (oldname && strcmp(oldname, refname) == 0)
2713-
return false;
2714-
}
2715-
foreach(lc, parent_namespaces)
2716-
{
2717-
deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
2718-
ListCell *lc2;
2719-
2720-
foreach(lc2, olddpns->rtable_names)
2721-
{
2722-
char *oldname = (char *) lfirst(lc2);
2723-
2724-
if (oldname && strcmp(oldname, refname) == 0)
2725-
return false;
2726-
}
2727-
}
2728-
return true;
2789+
hash_destroy(names_hash);
27292790
}
27302791

27312792
/*
@@ -3553,16 +3614,34 @@ make_colname_unique(char *colname, deparse_namespace *dpns,
35533614
deparse_columns *colinfo)
35543615
{
35553616
/*
3556-
* If the selected name isn't unique, append digits to make it so
3617+
* If the selected name isn't unique, append digits to make it so. For a
3618+
* very long input name, we might have to truncate to stay within
3619+
* NAMEDATALEN.
35573620
*/
35583621
if (!colname_is_unique(colname, dpns, colinfo))
35593622
{
3560-
char *modname = (char *) palloc(strlen(colname) + 32);
3623+
int colnamelen = strlen(colname);
3624+
char *modname = (char *) palloc(colnamelen + 16);
35613625
int i = 0;
35623626

35633627
do
35643628
{
3565-
sprintf(modname, "%s_%d", colname, ++i);
3629+
i++;
3630+
for (;;)
3631+
{
3632+
/*
3633+
* We avoid using %.*s here because it can misbehave if the
3634+
* data is not valid in what libc thinks is the prevailing
3635+
* encoding.
3636+
*/
3637+
memcpy(modname, colname, colnamelen);
3638+
sprintf(modname + colnamelen, "_%d", i);
3639+
if (strlen(modname) < NAMEDATALEN)
3640+
break;
3641+
/* drop chars from colname to keep all the digits */
3642+
colnamelen = pg_mbcliplen(colname, colnamelen,
3643+
colnamelen - 1);
3644+
}
35663645
} while (!colname_is_unique(modname, dpns, colinfo));
35673646
colname = modname;
35683647
}

src/test/regress/expected/create_view.out

+27
Original file line numberDiff line numberDiff line change
@@ -1475,6 +1475,33 @@ select * from int8_tbl i where i.* in (values(i.*::int8_tbl));
14751475
4567890123456789 | -4567890123456789
14761476
(5 rows)
14771477

1478+
-- check unique-ification of overlength names
1479+
create view tt18v as
1480+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy
1481+
union all
1482+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz;
1483+
NOTICE: identifier "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy" will be truncated to "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1484+
NOTICE: identifier "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz" will be truncated to "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1485+
select pg_get_viewdef('tt18v', true);
1486+
pg_get_viewdef
1487+
-----------------------------------------------------------------------------------
1488+
SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, +
1489+
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q2 +
1490+
FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +
1491+
UNION ALL +
1492+
SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, +
1493+
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q2 +
1494+
FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
1495+
(1 row)
1496+
1497+
explain (costs off) select * from tt18v;
1498+
QUERY PLAN
1499+
--------------------------------------------------------------------------------------------
1500+
Append
1501+
-> Seq Scan on int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1502+
-> Seq Scan on int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx_1
1503+
(3 rows)
1504+
14781505
-- clean up all the random objects we made above
14791506
set client_min_messages = warning;
14801507
DROP SCHEMA temp_view_test CASCADE;

src/test/regress/sql/create_view.sql

+9
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,15 @@ select * from tt17v;
487487
select pg_get_viewdef('tt17v', true);
488488
select * from int8_tbl i where i.* in (values(i.*::int8_tbl));
489489

490+
-- check unique-ification of overlength names
491+
492+
create view tt18v as
493+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy
494+
union all
495+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz;
496+
select pg_get_viewdef('tt18v', true);
497+
explain (costs off) select * from tt18v;
498+
490499
-- clean up all the random objects we made above
491500
set client_min_messages = warning;
492501
DROP SCHEMA temp_view_test CASCADE;

0 commit comments

Comments
 (0)