Skip to content

Commit 3e91dba

Browse files
committed
Fix various issues with ALTER TEXT SEARCH CONFIGURATION
This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4eb, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
1 parent 6ee26c6 commit 3e91dba

File tree

4 files changed

+132
-30
lines changed

4 files changed

+132
-30
lines changed

src/backend/commands/tsearchcmds.c

Lines changed: 67 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@
4848
#include "utils/rel.h"
4949
#include "utils/syscache.h"
5050

51+
/* Single entry of List returned by getTokenTypes() */
52+
typedef struct
53+
{
54+
int num; /* token type number */
55+
char *name; /* token type name */
56+
} TSTokenTypeItem;
5157

5258
static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
5359
HeapTuple tup, Relation relMap);
@@ -1192,22 +1198,45 @@ AlterTSConfiguration(AlterTSConfigurationStmt *stmt)
11921198
}
11931199

11941200
/*
1195-
* Translate a list of token type names to an array of token type numbers
1201+
* Check whether a token type name is a member of a TSTokenTypeItem list.
11961202
*/
1197-
static int *
1203+
static bool
1204+
tstoken_list_member(char *token_name, List *tokens)
1205+
{
1206+
ListCell *c;
1207+
bool found = false;
1208+
1209+
foreach(c, tokens)
1210+
{
1211+
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
1212+
1213+
if (strcmp(token_name, ts->name) == 0)
1214+
{
1215+
found = true;
1216+
break;
1217+
}
1218+
}
1219+
1220+
return found;
1221+
}
1222+
1223+
/*
1224+
* Translate a list of token type names to a list of unique TSTokenTypeItem.
1225+
*
1226+
* Duplicated entries are removed from tokennames.
1227+
*/
1228+
static List *
11981229
getTokenTypes(Oid prsId, List *tokennames)
11991230
{
12001231
TSParserCacheEntry *prs = lookup_ts_parser_cache(prsId);
12011232
LexDescr *list;
1202-
int *res,
1203-
i,
1204-
ntoken;
1233+
List *result = NIL;
1234+
int ntoken;
12051235
ListCell *tn;
12061236

12071237
ntoken = list_length(tokennames);
12081238
if (ntoken == 0)
1209-
return NULL;
1210-
res = (int *) palloc(sizeof(int) * ntoken);
1239+
return NIL;
12111240

12121241
if (!OidIsValid(prs->lextypeOid))
12131242
elog(ERROR, "method lextype isn't defined for text search parser %u",
@@ -1217,19 +1246,26 @@ getTokenTypes(Oid prsId, List *tokennames)
12171246
list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
12181247
(Datum) 0));
12191248

1220-
i = 0;
12211249
foreach(tn, tokennames)
12221250
{
12231251
String *val = lfirst_node(String, tn);
12241252
bool found = false;
12251253
int j;
12261254

1255+
/* Skip if this token is already in the result */
1256+
if (tstoken_list_member(strVal(val), result))
1257+
continue;
1258+
12271259
j = 0;
12281260
while (list && list[j].lexid)
12291261
{
12301262
if (strcmp(strVal(val), list[j].alias) == 0)
12311263
{
1232-
res[i] = list[j].lexid;
1264+
TSTokenTypeItem *ts = (TSTokenTypeItem *) palloc0(sizeof(TSTokenTypeItem));
1265+
1266+
ts->num = list[j].lexid;
1267+
ts->name = pstrdup(strVal(val));
1268+
result = lappend(result, ts);
12331269
found = true;
12341270
break;
12351271
}
@@ -1240,10 +1276,9 @@ getTokenTypes(Oid prsId, List *tokennames)
12401276
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
12411277
errmsg("token type \"%s\" does not exist",
12421278
strVal(val))));
1243-
i++;
12441279
}
12451280

1246-
return res;
1281+
return result;
12471282
}
12481283

12491284
/*
@@ -1261,8 +1296,8 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
12611296
int i;
12621297
int j;
12631298
Oid prsId;
1264-
int *tokens,
1265-
ntoken;
1299+
List *tokens = NIL;
1300+
int ntoken;
12661301
Oid *dictIds;
12671302
int ndict;
12681303
ListCell *c;
@@ -1273,23 +1308,25 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
12731308
prsId = tsform->cfgparser;
12741309

12751310
tokens = getTokenTypes(prsId, stmt->tokentype);
1276-
ntoken = list_length(stmt->tokentype);
1311+
ntoken = list_length(tokens);
12771312

12781313
if (stmt->override)
12791314
{
12801315
/*
12811316
* delete maps for tokens if they exist and command was ALTER
12821317
*/
1283-
for (i = 0; i < ntoken; i++)
1318+
foreach(c, tokens)
12841319
{
1320+
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
1321+
12851322
ScanKeyInit(&skey[0],
12861323
Anum_pg_ts_config_map_mapcfg,
12871324
BTEqualStrategyNumber, F_OIDEQ,
12881325
ObjectIdGetDatum(cfgId));
12891326
ScanKeyInit(&skey[1],
12901327
Anum_pg_ts_config_map_maptokentype,
12911328
BTEqualStrategyNumber, F_INT4EQ,
1292-
Int32GetDatum(tokens[i]));
1329+
Int32GetDatum(ts->num));
12931330

12941331
scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
12951332
NULL, 2, skey);
@@ -1346,9 +1383,11 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
13461383
{
13471384
bool tokmatch = false;
13481385

1349-
for (j = 0; j < ntoken; j++)
1386+
foreach(c, tokens)
13501387
{
1351-
if (cfgmap->maptokentype == tokens[j])
1388+
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
1389+
1390+
if (cfgmap->maptokentype == ts->num)
13521391
{
13531392
tokmatch = true;
13541393
break;
@@ -1401,8 +1440,10 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
14011440
/*
14021441
* Insertion of new entries
14031442
*/
1404-
for (i = 0; i < ntoken; i++)
1443+
foreach(c, tokens)
14051444
{
1445+
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
1446+
14061447
for (j = 0; j < ndict; j++)
14071448
{
14081449
ExecClearTuple(slot[slotCount]);
@@ -1411,7 +1452,7 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
14111452
slot[slotCount]->tts_tupleDescriptor->natts * sizeof(bool));
14121453

14131454
slot[slotCount]->tts_values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgId);
1414-
slot[slotCount]->tts_values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(tokens[i]);
1455+
slot[slotCount]->tts_values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(ts->num);
14151456
slot[slotCount]->tts_values[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(j + 1);
14161457
slot[slotCount]->tts_values[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictIds[j]);
14171458

@@ -1455,9 +1496,8 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
14551496
ScanKeyData skey[2];
14561497
SysScanDesc scan;
14571498
HeapTuple maptup;
1458-
int i;
14591499
Oid prsId;
1460-
int *tokens;
1500+
List *tokens = NIL;
14611501
ListCell *c;
14621502

14631503
tsform = (Form_pg_ts_config) GETSTRUCT(tup);
@@ -1466,10 +1506,9 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
14661506

14671507
tokens = getTokenTypes(prsId, stmt->tokentype);
14681508

1469-
i = 0;
1470-
foreach(c, stmt->tokentype)
1509+
foreach(c, tokens)
14711510
{
1472-
String *val = lfirst_node(String, c);
1511+
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
14731512
bool found = false;
14741513

14751514
ScanKeyInit(&skey[0],
@@ -1479,7 +1518,7 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
14791518
ScanKeyInit(&skey[1],
14801519
Anum_pg_ts_config_map_maptokentype,
14811520
BTEqualStrategyNumber, F_INT4EQ,
1482-
Int32GetDatum(tokens[i]));
1521+
Int32GetDatum(ts->num));
14831522

14841523
scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
14851524
NULL, 2, skey);
@@ -1499,17 +1538,15 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
14991538
ereport(ERROR,
15001539
(errcode(ERRCODE_UNDEFINED_OBJECT),
15011540
errmsg("mapping for token type \"%s\" does not exist",
1502-
strVal(val))));
1541+
ts->name)));
15031542
}
15041543
else
15051544
{
15061545
ereport(NOTICE,
15071546
(errmsg("mapping for token type \"%s\" does not exist, skipping",
1508-
strVal(val))));
1547+
ts->name)));
15091548
}
15101549
}
1511-
1512-
i++;
15131550
}
15141551

15151552
EventTriggerCollectAlterTSConfig(stmt, cfgId, NULL, 0);

src/test/regress/expected/tsdicts.out

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,3 +687,37 @@ CREATE TEXT SEARCH DICTIONARY tsdict_case
687687
"AffFile" = ispell_sample
688688
);
689689
ERROR: unrecognized Ispell parameter: "DictFile"
690+
-- Test grammar for configurations
691+
CREATE TEXT SEARCH CONFIGURATION dummy_tst (COPY=english);
692+
-- Overridden mapping change with duplicated tokens.
693+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
694+
ALTER MAPPING FOR word, word WITH ispell;
695+
-- Not a token supported by the configuration's parser, fails.
696+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
697+
DROP MAPPING FOR not_a_token, not_a_token;
698+
ERROR: token type "not_a_token" does not exist
699+
-- Not a token supported by the configuration's parser, fails even
700+
-- with IF EXISTS.
701+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
702+
DROP MAPPING IF EXISTS FOR not_a_token, not_a_token;
703+
ERROR: token type "not_a_token" does not exist
704+
-- Token supported by the configuration's parser, succeeds.
705+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
706+
DROP MAPPING FOR word, word;
707+
-- No mapping for token supported by the configuration's parser, fails.
708+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
709+
DROP MAPPING FOR word;
710+
ERROR: mapping for token type "word" does not exist
711+
-- Token supported by the configuration's parser, cannot be found,
712+
-- succeeds with IF EXISTS.
713+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
714+
DROP MAPPING IF EXISTS FOR word, word;
715+
NOTICE: mapping for token type "word" does not exist, skipping
716+
-- Re-add mapping, with duplicated tokens supported by the parser.
717+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
718+
ADD MAPPING FOR word, word WITH ispell;
719+
-- Not a token supported by the configuration's parser, fails.
720+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
721+
ADD MAPPING FOR not_a_token WITH ispell;
722+
ERROR: token type "not_a_token" does not exist
723+
DROP TEXT SEARCH CONFIGURATION dummy_tst;

src/test/regress/sql/tsdicts.sql

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,3 +251,33 @@ CREATE TEXT SEARCH DICTIONARY tsdict_case
251251
"DictFile" = ispell_sample,
252252
"AffFile" = ispell_sample
253253
);
254+
255+
-- Test grammar for configurations
256+
CREATE TEXT SEARCH CONFIGURATION dummy_tst (COPY=english);
257+
-- Overridden mapping change with duplicated tokens.
258+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
259+
ALTER MAPPING FOR word, word WITH ispell;
260+
-- Not a token supported by the configuration's parser, fails.
261+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
262+
DROP MAPPING FOR not_a_token, not_a_token;
263+
-- Not a token supported by the configuration's parser, fails even
264+
-- with IF EXISTS.
265+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
266+
DROP MAPPING IF EXISTS FOR not_a_token, not_a_token;
267+
-- Token supported by the configuration's parser, succeeds.
268+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
269+
DROP MAPPING FOR word, word;
270+
-- No mapping for token supported by the configuration's parser, fails.
271+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
272+
DROP MAPPING FOR word;
273+
-- Token supported by the configuration's parser, cannot be found,
274+
-- succeeds with IF EXISTS.
275+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
276+
DROP MAPPING IF EXISTS FOR word, word;
277+
-- Re-add mapping, with duplicated tokens supported by the parser.
278+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
279+
ADD MAPPING FOR word, word WITH ispell;
280+
-- Not a token supported by the configuration's parser, fails.
281+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
282+
ADD MAPPING FOR not_a_token WITH ispell;
283+
DROP TEXT SEARCH CONFIGURATION dummy_tst;

src/tools/pgindent/typedefs.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2758,6 +2758,7 @@ TSQuerySign
27582758
TSReadPointer
27592759
TSTemplateInfo
27602760
TSTernaryValue
2761+
TSTokenTypeItem
27612762
TSTokenTypeStorage
27622763
TSVector
27632764
TSVectorBuildState

0 commit comments

Comments
 (0)