Skip to content

Commit 0561097

Browse files
committed
Fix various issues with ALTER TEXT SEARCH CONFIGURATION
This commit addresses a set of issues that occur when changing token type mappings in a text search configuration using duplicated token names:

- ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping.
- ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings were removed.
- DROP MAPPING failed with "tuple already updated by self", as in the previous case, but in a different code path.

The code is refactored so that the token names (with their numbers) are handled as a List with unique members rather than an array of numbers, ensuring that duplicates cannot interfere with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously, while duplicated tokens are discarded from the list used to work on the catalogs.

Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss.

These issues have existed since at least the introduction of this code in core with 140d4eb, so backpatch all the way down.

Reported-by: Alexander Lakhin
Author: Tender Wang, Michael Paquier
Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org
Backpatch-through: 12
1 parent c944bf2 commit 0561097

File tree

4 files changed

+130
-30
lines changed

4 files changed

+130
-30
lines changed

src/backend/commands/tsearchcmds.c

Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@
4747
#include "utils/rel.h"
4848
#include "utils/syscache.h"
4949

50+
/* Single entry of List returned by getTokenTypes() */
51+
typedef struct
52+
{
53+
int num; /* token type number */
54+
char *name; /* token type name */
55+
} TSTokenTypeItem;
5056

5157
static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
5258
HeapTuple tup, Relation relMap);
@@ -1227,22 +1233,45 @@ AlterTSConfiguration(AlterTSConfigurationStmt *stmt)
12271233
}
12281234

12291235
/*
1230-
* Translate a list of token type names to an array of token type numbers
1236+
* Check whether a token type name is a member of a TSTokenTypeItem list.
12311237
*/
1232-
static int *
1238+
static bool
1239+
tstoken_list_member(char *token_name, List *tokens)
1240+
{
1241+
ListCell *c;
1242+
bool found = false;
1243+
1244+
foreach(c, tokens)
1245+
{
1246+
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
1247+
1248+
if (strcmp(token_name, ts->name) == 0)
1249+
{
1250+
found = true;
1251+
break;
1252+
}
1253+
}
1254+
1255+
return found;
1256+
}
1257+
1258+
/*
1259+
* Translate a list of token type names to a list of unique TSTokenTypeItem.
1260+
*
1261+
* Duplicated entries in tokennames are omitted from the returned list.
1262+
*/
1263+
static List *
12331264
getTokenTypes(Oid prsId, List *tokennames)
12341265
{
12351266
TSParserCacheEntry *prs = lookup_ts_parser_cache(prsId);
12361267
LexDescr *list;
1237-
int *res,
1238-
i,
1239-
ntoken;
1268+
List *result = NIL;
1269+
int ntoken;
12401270
ListCell *tn;
12411271

12421272
ntoken = list_length(tokennames);
12431273
if (ntoken == 0)
1244-
return NULL;
1245-
res = (int *) palloc(sizeof(int) * ntoken);
1274+
return NIL;
12461275

12471276
if (!OidIsValid(prs->lextypeOid))
12481277
elog(ERROR, "method lextype isn't defined for text search parser %u",
@@ -1252,19 +1281,26 @@ getTokenTypes(Oid prsId, List *tokennames)
12521281
list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
12531282
(Datum) 0));
12541283

1255-
i = 0;
12561284
foreach(tn, tokennames)
12571285
{
12581286
Value *val = (Value *) lfirst(tn);
12591287
bool found = false;
12601288
int j;
12611289

1290+
/* Skip if this token is already in the result */
1291+
if (tstoken_list_member(strVal(val), result))
1292+
continue;
1293+
12621294
j = 0;
12631295
while (list && list[j].lexid)
12641296
{
12651297
if (strcmp(strVal(val), list[j].alias) == 0)
12661298
{
1267-
res[i] = list[j].lexid;
1299+
TSTokenTypeItem *ts = (TSTokenTypeItem *) palloc0(sizeof(TSTokenTypeItem));
1300+
1301+
ts->num = list[j].lexid;
1302+
ts->name = pstrdup(strVal(val));
1303+
result = lappend(result, ts);
12681304
found = true;
12691305
break;
12701306
}
@@ -1275,10 +1311,9 @@ getTokenTypes(Oid prsId, List *tokennames)
12751311
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
12761312
errmsg("token type \"%s\" does not exist",
12771313
strVal(val))));
1278-
i++;
12791314
}
12801315

1281-
return res;
1316+
return result;
12821317
}
12831318

12841319
/*
@@ -1296,8 +1331,7 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
12961331
int i;
12971332
int j;
12981333
Oid prsId;
1299-
int *tokens,
1300-
ntoken;
1334+
List *tokens = NIL;
13011335
Oid *dictIds;
13021336
int ndict;
13031337
ListCell *c;
@@ -1307,23 +1341,24 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
13071341
prsId = tsform->cfgparser;
13081342

13091343
tokens = getTokenTypes(prsId, stmt->tokentype);
1310-
ntoken = list_length(stmt->tokentype);
13111344

13121345
if (stmt->override)
13131346
{
13141347
/*
13151348
* delete maps for tokens if they exist and command was ALTER
13161349
*/
1317-
for (i = 0; i < ntoken; i++)
1350+
foreach(c, tokens)
13181351
{
1352+
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
1353+
13191354
ScanKeyInit(&skey[0],
13201355
Anum_pg_ts_config_map_mapcfg,
13211356
BTEqualStrategyNumber, F_OIDEQ,
13221357
ObjectIdGetDatum(cfgId));
13231358
ScanKeyInit(&skey[1],
13241359
Anum_pg_ts_config_map_maptokentype,
13251360
BTEqualStrategyNumber, F_INT4EQ,
1326-
Int32GetDatum(tokens[i]));
1361+
Int32GetDatum(ts->num));
13271362

13281363
scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
13291364
NULL, 2, skey);
@@ -1378,9 +1413,11 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
13781413
{
13791414
bool tokmatch = false;
13801415

1381-
for (j = 0; j < ntoken; j++)
1416+
foreach(c, tokens)
13821417
{
1383-
if (cfgmap->maptokentype == tokens[j])
1418+
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
1419+
1420+
if (cfgmap->maptokentype == ts->num)
13841421
{
13851422
tokmatch = true;
13861423
break;
@@ -1421,16 +1458,18 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
14211458
/*
14221459
* Insertion of new entries
14231460
*/
1424-
for (i = 0; i < ntoken; i++)
1461+
foreach(c, tokens)
14251462
{
1463+
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
1464+
14261465
for (j = 0; j < ndict; j++)
14271466
{
14281467
Datum values[Natts_pg_ts_config_map];
14291468
bool nulls[Natts_pg_ts_config_map];
14301469

14311470
memset(nulls, false, sizeof(nulls));
14321471
values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgId);
1433-
values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(tokens[i]);
1472+
values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(ts->num);
14341473
values[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(j + 1);
14351474
values[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictIds[j]);
14361475

@@ -1457,9 +1496,8 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
14571496
ScanKeyData skey[2];
14581497
SysScanDesc scan;
14591498
HeapTuple maptup;
1460-
int i;
14611499
Oid prsId;
1462-
int *tokens;
1500+
List *tokens = NIL;
14631501
ListCell *c;
14641502

14651503
tsform = (Form_pg_ts_config) GETSTRUCT(tup);
@@ -1468,10 +1506,9 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
14681506

14691507
tokens = getTokenTypes(prsId, stmt->tokentype);
14701508

1471-
i = 0;
1472-
foreach(c, stmt->tokentype)
1509+
foreach(c, tokens)
14731510
{
1474-
Value *val = (Value *) lfirst(c);
1511+
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
14751512
bool found = false;
14761513

14771514
ScanKeyInit(&skey[0],
@@ -1481,7 +1518,7 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
14811518
ScanKeyInit(&skey[1],
14821519
Anum_pg_ts_config_map_maptokentype,
14831520
BTEqualStrategyNumber, F_INT4EQ,
1484-
Int32GetDatum(tokens[i]));
1521+
Int32GetDatum(ts->num));
14851522

14861523
scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
14871524
NULL, 2, skey);
@@ -1501,17 +1538,15 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
15011538
ereport(ERROR,
15021539
(errcode(ERRCODE_UNDEFINED_OBJECT),
15031540
errmsg("mapping for token type \"%s\" does not exist",
1504-
strVal(val))));
1541+
ts->name)));
15051542
}
15061543
else
15071544
{
15081545
ereport(NOTICE,
15091546
(errmsg("mapping for token type \"%s\" does not exist, skipping",
1510-
strVal(val))));
1547+
ts->name)));
15111548
}
15121549
}
1513-
1514-
i++;
15151550
}
15161551

15171552
EventTriggerCollectAlterTSConfig(stmt, cfgId, NULL, 0);

src/test/regress/expected/tsdicts.out

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,3 +652,37 @@ CREATE TEXT SEARCH DICTIONARY tsdict_case
652652
"AffFile" = ispell_sample
653653
);
654654
ERROR: unrecognized Ispell parameter: "DictFile"
655+
-- Test grammar for configurations
656+
CREATE TEXT SEARCH CONFIGURATION dummy_tst (COPY=english);
657+
-- Overriden mapping change with duplicated tokens.
658+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
659+
ALTER MAPPING FOR word, word WITH ispell;
660+
-- Not a token supported by the configuration's parser, fails.
661+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
662+
DROP MAPPING FOR not_a_token, not_a_token;
663+
ERROR: token type "not_a_token" does not exist
664+
-- Not a token supported by the configuration's parser, fails even
665+
-- with IF EXISTS.
666+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
667+
DROP MAPPING IF EXISTS FOR not_a_token, not_a_token;
668+
ERROR: token type "not_a_token" does not exist
669+
-- Token supported by the configuration's parser, succeeds.
670+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
671+
DROP MAPPING FOR word, word;
672+
-- No mapping for token supported by the configuration's parser, fails.
673+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
674+
DROP MAPPING FOR word;
675+
ERROR: mapping for token type "word" does not exist
676+
-- Token supported by the configuration's parser, cannot be found,
677+
-- succeeds with IF EXISTS.
678+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
679+
DROP MAPPING IF EXISTS FOR word, word;
680+
NOTICE: mapping for token type "word" does not exist, skipping
681+
-- Re-add mapping, with duplicated tokens supported by the parser.
682+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
683+
ADD MAPPING FOR word, word WITH ispell;
684+
-- Not a token supported by the configuration's parser, fails.
685+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
686+
ADD MAPPING FOR not_a_token WITH ispell;
687+
ERROR: token type "not_a_token" does not exist
688+
DROP TEXT SEARCH CONFIGURATION dummy_tst;

src/test/regress/sql/tsdicts.sql

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,3 +238,33 @@ CREATE TEXT SEARCH DICTIONARY tsdict_case
238238
"DictFile" = ispell_sample,
239239
"AffFile" = ispell_sample
240240
);
241+
242+
-- Test grammar for configurations
243+
CREATE TEXT SEARCH CONFIGURATION dummy_tst (COPY=english);
244+
-- Overriden mapping change with duplicated tokens.
245+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
246+
ALTER MAPPING FOR word, word WITH ispell;
247+
-- Not a token supported by the configuration's parser, fails.
248+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
249+
DROP MAPPING FOR not_a_token, not_a_token;
250+
-- Not a token supported by the configuration's parser, fails even
251+
-- with IF EXISTS.
252+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
253+
DROP MAPPING IF EXISTS FOR not_a_token, not_a_token;
254+
-- Token supported by the configuration's parser, succeeds.
255+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
256+
DROP MAPPING FOR word, word;
257+
-- No mapping for token supported by the configuration's parser, fails.
258+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
259+
DROP MAPPING FOR word;
260+
-- Token supported by the configuration's parser, cannot be found,
261+
-- succeeds with IF EXISTS.
262+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
263+
DROP MAPPING IF EXISTS FOR word, word;
264+
-- Re-add mapping, with duplicated tokens supported by the parser.
265+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
266+
ADD MAPPING FOR word, word WITH ispell;
267+
-- Not a token supported by the configuration's parser, fails.
268+
ALTER TEXT SEARCH CONFIGURATION dummy_tst
269+
ADD MAPPING FOR not_a_token WITH ispell;
270+
DROP TEXT SEARCH CONFIGURATION dummy_tst;

src/tools/pgindent/typedefs.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2380,6 +2380,7 @@ TSQueryParserState
23802380
TSQuerySign
23812381
TSReadPointer
23822382
TSTemplateInfo
2383+
TSTokenTypeItem
23832384
TSTokenTypeStorage
23842385
TSVector
23852386
TSVectorBuildState

0 commit comments

Comments
 (0)