Skip to content

Commit d01f03a

Browse files
committed
Preserve integer and float values accurately in (de)serialize_deflist.
Previously, this code just smashed all types of DefElem values to strings, cavalierly reasoning that nobody would care. But in point of fact, most of the defGetFoo functions do distinguish among different input syntaxes; for instance defGetBoolean will accept 1 as an integer but not "1" as a string. This led to CREATE/ALTER TEXT SEARCH DICTIONARY accepting 0 and 1 as values for boolean dictionary properties, only to have the dictionary fail at runtime. We can upgrade this behavior by teaching serialize_deflist that it does not need to quote T_Integer or T_Float nodes' values on output, and then teaching deserialize_deflist to restore unquoted integer or float values as the appropriate node type. This should not break anything using pg_ts_dict.dictinitoption, since that field is just defined as being something valid to include in CREATE TEXT SEARCH DICTIONARY. deserialize_deflist is also used to parse the options arguments for the ts_headline family of functions, but so far as I can see this won't cause any problems there either: the only consumer of that output is prsd_headline which always uses defGetString. (Really that's a bad idea, but I won't risk changing it here.) This is surely a bug fix, but given the lack of field complaints I don't think it's necessary to back-patch. Discussion: https://postgr.es/m/CAMkU=1xRcs_BUPzR0+V3WndaCAv0E_m3h6aUEJ8NF-sY1nnHsw@mail.gmail.com
1 parent 40b3e2c commit d01f03a

File tree

5 files changed

+154
-23
lines changed

5 files changed

+154
-23
lines changed

contrib/dict_int/expected/dict_int.out

+28-1
Original file line numberDiff line numberDiff line change
@@ -300,8 +300,10 @@ select ts_lexize('intdict', '314532610153');
300300
{314532}
301301
(1 row)
302302

303-
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648);
303+
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648); -- fail
304304
ERROR: maxlen value has to be >= 1
305+
-- This ought to fail, perhaps, but historically it has not:
306+
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 6.7);
305307
select ts_lexize('intdict', '-40865854');
306308
ts_lexize
307309
-----------
@@ -327,3 +329,28 @@ select ts_lexize('intdict', '+40865854');
327329
{408658}
328330
(1 row)
329331

332+
ALTER TEXT SEARCH DICTIONARY intdict (REJECTLONG = 1);
333+
select ts_lexize('intdict', '-40865854');
334+
ts_lexize
335+
-----------
336+
{}
337+
(1 row)
338+
339+
select ts_lexize('intdict', '-4086585');
340+
ts_lexize
341+
-----------
342+
{}
343+
(1 row)
344+
345+
select ts_lexize('intdict', '-408658');
346+
ts_lexize
347+
-----------
348+
{408658}
349+
(1 row)
350+
351+
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'intdict';
352+
dictinitoption
353+
-----------------------------------------------
354+
maxlen = 6.7, absval = 'true', rejectlong = 1
355+
(1 row)
356+

contrib/dict_int/sql/dict_int.sql

+9-1
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,18 @@ select ts_lexize('intdict', '313425');
5252
select ts_lexize('intdict', '641439323669');
5353
select ts_lexize('intdict', '314532610153');
5454

55-
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648);
55+
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648); -- fail
56+
-- This ought to fail, perhaps, but historically it has not:
57+
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 6.7);
5658

5759
select ts_lexize('intdict', '-40865854');
5860
select ts_lexize('intdict', '+40865854');
5961
ALTER TEXT SEARCH DICTIONARY intdict (ABSVAL = true);
6062
select ts_lexize('intdict', '-40865854');
6163
select ts_lexize('intdict', '+40865854');
64+
ALTER TEXT SEARCH DICTIONARY intdict (REJECTLONG = 1);
65+
select ts_lexize('intdict', '-40865854');
66+
select ts_lexize('intdict', '-4086585');
67+
select ts_lexize('intdict', '-408658');
68+
69+
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'intdict';

src/backend/commands/tsearchcmds.c

+69-21
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "commands/alter.h"
3737
#include "commands/defrem.h"
3838
#include "commands/event_trigger.h"
39+
#include "common/string.h"
3940
#include "miscadmin.h"
4041
#include "nodes/makefuncs.h"
4142
#include "parser/parse_func.h"
@@ -52,6 +53,8 @@ static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
5253
HeapTuple tup, Relation relMap);
5354
static void DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
5455
HeapTuple tup, Relation relMap);
56+
static DefElem *buildDefItem(const char *name, const char *val,
57+
bool was_quoted);
5558

5659

5760
/* --------------------- TS Parser commands ------------------------ */
@@ -1519,9 +1522,6 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
15191522
* For the convenience of pg_dump, the output is formatted exactly as it
15201523
* would need to appear in CREATE TEXT SEARCH DICTIONARY to reproduce the
15211524
* same options.
1522-
*
1523-
* Note that we assume that only the textual representation of an option's
1524-
* value is interesting --- hence, non-string DefElems get forced to strings.
15251525
*/
15261526
text *
15271527
serialize_deflist(List *deflist)
@@ -1539,19 +1539,30 @@ serialize_deflist(List *deflist)
15391539

15401540
appendStringInfo(&buf, "%s = ",
15411541
quote_identifier(defel->defname));
1542-
/* If backslashes appear, force E syntax to determine their handling */
1543-
if (strchr(val, '\\'))
1544-
appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX);
1545-
appendStringInfoChar(&buf, '\'');
1546-
while (*val)
1542+
1543+
/*
1544+
* If the value is a T_Integer or T_Float, emit it without quotes,
1545+
* otherwise with quotes. This is essential to allow correct
1546+
* reconstruction of the node type as well as the value.
1547+
*/
1548+
if (IsA(defel->arg, Integer) || IsA(defel->arg, Float))
1549+
appendStringInfoString(&buf, val);
1550+
else
15471551
{
1548-
char ch = *val++;
1552+
/* If backslashes appear, force E syntax to quote them safely */
1553+
if (strchr(val, '\\'))
1554+
appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX);
1555+
appendStringInfoChar(&buf, '\'');
1556+
while (*val)
1557+
{
1558+
char ch = *val++;
15491559

1550-
if (SQL_STR_DOUBLE(ch, true))
1560+
if (SQL_STR_DOUBLE(ch, true))
1561+
appendStringInfoChar(&buf, ch);
15511562
appendStringInfoChar(&buf, ch);
1552-
appendStringInfoChar(&buf, ch);
1563+
}
1564+
appendStringInfoChar(&buf, '\'');
15531565
}
1554-
appendStringInfoChar(&buf, '\'');
15551566
if (lnext(deflist, l) != NULL)
15561567
appendStringInfoString(&buf, ", ");
15571568
}
@@ -1566,7 +1577,7 @@ serialize_deflist(List *deflist)
15661577
*
15671578
* This is also used for prsheadline options, so for backward compatibility
15681579
* we need to accept a few things serialize_deflist() will never emit:
1569-
* in particular, unquoted and double-quoted values.
1580+
* in particular, unquoted and double-quoted strings.
15701581
*/
15711582
List *
15721583
deserialize_deflist(Datum txt)
@@ -1694,8 +1705,9 @@ deserialize_deflist(Datum txt)
16941705
{
16951706
*wsptr++ = '\0';
16961707
result = lappend(result,
1697-
makeDefElem(pstrdup(workspace),
1698-
(Node *) makeString(pstrdup(startvalue)), -1));
1708+
buildDefItem(workspace,
1709+
startvalue,
1710+
true));
16991711
state = CS_WAITKEY;
17001712
}
17011713
}
@@ -1726,8 +1738,9 @@ deserialize_deflist(Datum txt)
17261738
{
17271739
*wsptr++ = '\0';
17281740
result = lappend(result,
1729-
makeDefElem(pstrdup(workspace),
1730-
(Node *) makeString(pstrdup(startvalue)), -1));
1741+
buildDefItem(workspace,
1742+
startvalue,
1743+
true));
17311744
state = CS_WAITKEY;
17321745
}
17331746
}
@@ -1741,8 +1754,9 @@ deserialize_deflist(Datum txt)
17411754
{
17421755
*wsptr++ = '\0';
17431756
result = lappend(result,
1744-
makeDefElem(pstrdup(workspace),
1745-
(Node *) makeString(pstrdup(startvalue)), -1));
1757+
buildDefItem(workspace,
1758+
startvalue,
1759+
false));
17461760
state = CS_WAITKEY;
17471761
}
17481762
else
@@ -1760,8 +1774,9 @@ deserialize_deflist(Datum txt)
17601774
{
17611775
*wsptr++ = '\0';
17621776
result = lappend(result,
1763-
makeDefElem(pstrdup(workspace),
1764-
(Node *) makeString(pstrdup(startvalue)), -1));
1777+
buildDefItem(workspace,
1778+
startvalue,
1779+
false));
17651780
}
17661781
else if (state != CS_WAITKEY)
17671782
ereport(ERROR,
@@ -1773,3 +1788,36 @@ deserialize_deflist(Datum txt)
17731788

17741789
return result;
17751790
}
1791+
1792+
/*
1793+
* Build one DefElem for deserialize_deflist
1794+
*/
1795+
static DefElem *
1796+
buildDefItem(const char *name, const char *val, bool was_quoted)
1797+
{
1798+
/* If input was quoted, always emit as string */
1799+
if (!was_quoted && val[0] != '\0')
1800+
{
1801+
int v;
1802+
char *endptr;
1803+
1804+
/* Try to parse as an integer */
1805+
errno = 0;
1806+
v = strtoint(val, &endptr, 10);
1807+
if (errno == 0 && *endptr == '\0')
1808+
return makeDefElem(pstrdup(name),
1809+
(Node *) makeInteger(v),
1810+
-1);
1811+
/* Nope, how about as a float? */
1812+
errno = 0;
1813+
(void) strtod(val, &endptr);
1814+
if (errno == 0 && *endptr == '\0')
1815+
return makeDefElem(pstrdup(name),
1816+
(Node *) makeFloat(pstrdup(val)),
1817+
-1);
1818+
}
1819+
/* Just make it a string */
1820+
return makeDefElem(pstrdup(name),
1821+
(Node *) makeString(pstrdup(val)),
1822+
-1);
1823+
}

src/test/regress/expected/tsdicts.out

+35
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,41 @@ SELECT ts_lexize('synonym', 'indices');
470470
{index}
471471
(1 row)
472472

473+
-- test altering boolean parameters
474+
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
475+
dictinitoption
476+
-----------------------------
477+
synonyms = 'synonym_sample'
478+
(1 row)
479+
480+
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 1);
481+
SELECT ts_lexize('synonym', 'PoStGrEs');
482+
ts_lexize
483+
-----------
484+
485+
(1 row)
486+
487+
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
488+
dictinitoption
489+
------------------------------------------------
490+
synonyms = 'synonym_sample', casesensitive = 1
491+
(1 row)
492+
493+
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 2); -- fail
494+
ERROR: casesensitive requires a Boolean value
495+
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = off);
496+
SELECT ts_lexize('synonym', 'PoStGrEs');
497+
ts_lexize
498+
-----------
499+
{pgsql}
500+
(1 row)
501+
502+
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
503+
dictinitoption
504+
----------------------------------------------------
505+
synonyms = 'synonym_sample', casesensitive = 'off'
506+
(1 row)
507+
473508
-- Create and simple test thesaurus dictionary
474509
-- More tests in configuration checks because ts_lexize()
475510
-- cannot pass more than one word to thesaurus.

src/test/regress/sql/tsdicts.sql

+13
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,19 @@ SELECT ts_lexize('synonym', 'PoStGrEs');
148148
SELECT ts_lexize('synonym', 'Gogle');
149149
SELECT ts_lexize('synonym', 'indices');
150150

151+
-- test altering boolean parameters
152+
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
153+
154+
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 1);
155+
SELECT ts_lexize('synonym', 'PoStGrEs');
156+
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
157+
158+
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 2); -- fail
159+
160+
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = off);
161+
SELECT ts_lexize('synonym', 'PoStGrEs');
162+
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
163+
151164
-- Create and simple test thesaurus dictionary
152165
-- More tests in configuration checks because ts_lexize()
153166
-- cannot pass more than one word to thesaurus.

0 commit comments

Comments
 (0)