Skip to content

Commit 45b1a67

Browse files
committed
pg_clean_ascii(): escape bytes rather than lose them
Rather than replace each unprintable byte with a '?' character, replace it with a hex escape instead. The API now allocates a copy rather than modifying the input in place.

Author: Jacob Champion <jchampion@timescale.com>
Discussion: https://www.postgresql.org/message-id/CAAWbhmgsvHrH9wLU2kYc3pOi1KSenHSLAHBbCVmmddW6-mc_=w@mail.gmail.com
1 parent da5d4ea commit 45b1a67

File tree

4 files changed

+65
-17
lines changed

4 files changed

+65
-17
lines changed

src/backend/postmaster/postmaster.c

+1 -5
Original file line number | Diff line number | Diff line change
@@ -2280,11 +2280,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
22802280
*/
22812281
if (strcmp(nameptr, "application_name") == 0)
22822282
{
2283-
char *tmp_app_name = pstrdup(valptr);
2284-
2285-
pg_clean_ascii(tmp_app_name);
2286-
2287-
port->application_name = tmp_app_name;
2283+
port->application_name = pg_clean_ascii(valptr, 0);
22882284
}
22892285
}
22902286
offset = valoffset + strlen(valptr) + 1;

src/backend/utils/misc/guc.c

+20 -2
Original file line number | Diff line number | Diff line change
@@ -12921,9 +12921,18 @@ assign_maintenance_io_concurrency(int newval, void *extra)
1292112921
static bool
1292212922
check_application_name(char **newval, void **extra, GucSource source)
1292312923
{
12924+
char *clean;
12925+
1292412926
/* Only allow clean ASCII chars in the application name */
12925-
pg_clean_ascii(*newval);
12927+
clean = pg_clean_ascii(*newval, MCXT_ALLOC_NO_OOM);
12928+
if (!clean)
12929+
return false;
12930+
12931+
clean = guc_strdup(WARNING, clean);
12932+
if (!clean)
12933+
return false;
1292612934

12935+
*newval = clean;
1292712936
return true;
1292812937
}
1292912938

@@ -12937,9 +12946,18 @@ assign_application_name(const char *newval, void *extra)
1293712946
static bool
1293812947
check_cluster_name(char **newval, void **extra, GucSource source)
1293912948
{
12949+
char *clean;
12950+
1294012951
/* Only allow clean ASCII chars in the cluster name */
12941-
pg_clean_ascii(*newval);
12952+
clean = pg_clean_ascii(*newval, MCXT_ALLOC_NO_OOM);
12953+
if (!clean)
12954+
return false;
12955+
12956+
clean = guc_strdup(WARNING, clean);
12957+
if (!clean)
12958+
return false;
1294212959

12960+
*newval = clean;
1294312961
return true;
1294412962
}
1294512963

src/common/string.c

+43 -9
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
#endif
2323

2424
#include "common/string.h"
25+
#include "lib/stringinfo.h"
2526

2627

2728
/*
@@ -59,9 +60,12 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
5960

6061

6162
/*
62-
* pg_clean_ascii -- Replace any non-ASCII chars with a '?' char
63+
* pg_clean_ascii -- Replace any non-ASCII chars with a "\xXX" string
6364
*
64-
* Modifies the string passed in which must be '\0'-terminated.
65+
* Makes a newly allocated copy of the string passed in, which must be
66+
* '\0'-terminated. In the backend, additional alloc_flags may be provided and
67+
* will be passed as-is to palloc_extended(); in the frontend, alloc_flags is
68+
* ignored and the copy is malloc'd.
6569
*
6670
* This function exists specifically to deal with filtering out
6771
* non-ASCII characters in a few places where the client can provide an almost
@@ -73,22 +77,52 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
7377
* In general, this function should NOT be used- instead, consider how to handle
7478
* the string without needing to filter out the non-ASCII characters.
7579
*
76-
* Ultimately, we'd like to improve the situation to not require stripping out
77-
* all non-ASCII but perform more intelligent filtering which would allow UTF or
80+
* Ultimately, we'd like to improve the situation to not require replacing all
81+
* non-ASCII but perform more intelligent filtering which would allow UTF or
7882
* similar, but it's unclear exactly what we should allow, so stick to ASCII only
7983
* for now.
8084
*/
81-
void
82-
pg_clean_ascii(char *str)
85+
char *
86+
pg_clean_ascii(const char *str, int alloc_flags)
8387
{
84-
/* Only allow clean ASCII chars in the string */
85-
char *p;
88+
size_t dstlen;
89+
char *dst;
90+
const char *p;
91+
size_t i = 0;
92+
93+
/* Worst case, each byte can become four bytes, plus a null terminator. */
94+
dstlen = strlen(str) * 4 + 1;
95+
96+
#ifdef FRONTEND
97+
dst = malloc(dstlen);
98+
#else
99+
dst = palloc_extended(dstlen, alloc_flags);
100+
#endif
101+
102+
if (!dst)
103+
return NULL;
86104

87105
for (p = str; *p != '\0'; p++)
88106
{
107+
108+
/* Only allow clean ASCII chars in the string */
89109
if (*p < 32 || *p > 126)
90-
*p = '?';
110+
{
111+
Assert(i < (dstlen - 3));
112+
snprintf(&dst[i], dstlen - i, "\\x%02x", (unsigned char) *p);
113+
i += 4;
114+
}
115+
else
116+
{
117+
Assert(i < dstlen);
118+
dst[i] = *p;
119+
i++;
120+
}
91121
}
122+
123+
Assert(i < dstlen);
124+
dst[i] = '\0';
125+
return dst;
92126
}
93127

94128

src/include/common/string.h

+1 -1
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ typedef struct PromptInterruptContext
2424
extern bool pg_str_endswith(const char *str, const char *end);
2525
extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr,
2626
int base);
27-
extern void pg_clean_ascii(char *str);
27+
extern char *pg_clean_ascii(const char *str, int alloc_flags);
2828
extern int pg_strip_crlf(char *str);
2929
extern bool pg_is_ascii(const char *str);
3030

0 commit comments

Comments
 (0)