Skip to content

Commit 0b713b9

Browse files
committed
Avoid breaking SJIS encoding while de-backslashing Windows paths.
When running on Windows, canonicalize_path() converts '\' to '/' to prevent confusing the Windows command processor. It was doing that in a non-encoding-aware fashion; but in SJIS there are valid two-byte characters whose second byte matches '\'. So encoding corruption ensues if such a character is used in the path.

We can fairly easily fix this if we know which encoding is in use, but a lot of our utilities don't have much of a clue about that. After some discussion we decided we'd settle for fixing this only in psql, and assuming that its value of client_encoding matches what the user is typing.

It seems hopeless to get the server to deal with the problematic characters in database path names, so we'll just declare that case to be unsupported. That means nothing need be done in the server, nor in utility programs whose only contact with file path names is for database paths. But psql frequently deals with client-side file paths, so it'd be good if it didn't mess those up.

Bug: #18735
Reported-by: Koichi Suzuki <koichi.suzuki@enterprisedb.com>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Koichi Suzuki <koichi.suzuki@enterprisedb.com>
Discussion: https://postgr.es/m/18735-4acdb3998bb9f2b1@postgresql.org
Backpatch-through: 13
1 parent 6555fe1 commit 0b713b9

File tree

4 files changed

+97
-19
lines changed

4 files changed

+97
-19
lines changed

src/bin/psql/command.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,7 @@ exec_command_edit(PsqlScanState scan_state, bool active_branch,
11101110
expand_tilde(&fname);
11111111
if (fname)
11121112
{
1113-
canonicalize_path(fname);
1113+
canonicalize_path_enc(fname, pset.encoding);
11141114
/* Always clear buffer if the file isn't modified */
11151115
discard_on_quit = true;
11161116
}
@@ -2694,7 +2694,7 @@ exec_command_write(PsqlScanState scan_state, bool active_branch,
26942694
}
26952695
else
26962696
{
2697-
canonicalize_path(fname);
2697+
canonicalize_path_enc(fname, pset.encoding);
26982698
fd = fopen(fname, "w");
26992699
}
27002700
if (!fd)
@@ -4298,7 +4298,7 @@ process_file(char *filename, bool use_relative_path)
42984298
}
42994299
else if (strcmp(filename, "-") != 0)
43004300
{
4301-
canonicalize_path(filename);
4301+
canonicalize_path_enc(filename, pset.encoding);
43024302

43034303
/*
43044304
* If we were asked to resolve the pathname relative to the location
@@ -4312,7 +4312,7 @@ process_file(char *filename, bool use_relative_path)
43124312
strlcpy(relpath, pset.inputfile, sizeof(relpath));
43134313
get_parent_directory(relpath);
43144314
join_path_components(relpath, relpath, filename);
4315-
canonicalize_path(relpath);
4315+
canonicalize_path_enc(relpath, pset.encoding);
43164316

43174317
filename = relpath;
43184318
}

src/bin/psql/copy.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ do_copy(const char *args)
280280

281281
/* prepare to read or write the target file */
282282
if (options->file && !options->program)
283-
canonicalize_path(options->file);
283+
canonicalize_path_enc(options->file, pset.encoding);
284284

285285
if (options->from)
286286
{

src/include/port.h

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ extern char *first_path_var_separator(const char *pathlist);
5353
extern void join_path_components(char *ret_path,
5454
const char *head, const char *tail);
5555
extern void canonicalize_path(char *path);
56+
extern void canonicalize_path_enc(char *path, int encoding);
5657
extern void make_native_path(char *filename);
5758
extern void cleanup_path(char *path);
5859
extern bool path_contains_parent_reference(const char *path);

src/port/path.c

+91-14
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include <unistd.h>
3636
#endif
3737

38+
#include "mb/pg_wchar.h"
3839
#include "pg_config_paths.h"
3940

4041

@@ -44,6 +45,10 @@
4445
#define IS_PATH_VAR_SEP(ch) ((ch) == ';')
4546
#endif
4647

48+
#ifdef WIN32
49+
static void debackslash_path(char *path, int encoding);
50+
static int pg_sjis_mblen(const unsigned char *s);
51+
#endif
4752
static void make_relative_path(char *ret_path, const char *target_path,
4853
const char *bin_path, const char *my_exec_path);
4954
static char *trim_directory(char *path);
@@ -148,10 +153,73 @@ last_dir_separator(const char *filename)
148153
}
149154

150155

156+
#ifdef WIN32
157+
158+
/*
159+
* Convert '\' to '/' within the given path, assuming the path
160+
* is in the specified encoding.
161+
*/
162+
/* path: NUL-terminated string, rewritten in place; encoding: compared against PG_SJIS only. */
static void
163+
debackslash_path(char *path, int encoding)
164+
{
165+
char *p;
166+
167+
/*
168+
* Of the supported encodings, only Shift-JIS has multibyte characters
169+
* that can include a byte equal to '\' (0x5C). So rather than implement
170+
* a fully encoding-aware conversion, we special-case SJIS. (Invoking the
171+
* general encoding-aware logic in wchar.c is impractical here for
172+
* assorted reasons.)
173+
*/
174+
if (encoding == PG_SJIS)
175+
{
176+
/*
 * Advance one whole SJIS character per iteration (length supplied by
 * pg_sjis_mblen), so only the first byte of each character is compared
 * against '\'. A 0x5C second byte of a two-byte character is stepped
 * over and left untouched.
 */
for (p = path; *p; p += pg_sjis_mblen((const unsigned char *) p))
177+
{
178+
if (*p == '\\')
179+
*p = '/';
180+
}
181+
}
182+
else
183+
{
184+
/* Any other encoding: examine every byte individually. */
for (p = path; *p; p++)
185+
{
186+
if (*p == '\\')
187+
*p = '/';
188+
}
189+
}
190+
}
191+
151192
/*
152-
* make_native_path - on WIN32, change / to \ in the path
193+
* SJIS character length
153194
*
154-
* This effectively undoes canonicalize_path.
195+
* This must match the behavior of
196+
* pg_encoding_mblen_bounded(PG_SJIS, s)
197+
* In particular, unlike the version of pg_sjis_mblen in src/common/wchar.c,
198+
* do not allow caller to accidentally step past end-of-string.
199+
*/
200+
/* Returns 1 or 2: the number of bytes to advance for the character starting at s. */
static int
201+
pg_sjis_mblen(const unsigned char *s)
202+
{
203+
int len;
204+
205+
if (*s >= 0xa1 && *s <= 0xdf)
206+
len = 1; /* 1 byte kana? */
207+
/*
 * Report a two-byte character only when a second byte is actually present.
 * A high-bit byte immediately followed by the terminator falls through to
 * the final branch and counts as length 1, so a caller advancing by the
 * result lands on the terminator instead of skipping over it.
 */
else if (IS_HIGHBIT_SET(*s) && s[1] != '\0')
208+
len = 2; /* kanji? */
209+
else
210+
len = 1; /* should be ASCII */
211+
return len;
212+
}
213+
214+
#endif /* WIN32 */
215+
216+
217+
/*
218+
* make_native_path - on WIN32, change '/' to '\' in the path
219+
*
220+
* This reverses the '\'-to-'/' transformation of debackslash_path.
221+
* We need not worry about encodings here, since '/' does not appear
222+
* as a byte of a multibyte character in any supported encoding.
155223
*
156224
* This is required because WIN32 COPY is an internal CMD.EXE
157225
* command and doesn't process forward slashes in the same way
@@ -181,13 +249,14 @@ make_native_path(char *filename)
181249
* on Windows. We need them to use filenames without spaces, for which a
182250
* short filename is the safest equivalent, eg:
183251
* C:/Progra~1/
252+
*
253+
* Presently, this is only used on paths that we can assume are in a
254+
* server-safe encoding, so there's no need for an encoding-aware variant.
184255
*/
185256
void
186257
cleanup_path(char *path)
187258
{
188259
#ifdef WIN32
189-
char *ptr;
190-
191260
/*
192261
* GetShortPathName() will fail if the path does not exist, or short names
193262
* are disabled on this file system. In both cases, we just return the
@@ -197,11 +266,8 @@ cleanup_path(char *path)
197266
GetShortPathName(path, path, MAXPGPATH - 1);
198267

199268
/* Replace '\' with '/' */
200-
for (ptr = path; *ptr; ptr++)
201-
{
202-
if (*ptr == '\\')
203-
*ptr = '/';
204-
}
269+
/* All server-safe encodings are alike here, so just use PG_SQL_ASCII */
270+
debackslash_path(path, PG_SQL_ASCII);
205271
#endif
206272
}
207273

@@ -252,16 +318,29 @@ typedef enum
252318
} canonicalize_state;
253319

254320
/*
321+
* canonicalize_path()
322+
*
255323
* Clean up path by:
256324
* o make Win32 path use Unix slashes
257325
* o remove trailing quote on Win32
258326
* o remove trailing slash
259327
* o remove duplicate (adjacent) separators
260328
* o remove '.' (unless path reduces to only '.')
261329
* o process '..' ourselves, removing it if possible
330+
* Modifies path in-place.
331+
*
332+
* This comes in two variants: encoding-aware and not. The non-aware version
333+
* is only safe to use on strings that are in a server-safe encoding.
262334
*/
263335
/* Non-encoding-aware variant: forwards path to canonicalize_path_enc() with PG_SQL_ASCII. */
void
264336
canonicalize_path(char *path)
337+
{
338+
/* All server-safe encodings are alike here, so just use PG_SQL_ASCII */
339+
canonicalize_path_enc(path, PG_SQL_ASCII);
340+
}
341+
342+
void
343+
canonicalize_path_enc(char *path, int encoding)
265344
{
266345
char *p,
267346
*to_p;
@@ -277,17 +356,15 @@ canonicalize_path(char *path)
277356
/*
278357
* The Windows command processor will accept suitably quoted paths with
279358
* forward slashes, but barfs badly with mixed forward and back slashes.
359+
* Hence, start by converting all back slashes to forward slashes.
280360
*/
281-
for (p = path; *p; p++)
282-
{
283-
if (*p == '\\')
284-
*p = '/';
285-
}
361+
debackslash_path(path, encoding);
286362

287363
/*
288364
* In Win32, if you do: prog.exe "a b" "\c\d\" the system will pass \c\d"
289365
* as argv[2], so trim off trailing quote.
290366
*/
367+
p = path + strlen(path);
291368
if (p > path && *(p - 1) == '"')
292369
*(p - 1) = '/';
293370
#endif

0 commit comments

Comments
 (0)