@@ -180,40 +180,25 @@ fmtIdEnc(const char *rawid, int encoding)
180
180
/* Slow path for possible multibyte characters */
181
181
charlen = pg_encoding_mblen (encoding , cp );
182
182
183
- if (remaining < charlen )
184
- {
185
- /*
186
- * If the character is longer than the available input,
187
- * replace the string with an invalid sequence. The invalid
188
- * sequence ensures that the escaped string will trigger an
189
- * error on the server-side, even if we can't directly report
190
- * an error here.
191
- */
192
- enlargePQExpBuffer (id_return , 2 );
193
- pg_encoding_set_invalid (encoding ,
194
- id_return -> data + id_return -> len );
195
- id_return -> len += 2 ;
196
- id_return -> data [id_return -> len ] = '\0' ;
197
-
198
- /* there's no more input data, so we can stop */
199
- break ;
200
- }
201
- else if (pg_encoding_verifymbchar (encoding , cp , charlen ) == -1 )
183
+ if (remaining < charlen ||
184
+ pg_encoding_verifymbchar (encoding , cp , charlen ) == -1 )
202
185
{
203
186
/*
204
187
* Multibyte character is invalid. It's important to verify
205
- * that as invalid multi-byte characters could e.g. be used to
188
+ * that as invalid multibyte characters could e.g. be used to
206
189
* "skip" over quote characters, e.g. when parsing
207
190
* character-by-character.
208
191
*
209
- * Replace the bytes corresponding to the invalid character
210
- * with an invalid sequence, for the same reason as above.
192
+ * Replace the character's first byte with an invalid
193
+ * sequence. The invalid sequence ensures that the escaped
194
+ * string will trigger an error on the server-side, even if we
195
+ * can't directly report an error here.
211
196
*
212
197
* It would be a bit faster to verify the whole string the
213
198
* first time we encounter a set highbit, but this way we can
214
- * replace just the invalid characters, which probably makes
215
- * it easier for users to find the invalidly encoded portion
216
- * of a larger string.
199
+ * replace just the invalid data, which probably makes it
200
+ * easier for users to find the invalidly encoded portion of a
201
+ * larger string.
217
202
*/
218
203
enlargePQExpBuffer (id_return , 2 );
219
204
pg_encoding_set_invalid (encoding ,
@@ -222,11 +207,13 @@ fmtIdEnc(const char *rawid, int encoding)
222
207
id_return -> data [id_return -> len ] = '\0' ;
223
208
224
209
/*
225
- * Copy the rest of the string after the invalid multi-byte
226
- * character.
210
+ * Handle the following bytes as if this byte didn't exist.
211
+ * That's safer in case the subsequent bytes contain
212
+ * characters that are significant for the caller (e.g. '>' in
213
+ * HTML).
227
214
*/
228
- remaining -= charlen ;
229
- cp += charlen ;
215
+ remaining -- ;
216
+ cp ++ ;
230
217
}
231
218
else
232
219
{
@@ -395,49 +382,39 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
395
382
/* Slow path for possible multibyte characters */
396
383
charlen = PQmblen (source , encoding );
397
384
398
- if (remaining < charlen )
399
- {
400
- /*
401
- * If the character is longer than the available input, replace
402
- * the string with an invalid sequence. The invalid sequence
403
- * ensures that the escaped string will trigger an error on the
404
- * server-side, even if we can't directly report an error here.
405
- *
406
- * We know there's enough space for the invalid sequence because
407
- * the "target" buffer is 2 * length + 2 long, and at worst we're
408
- * replacing a single input byte with two invalid bytes.
409
- */
410
- pg_encoding_set_invalid (encoding , target );
411
- target += 2 ;
412
-
413
- /* there's no more valid input data, so we can stop */
414
- break ;
415
- }
416
- else if (pg_encoding_verifymbchar (encoding , source , charlen ) == -1 )
385
+ if (remaining < charlen ||
386
+ pg_encoding_verifymbchar (encoding , source , charlen ) == -1 )
417
387
{
418
388
/*
419
389
* Multibyte character is invalid. It's important to verify that
420
- * as invalid multi-byte characters could e.g. be used to "skip"
390
+ * as invalid multibyte characters could e.g. be used to "skip"
421
391
* over quote characters, e.g. when parsing
422
392
* character-by-character.
423
393
*
424
- * Replace the bytes corresponding to the invalid character with
425
- * an invalid sequence, for the same reason as above.
394
+ * Replace the character's first byte with an invalid sequence.
395
+ * The invalid sequence ensures that the escaped string will
396
+ * trigger an error on the server-side, even if we can't directly
397
+ * report an error here.
398
+ *
399
+ * We know there's enough space for the invalid sequence because
400
+ * the "target" buffer is 2 * length + 2 long, and at worst we're
401
+ * replacing a single input byte with two invalid bytes.
426
402
*
427
403
* It would be a bit faster to verify the whole string the first
428
404
* time we encounter a set highbit, but this way we can replace
429
- * just the invalid characters, which probably makes it easier for
430
- * users to find the invalidly encoded portion of a larger string.
405
+ * just the invalid data, which probably makes it easier for users
406
+ * to find the invalidly encoded portion of a larger string.
431
407
*/
432
408
pg_encoding_set_invalid (encoding , target );
433
409
target += 2 ;
434
- remaining -= charlen ;
435
410
436
411
/*
437
- * Copy the rest of the string after the invalid multi-byte
438
- * character.
412
+ * Handle the following bytes as if this byte didn't exist. That's
413
+ * safer in case the subsequent bytes contain important characters
414
+ * for the caller (e.g. '>' in HTML).
439
415
*/
440
- source += charlen ;
416
+ source ++ ;
417
+ remaining -- ;
441
418
}
442
419
else
443
420
{
0 commit comments