Skip to content

Commit 1032445

Browse files
committed
TODO item:
* Make n of CHAR(n)/VARCHAR(n) the number of letters, not bytes
1 parent b08e86d commit 1032445

File tree

3 files changed

+106
-50
lines changed

3 files changed

+106
-50
lines changed

src/backend/utils/adt/varchar.c

Lines changed: 77 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.80 2001/06/09 23:21:55 petere Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.81 2001/07/15 11:07:37 ishii Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -73,33 +73,65 @@ bpcharin(PG_FUNCTION_ARGS)
7373
char *r;
7474
size_t len, maxlen;
7575
int i;
76+
#ifdef MULTIBYTE
77+
int charlen; /* number of characters in the input string */
78+
#endif
7679

7780
len = strlen(s);
81+
#ifdef MULTIBYTE
82+
charlen = pg_mbstrlen(s);
83+
#endif
7884

7985
/* If typmod is -1 (or invalid), use the actual string length */
8086
if (atttypmod < (int32) VARHDRSZ)
87+
#ifdef MULTIBYTE
88+
maxlen = charlen;
89+
#else
8190
maxlen = len;
91+
#endif
8292
else
8393
maxlen = atttypmod - VARHDRSZ;
8494

95+
#ifdef MULTIBYTE
96+
if (charlen > maxlen)
97+
#else
8598
if (len > maxlen)
99+
#endif
86100
{
87101
/* Verify that extra characters are spaces, and clip them off */
88102
#ifdef MULTIBYTE
89-
size_t mbmaxlen = pg_mbcliplen(s, len, maxlen);
90-
103+
size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
104+
/*
105+
* at this point, len is the actual BYTE length of the
106+
* input string, maxlen is the max number of
107+
* CHARACTERS allowed for this bpchar type.
108+
*/
91109
if (strspn(s + mbmaxlen, " ") == len - mbmaxlen)
92110
len = mbmaxlen;
93111
else
94112
elog(ERROR, "value too long for type character(%d)", maxlen);
95-
Assert(len <= maxlen);
113+
/*
114+
* XXX: at this point, maxlen is the necessary byte
115+
* length, not the number of CHARACTERS!
116+
*/
117+
maxlen = len;
96118
#else
97119
if (strspn(s + maxlen, " ") == len - maxlen)
98120
len = maxlen;
99121
else
100122
elog(ERROR, "value too long for type character(%d)", maxlen);
101123
#endif
102124
}
125+
#ifdef MULTIBYTE
126+
else
127+
{
128+
/*
129+
* XXX: at this point, maxlen is the necessary byte
130+
* length, not the number of CHARACTERS!
131+
*/
132+
maxlen = len + (maxlen - charlen);
133+
}
134+
#endif
103135

104136
result = palloc(maxlen + VARHDRSZ);
105137
VARATT_SIZEP(result) = maxlen + VARHDRSZ;
@@ -158,19 +190,29 @@ bpchar(PG_FUNCTION_ARGS)
158190
char *r;
159191
char *s;
160192
int i;
193+
#ifdef MULTIBYTE
194+
int charlen; /* number of characters in the input string
195+
+ VARHDRSZ*/
196+
#endif
161197

162198
len = VARSIZE(source);
199+
#ifdef MULTIBYTE
200+
charlen = pg_mbstrlen_with_len(VARDATA(source), len - VARHDRSZ) + VARHDRSZ;
201+
#endif
163202
/* No work if typmod is invalid or supplied data matches it already */
164203
if (maxlen < (int32) VARHDRSZ || len == maxlen)
165204
PG_RETURN_BPCHAR_P(source);
166-
205+
#ifdef MULTIBYTE
206+
if (charlen > maxlen)
207+
#else
167208
if (len > maxlen)
209+
#endif
168210
{
169211
/* Verify that extra characters are spaces, and clip them off */
170212
#ifdef MULTIBYTE
171213
size_t maxmblen;
172214

173-
maxmblen = pg_mbcliplen(VARDATA(source), len - VARHDRSZ,
215+
maxmblen = pg_mbcharcliplen(VARDATA(source), len - VARHDRSZ,
174216
maxlen - VARHDRSZ) + VARHDRSZ;
175217

176218
for (i = maxmblen - VARHDRSZ; i < len - VARHDRSZ; i++)
@@ -179,7 +221,11 @@ bpchar(PG_FUNCTION_ARGS)
179221
maxlen - VARHDRSZ);
180222

181223
len = maxmblen;
182-
Assert(len <= maxlen);
224+
/*
225+
* XXX: at this point, maxlen is the necessary byte
226+
* length+VARHDRSZ, not the number of CHARACTERS!
227+
*/
228+
maxlen = len;
183229
#else
184230
for (i = maxlen - VARHDRSZ; i < len - VARHDRSZ; i++)
185231
if (*(VARDATA(source) + i) != ' ')
@@ -189,6 +235,16 @@ bpchar(PG_FUNCTION_ARGS)
189235
len = maxlen;
190236
#endif
191237
}
238+
#ifdef MULTIBYTE
239+
else
240+
{
241+
/*
242+
* XXX: at this point, maxlen is the necessary byte
243+
* length+VARHDRSZ, not the number of CHARACTERS!
244+
*/
245+
maxlen = len + (maxlen - charlen);
246+
}
247+
#endif
192248

193249
s = VARDATA(source);
194250

@@ -333,9 +389,12 @@ name_bpchar(PG_FUNCTION_ARGS)
333389
* Convert a C string to VARCHAR internal representation. atttypmod
334390
* is the declared length of the type plus VARHDRSZ.
335391
*
336-
* If the C string is too long, raise an error, unless the extra
337-
* characters are spaces, in which case they're truncated. (per SQL)
338-
*/
392+
* Note that if MULTIBYTE is enabled, atttypmod is regarded as the
393+
* number of characters, rather than number of bytes.
394+
*
395+
* If the C string is too long,
396+
* raise an error, unless the extra characters are spaces, in which
397+
* case they're truncated. (per SQL) */
339398
Datum
340399
varcharin(PG_FUNCTION_ARGS)
341400
{
@@ -354,7 +413,7 @@ varcharin(PG_FUNCTION_ARGS)
354413
{
355414
/* Verify that extra characters are spaces, and clip them off */
356415
#ifdef MULTIBYTE
357-
size_t mbmaxlen = pg_mbcliplen(s, len, maxlen);
416+
size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
358417

359418
if (strspn(s + mbmaxlen, " ") == len - mbmaxlen)
360419
len = mbmaxlen;
@@ -428,7 +487,7 @@ varchar(PG_FUNCTION_ARGS)
428487
size_t maxmblen;
429488

430489
/* truncate multi-byte string preserving multi-byte boundary */
431-
maxmblen = pg_mbcliplen(VARDATA(source), len - VARHDRSZ,
490+
maxmblen = pg_mbcharcliplen(VARDATA(source), len - VARHDRSZ,
432491
maxlen - VARHDRSZ) + VARHDRSZ;
433492

434493
for (i = maxmblen - VARHDRSZ; i < len - VARHDRSZ; i++)
@@ -515,22 +574,9 @@ bpcharlen(PG_FUNCTION_ARGS)
515574
BpChar *arg = PG_GETARG_BPCHAR_P(0);
516575

517576
#ifdef MULTIBYTE
518-
unsigned char *s;
519-
int len,
520-
l,
521-
wl;
522-
523-
l = VARSIZE(arg) - VARHDRSZ;
524-
len = 0;
525-
s = VARDATA(arg);
526-
while (l > 0)
527-
{
528-
wl = pg_mblen(s);
529-
l -= wl;
530-
s += wl;
531-
len++;
532-
}
533-
PG_RETURN_INT32(len);
577+
PG_RETURN_INT32(
578+
pg_mbstrlen_with_len(VARDATA(arg), VARSIZE(arg) - VARHDRSZ)
579+
);
534580
#else
535581
PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
536582
#endif
@@ -736,22 +782,9 @@ varcharlen(PG_FUNCTION_ARGS)
736782
VarChar *arg = PG_GETARG_VARCHAR_P(0);
737783

738784
#ifdef MULTIBYTE
739-
unsigned char *s;
740-
int len,
741-
l,
742-
wl;
743-
744-
len = 0;
745-
s = VARDATA(arg);
746-
l = VARSIZE(arg) - VARHDRSZ;
747-
while (l > 0)
748-
{
749-
wl = pg_mblen(s);
750-
l -= wl;
751-
s += wl;
752-
len++;
753-
}
754-
PG_RETURN_INT32(len);
785+
PG_RETURN_INT32(
786+
pg_mbstrlen_with_len(VARDATA(arg), VARSIZE(arg) - VARHDRSZ)
787+
);
755788
#else
756789
PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
757790
#endif

src/backend/utils/mb/mbutils.c

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* client encoding and server internal encoding.
44
* (currently mule internal code (mic) is used)
55
* Tatsuo Ishii
6-
* $Id: mbutils.c,v 1.17 2001/04/16 02:42:01 tgl Exp $
6+
* $Id: mbutils.c,v 1.18 2001/07/15 11:07:36 ishii Exp $
77
*/
88
#include "postgres.h"
99

@@ -241,9 +241,9 @@ pg_mbstrlen_with_len(const unsigned char *mbstr, int limit)
241241
}
242242

243243
/*
244-
* returns the length of a multi-byte string
244+
* returns the byte length of a multi-byte string
245245
* (not necessarily NULL terminated)
246-
* that is not longer than limit.
246+
* that is no longer than limit.
247247
* this function does not break multi-byte word boundary.
248248
*/
249249
int
@@ -267,8 +267,30 @@ pg_mbcliplen(const unsigned char *mbstr, int len, int limit)
267267
}
268268

269269
/*
270-
* functions for utils/init
271-
*/
270+
* Similar to pg_mbcliplen but the limit parameter specifies the
271+
* character length, not the byte length. */
272+
int
273+
pg_mbcharcliplen(const unsigned char *mbstr, int len, int limit)
274+
{
275+
int clen = 0;
276+
int nch = 0;
277+
int l;
278+
279+
while (len > 0 && *mbstr)
280+
{
281+
l = pg_mblen(mbstr);
282+
nch++;
283+
if (nch > limit)
284+
break;
285+
clen += l;
286+
len -= l;
287+
mbstr += l;
288+
}
289+
return (clen);
290+
}
291+
292+
/*
293+
* functions for utils/init */
272294
static int DatabaseEncoding = MULTIBYTE;
273295

274296
void

src/include/mb/pg_wchar.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* $Id: pg_wchar.h,v 1.26 2001/05/03 21:38:44 momjian Exp $ */
1+
/* $Id: pg_wchar.h,v 1.27 2001/07/15 11:07:37 ishii Exp $ */
22

33
#ifndef PG_WCHAR_H
44
#define PG_WCHAR_H
@@ -136,6 +136,7 @@ extern int pg_mic_mblen(const unsigned char *);
136136
extern int pg_mbstrlen(const unsigned char *);
137137
extern int pg_mbstrlen_with_len(const unsigned char *, int);
138138
extern int pg_mbcliplen(const unsigned char *, int, int);
139+
extern int pg_mbcharcliplen(const unsigned char *, int, int);
139140
extern pg_encoding_conv_tbl *pg_get_encent_by_encoding(int);
140141
extern int pg_set_client_encoding(int);
141142
extern int pg_get_client_encoding(void);

0 commit comments

Comments
 (0)