7
7
*
8
8
*
9
9
* IDENTIFICATION
10
- * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.2 2007/08/22 04:13:15 tgl Exp $
10
+ * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
11
11
*
12
12
*-------------------------------------------------------------------------
13
13
*/
20
20
#include "tsearch/ts_utils.h"
21
21
#include "utils/builtins.h"
22
22
23
-
24
- #define SYNBUFLEN 4096
25
-
26
23
typedef struct
27
24
{
28
25
char * in ;
@@ -31,23 +28,34 @@ typedef struct
31
28
32
29
typedef struct
33
30
{
34
- int len ;
31
+ int len ; /* length of syn array */
35
32
Syn * syn ;
36
33
} DictSyn ;
37
34
35
+ /*
36
+ * Finds the next whitespace-delimited word within the 'in' string.
37
+ * Returns a pointer to the first character of the word, and a pointer
38
+ * to the next byte after the last character in the word (in *end).
39
+ */
38
40
static char *
39
41
findwrd (char * in , char * * end )
40
42
{
41
43
char * start ;
42
44
43
- * end = NULL ;
45
+ /* Skip leading spaces */
44
46
while (* in && t_isspace (in ))
45
47
in += pg_mblen (in );
46
48
49
+ /* Return NULL on empty lines */
47
50
if (* in == '\0' )
51
+ {
52
+ * end = NULL ;
48
53
return NULL ;
54
+ }
55
+
49
56
start = in ;
50
57
58
+ /* Find end of word */
51
59
while (* in && !t_isspace (in ))
52
60
in += pg_mblen (in );
53
61
@@ -70,12 +78,11 @@ dsynonym_init(PG_FUNCTION_ARGS)
70
78
ListCell * l ;
71
79
char * filename = NULL ;
72
80
FILE * fin ;
73
- char buf [SYNBUFLEN ];
74
81
char * starti ,
75
82
* starto ,
76
83
* end = NULL ;
77
84
int cur = 0 ;
78
- int slen ;
85
+ char * line = NULL ;
79
86
80
87
foreach (l , dictoptions )
81
88
{
@@ -105,10 +112,33 @@ dsynonym_init(PG_FUNCTION_ARGS)
105
112
106
113
d = (DictSyn * ) palloc0 (sizeof (DictSyn ));
107
114
108
- while (fgets ( buf , SYNBUFLEN , fin ))
115
+ while (( line = t_readline ( fin )) != NULL )
109
116
{
110
- slen = strlen (buf );
111
- pg_verifymbstr (buf , slen , false);
117
+ starti = findwrd (line , & end );
118
+ if (!starti )
119
+ {
120
+ /* Empty line */
121
+ goto skipline ;
122
+ }
123
+ * end = '\0' ;
124
+ if (end >= line + strlen (line ))
125
+ {
126
+ /* A line with only one word. Ignore silently. */
127
+ goto skipline ;
128
+ }
129
+
130
+ starto = findwrd (end + 1 , & end );
131
+ if (!starto )
132
+ {
133
+ /* A line with only one word. Ignore silently. */
134
+ goto skipline ;
135
+ }
136
+ * end = '\0' ;
137
+
138
+ /* starti now points to the first word, and starto to the second
139
+ * word on the line, with a \0 terminator at the end of both words.
140
+ */
141
+
112
142
if (cur == d -> len )
113
143
{
114
144
if (d -> len == 0 )
@@ -123,36 +153,19 @@ dsynonym_init(PG_FUNCTION_ARGS)
123
153
}
124
154
}
125
155
126
- starti = findwrd (buf , & end );
127
- if (!starti )
128
- continue ;
129
- * end = '\0' ;
130
- if (end >= buf + slen )
131
- continue ;
132
-
133
- starto = findwrd (end + 1 , & end );
134
- if (!starto )
135
- continue ;
136
- * end = '\0' ;
137
-
138
- d -> syn [cur ].in = recode_and_lowerstr (starti );
139
- d -> syn [cur ].out = recode_and_lowerstr (starto );
140
- if (!(d -> syn [cur ].in && d -> syn [cur ].out ))
141
- {
142
- FreeFile (fin );
143
- ereport (ERROR ,
144
- (errcode (ERRCODE_OUT_OF_MEMORY ),
145
- errmsg ("out of memory" )));
146
- }
156
+ d -> syn [cur ].in = lowerstr (starti );
157
+ d -> syn [cur ].out = lowerstr (starto );
147
158
148
159
cur ++ ;
160
+
161
+ skipline :
162
+ pfree (line );
149
163
}
150
164
151
165
FreeFile (fin );
152
166
153
167
d -> len = cur ;
154
- if (cur > 1 )
155
- qsort (d -> syn , d -> len , sizeof (Syn ), compareSyn );
168
+ qsort (d -> syn , d -> len , sizeof (Syn ), compareSyn );
156
169
157
170
PG_RETURN_POINTER (d );
158
171
}
@@ -179,8 +192,7 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
179
192
if (!found )
180
193
PG_RETURN_POINTER (NULL );
181
194
182
- res = palloc (sizeof (TSLexeme ) * 2 );
183
- memset (res , 0 , sizeof (TSLexeme ) * 2 );
195
+ res = palloc0 (sizeof (TSLexeme ) * 2 );
184
196
res [0 ].lexeme = pstrdup (found -> out );
185
197
186
198
PG_RETURN_POINTER (res );
0 commit comments