22
22
* 11/6/21
23
23
* - switch to rsvg_handle_render_document()
24
24
* - librsvg can no longer render very large images :(
25
+ * 14/10/21
26
+ * - allow utf-8 headers for svg detection
25
27
*/
26
28
27
29
/*
@@ -131,6 +133,85 @@ vips_foreign_load_svg_zfree( void *opaque, void *ptr )
131
133
}
132
134
#endif /*HANDLE_SVGZ*/
133
135
136
+ /* Find a utf-8 substring within the first len_bytes (not characters).
137
+ *
138
+ * - case-insensitive
139
+ * - needle must be zero-terminated, but hackstack need not be
140
+ * - haystack can be null-terminated
141
+ * - if haystack is shorter than len bytes, that'll end the search
142
+ * - if we hit invalid utf-8, we return NULL
143
+ */
144
+ static const char *
145
+ vips_utf8_strcasestr ( const char * haystack_start , const char * needle_start ,
146
+ int len_bytes )
147
+ {
148
+ int needle_len = g_utf8_strlen ( needle_start , -1 );
149
+ int needle_len_bytes = strlen ( needle_start );
150
+
151
+ const char * haystack ;
152
+
153
+ for ( haystack = haystack_start ;
154
+ haystack - haystack_start <= len_bytes - needle_len_bytes ;
155
+ haystack = g_utf8_find_next_char ( haystack , NULL ) ) {
156
+ const char * needle_char ;
157
+ const char * haystack_char ;
158
+ int i ;
159
+
160
+ haystack_char = haystack ;
161
+ needle_char = needle_start ;
162
+ for ( i = 0 ; i < needle_len ; i ++ ) {
163
+ /* Haystack isn't necessarily null-terminated and
164
+ * might end half-way through a utf-8 character, so we
165
+ * need to be careful not to run off the end.
166
+ */
167
+ gunichar a =
168
+ g_utf8_get_char_validated ( haystack_char ,
169
+ haystack_start + len_bytes - haystack );
170
+ gunichar b =
171
+ g_utf8_get_char_validated ( needle_char , -1 );
172
+
173
+ /* Invalid utf8?
174
+ *
175
+ * gunichar is a uint32, so we can't compare < 0, we
176
+ * have to look for -1 and -2 (the two possible error
177
+ * values).
178
+ */
179
+ if ( a == (gunichar ) - 1 ||
180
+ a == (gunichar ) - 2 ||
181
+ b == (gunichar ) - 1 ||
182
+ b == (gunichar ) - 2 )
183
+ return ( NULL );
184
+
185
+ /* End of haystack. There can't be a complete needle
186
+ * anywhere.
187
+ */
188
+ if ( a == (gunichar ) 0 )
189
+ return ( NULL );
190
+
191
+ /* Mismatch.
192
+ */
193
+ if ( g_unichar_tolower ( a ) != g_unichar_tolower ( b ) )
194
+ break ;
195
+
196
+ haystack_char =
197
+ g_utf8_find_next_char ( haystack_char ,
198
+ haystack_start + len_bytes );
199
+ needle_char =
200
+ g_utf8_find_next_char ( needle_char , NULL );
201
+ }
202
+
203
+ if ( i == needle_len )
204
+ /* Walked the whole of needle, so we must have found a
205
+ * complete match.
206
+ */
207
+ return ( haystack );
208
+ }
209
+
210
+ /* Walked the whole of haystack without finding a match.
211
+ */
212
+ return ( NULL );
213
+ }
214
+
134
215
/* This is used by both the file and buffer subclasses.
135
216
*/
136
217
static gboolean
@@ -145,8 +226,6 @@ vips_foreign_load_svg_is_a( const void *buf, size_t len )
145
226
char obuf [SVG_HEADER_SIZE ];
146
227
#endif /*HANDLE_SVGZ*/
147
228
148
- int i ;
149
-
150
229
/* Start with str pointing at the argument buffer, swap to it pointing
151
230
* into obuf if we see zip data.
152
231
*/
@@ -200,23 +279,17 @@ vips_foreign_load_svg_is_a( const void *buf, size_t len )
200
279
*
201
280
* But there can be a doctype in there too. And case and whitespace can
202
281
* vary a lot. And the <?xml can be missing. And you can have a comment
203
- * before the <svg line.
282
+ * before the <svg line. And it can be utf-8, so non ASCII characters.
204
283
*
205
- * Simple rules:
206
- * - first 24 chars are plain ascii (x09-x7F)
207
- * - first SVG_HEADER_SIZE chars contain "<svg", upper or lower case .
284
+ * All we do is look for "<svg", any case, within the first
285
+ * SVG_HEADER_SIZE bytes, where the bytestream up to the "<svg" is
286
+ * valid utf-8 .
208
287
*
209
288
* We could rsvg_handle_new_from_data() on the buffer, but that can be
210
289
* horribly slow for large documents.
211
290
*/
212
- if ( len < 24 )
213
- return ( 0 );
214
- for ( i = 0 ; i < 24 ; i ++ )
215
- if ( !isascii ( str [i ] ) || str [i ] < 9 )
216
- return ( FALSE );
217
- for ( i = 0 ; i < SVG_HEADER_SIZE && i < len - 5 ; i ++ )
218
- if ( g_ascii_strncasecmp ( str + i , "<svg" , 4 ) == 0 )
219
- return ( TRUE );
291
+ if ( vips_utf8_strcasestr ( str , "<svg" , len ) )
292
+ return ( TRUE );
220
293
221
294
return ( FALSE );
222
295
}
0 commit comments