Skip to content

Commit 1b0356e

Browse files
author
Sean Bright
committed
Fix for bug php#11796. Also, fixed a problem in get_meta_tags that required
NAME and CONTENT to be in that order. Meta tags with no CONTENT attribute are now added to the array as empty strings. Meta tags with no NAME attribute are ignored.
1 parent d5cf0ab commit 1b0356e

File tree

2 files changed

+124
-79
lines changed

2 files changed

+124
-79
lines changed

ext/standard/file.c

Lines changed: 105 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -290,14 +290,14 @@ PHP_FUNCTION(flock)
290290
PHP_FUNCTION(get_meta_tags)
291291
{
292292
pval **filename, **arg2;
293-
FILE *fp;
294293
int use_include_path = 0;
295-
int issock=0, socketd=0;
296-
int in_tag=0, in_meta_tag=0, looking_for_val=0, done=0, ulc=0;
297-
int num_parts=0, lc=0;
298-
int token_len=0;
299-
char *token_data=NULL, *name=NULL, *value=NULL, *temp=NULL;
294+
int in_tag=0, in_meta_tag=0, done=0;
295+
int looking_for_val=0, have_name=0, have_content=0;
296+
int saw_name=0, saw_content=0;
297+
int num_parts=0;
298+
char *name=NULL, *value=NULL, *temp=NULL;
300299
php_meta_tags_token tok, tok_last;
300+
php_meta_tags_data md;
301301
PLS_FETCH();
302302

303303
/* check args */
@@ -319,9 +319,9 @@ PHP_FUNCTION(get_meta_tags)
319319
}
320320
convert_to_string_ex(filename);
321321

322-
fp = php_fopen_wrapper((*filename)->value.str.val,"rb", use_include_path|ENFORCE_SAFE_MODE, &issock, &socketd, NULL);
323-
if (!fp && !socketd) {
324-
if (issock != BAD_URL) {
322+
md.fp = php_fopen_wrapper((*filename)->value.str.val, "rb", use_include_path|ENFORCE_SAFE_MODE, &md.issock, &md.socketd, NULL);
323+
if (!md.fp && !md.socketd) {
324+
if (md.issock != BAD_URL) {
325325
char *tmp = estrndup(Z_STRVAL_PP(filename), Z_STRLEN_PP(filename));
326326
php_strip_url_passwd(tmp);
327327
php_error(E_WARNING,"get_meta_tags(\"%s\") - %s", tmp, strerror(errno));
@@ -331,109 +331,133 @@ PHP_FUNCTION(get_meta_tags)
331331
}
332332

333333
if (array_init(return_value)==FAILURE) {
334-
if (issock) {
335-
SOCK_FCLOSE(socketd);
334+
if (md.issock) {
335+
SOCK_FCLOSE(md.socketd);
336336
} else {
337-
fclose(fp);
337+
fclose(md.fp);
338338
}
339339
RETURN_FALSE;
340340
}
341341

342342
tok_last = TOK_EOF;
343343

344-
while (!done && (tok = php_next_meta_token(fp,socketd,issock,&ulc,&lc,&token_data,&token_len)) != TOK_EOF) {
344+
md.ulc = 0;
345+
md.token_data = NULL;
346+
md.token_len = 0;
347+
348+
while (!done && (tok = php_next_meta_token(&md)) != TOK_EOF) {
349+
345350
if (tok == TOK_ID) {
346351
if (tok_last == TOK_OPENTAG) {
347-
in_meta_tag = !strcasecmp("meta",token_data);
352+
in_meta_tag = !strcasecmp("meta",md.token_data);
348353
} else if (tok_last == TOK_SLASH && in_tag) {
349-
if (strcasecmp("head",token_data) == 0) {
354+
if (strcasecmp("head", md.token_data) == 0) {
350355
/* We are done here! */
351356
done = 1;
352357
}
353358
} else if (tok_last == TOK_EQUAL && looking_for_val) {
354-
355-
if (!num_parts) {
356-
/* This is a single word attribute */
357-
temp = name = estrndup(token_data,token_len);
359+
if (saw_name) {
360+
/* Get the NAME attr (Single word attr, non-quoted) */
361+
temp = name = estrndup(md.token_data,md.token_len);
358362

359363
while (temp && *temp) {
360364
if (strchr(PHP_META_UNSAFE, *temp)) {
361365
*temp = '_';
362366
}
363367
temp++;
364368
}
365-
num_parts++;
366-
} else {
369+
370+
have_name = 1;
371+
} else if (saw_content) {
372+
/* Get the CONTENT attr (Single word attr, non-quoted) */
367373
if (PG(magic_quotes_runtime)) {
368-
value = php_addslashes(token_data,0,&token_len,0);
374+
value = php_addslashes(md.token_data,0,&md.token_len,0);
369375
} else {
370-
value = estrndup(token_data,token_len);
376+
value = estrndup(md.token_data,md.token_len);
371377
}
372378

373-
/* Insert the value into the array */
374-
add_assoc_string(return_value, name, value, 0);
375-
num_parts = 0;
379+
have_content = 1;
376380
}
381+
377382
looking_for_val = 0;
378383
} else {
379384
if (in_meta_tag) {
380-
if (strcasecmp("name",token_data) == 0 || strcasecmp("content",token_data) == 0) {
385+
if (strcasecmp("name", md.token_data) == 0) {
386+
saw_name = 1;
387+
saw_content = 0;
388+
looking_for_val = 1;
389+
} else if (strcasecmp("content", md.token_data) == 0) {
390+
saw_name = 0;
391+
saw_content = 1;
381392
looking_for_val = 1;
382-
} else {
383-
looking_for_val = 0;
384393
}
385394
}
386395
}
387396
} else if (tok == TOK_STRING && tok_last == TOK_EQUAL && looking_for_val) {
388-
if (!num_parts) {
389-
/* First, get the name value and store it */
390-
temp = name = estrndup(token_data,token_len);
397+
if (saw_name) {
398+
/* Get the NAME attr (Quoted single/double) */
399+
temp = name = estrndup(md.token_data,md.token_len);
400+
391401
while (temp && *temp) {
392402
if (strchr(PHP_META_UNSAFE, *temp)) {
393403
*temp = '_';
394404
}
395405
temp++;
396406
}
397-
num_parts++;
398-
} else {
399-
/* Then get the value value and store it, quoting if neccessary */
407+
408+
have_name = 1;
409+
} else if (saw_content) {
410+
/* Get the CONTENT attr (Quoted single/double) */
400411
if (PG(magic_quotes_runtime)) {
401-
value = php_addslashes(token_data,0,&token_len,0);
412+
value = php_addslashes(md.token_data,0,&md.token_len,0);
402413
} else {
403-
value = estrndup(token_data,token_len);
414+
value = estrndup(md.token_data,md.token_len);
404415
}
405416

406-
/* Insert the value into the array */
407-
add_assoc_string(return_value, name, value, 0);
408-
num_parts = 0;
417+
have_content = 1;
409418
}
419+
410420
looking_for_val = 0;
411421
} else if (tok == TOK_OPENTAG) {
412422
if (looking_for_val) {
413423
looking_for_val = 0;
424+
have_name = saw_name = 0;
425+
have_content = saw_content = 0;
414426
}
415427
in_tag = 1;
416428
} else if (tok == TOK_CLOSETAG) {
417-
/* We never made it to the value, free the name */
418-
if (num_parts) {
429+
if (have_name) {
430+
if (have_content) {
431+
add_assoc_string(return_value, name, value, 0);
432+
} else {
433+
add_assoc_string(return_value, name, empty_string, 0);
434+
}
435+
419436
efree(name);
437+
} else if (have_content) {
438+
efree(value);
420439
}
440+
441+
name = value = NULL;
442+
421443
/* Reset all of our flags */
422444
in_tag = in_meta_tag = looking_for_val = num_parts = 0;
445+
have_name = saw_name = 0;
446+
have_content = saw_content = 0;
423447
}
424448

425449
tok_last = tok;
426450

427-
if (token_data)
428-
efree(token_data);
451+
if (md.token_data)
452+
efree(md.token_data);
429453

430-
token_data = NULL;
454+
md.token_data = NULL;
431455
}
432456

433-
if (issock) {
434-
SOCK_FCLOSE(socketd);
457+
if (md.issock) {
458+
SOCK_FCLOSE(md.socketd);
435459
} else {
436-
fclose(fp);
460+
fclose(md.fp);
437461
}
438462
}
439463

@@ -2367,20 +2391,21 @@ size_t php_fread_all(char **buf, int socket, FILE *fp, int issock) {
23672391

23682392
/* {{{ php_next_meta_token
23692393
Tokenizes an HTML file for get_meta_tags */
2370-
php_meta_tags_token php_next_meta_token(FILE *fp, int socketd, int issock, int *use_last_char, int *last_char, char **data, int *datalen) {
2371-
int ch, compliment;
2394+
php_meta_tags_token php_next_meta_token(php_meta_tags_data *md)
2395+
{
2396+
int ch = 0, compliment;
23722397
char buff[META_DEF_BUFSIZE + 1];
23732398

23742399
memset((void *)buff,0,META_DEF_BUFSIZE + 1);
23752400

2376-
while (*use_last_char || (!FP_FEOF(socketd,fp,issock) && (ch = FP_FGETC(socketd,fp,issock)))) {
2401+
while (md->ulc || (!FP_FEOF(md->socketd,md->fp,md->issock) && (ch = FP_FGETC(md->socketd,md->fp,md->issock)))) {
23772402

2378-
if(FP_FEOF(socketd,fp,issock))
2403+
if(FP_FEOF(md->socketd,md->fp,md->issock))
23792404
break;
23802405

2381-
if (*use_last_char) {
2382-
ch = *last_char;
2383-
*use_last_char = 0;
2406+
if (md->ulc) {
2407+
ch = md->lc;
2408+
md->ulc = 0;
23842409
}
23852410

23862411
switch (ch) {
@@ -2399,16 +2424,25 @@ php_meta_tags_token php_next_meta_token(FILE *fp, int socketd, int issock, int *
23992424
case '\'':
24002425
case '"':
24012426
compliment = ch;
2402-
*datalen = 0;
2403-
while (!FP_FEOF(socketd,fp,issock) && (ch = FP_FGETC(socketd,fp,issock)) && ch != compliment) {
2404-
buff[(*datalen)++] = ch;
2427+
md->token_len = 0;
2428+
while (!FP_FEOF(md->socketd,md->fp,md->issock) &&
2429+
(ch = FP_FGETC(md->socketd,md->fp,md->issock)) &&
2430+
ch != compliment && ch != '<' && ch != '>') {
24052431

2406-
if (*datalen == META_DEF_BUFSIZE)
2432+
buff[(md->token_len)++] = ch;
2433+
2434+
if (md->token_len == META_DEF_BUFSIZE)
24072435
break;
24082436
}
24092437

2410-
*data = (char *) emalloc( *datalen + 1 );
2411-
memcpy(*data,buff,*datalen+1);
2438+
if (ch == '<' || ch == '>') {
2439+
/* Was just an apostrophe */
2440+
md->ulc = 1;
2441+
md->lc = ch;
2442+
}
2443+
2444+
md->token_data = (char *) emalloc(md->token_len + 1);
2445+
memcpy(md->token_data,buff,md->token_len+1);
24122446

24132447
return TOK_STRING;
24142448
break;
@@ -2421,26 +2455,26 @@ php_meta_tags_token php_next_meta_token(FILE *fp, int socketd, int issock, int *
24212455
break;
24222456
default:
24232457
if (isalnum(ch)) {
2424-
*datalen = 0;
2425-
buff[(*datalen)++] = ch;
2426-
while (!FP_FEOF(socketd,fp,issock) &&
2427-
(ch = FP_FGETC(socketd,fp,issock)) &&
2458+
md->token_len = 0;
2459+
buff[(md->token_len)++] = ch;
2460+
while (!FP_FEOF(md->socketd,md->fp,md->issock) &&
2461+
(ch = FP_FGETC(md->socketd,md->fp,md->issock)) &&
24282462
(isalnum(ch) || strchr(PHP_META_HTML401_CHARS,ch))) {
24292463

2430-
buff[(*datalen)++] = ch;
2464+
buff[(md->token_len)++] = ch;
24312465

2432-
if (*datalen == META_DEF_BUFSIZE)
2466+
if (md->token_len == META_DEF_BUFSIZE)
24332467
break;
24342468
}
24352469

24362470
/* This is ugly, but we have to replace ungetc */
24372471
if (!isalpha(ch) && ch != '-') {
2438-
*use_last_char = 1;
2439-
*last_char = ch;
2472+
md->ulc = 1;
2473+
md->lc = ch;
24402474
}
24412475

2442-
*data = (char *) emalloc( *datalen + 1 );
2443-
memcpy(*data,buff,*datalen+1);
2476+
md->token_data = (char *) emalloc(md->token_len + 1);
2477+
memcpy(md->token_data,buff,md->token_len+1);
24442478

24452479
return TOK_ID;
24462480
} else {

ext/standard/file.h

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ PHP_NAMED_FUNCTION(php_if_fstat);
7070
/* temporary function for testing streams */
7171
PHP_FUNCTION(fopenstream);
7272

73-
7473
PHPAPI int php_set_sock_blocking(int socketd, int block);
7574
PHPAPI int php_file_le_fopen(void);
7675
PHPAPI int php_file_le_stream(void);
@@ -92,15 +91,26 @@ typedef enum _php_meta_tags_token {
9291
TOK_OTHER
9392
} php_meta_tags_token;
9493

95-
php_meta_tags_token php_next_meta_token(FILE *, int, int, int *, int *, char **, int *);
94+
typedef struct _php_meta_tags_data {
95+
FILE *fp;
96+
int socketd;
97+
int issock;
98+
int ulc;
99+
int lc;
100+
char *input_buffer;
101+
char *token_data;
102+
int token_len;
103+
} php_meta_tags_data;
104+
105+
php_meta_tags_token php_next_meta_token(php_meta_tags_data *);
96106

97107
typedef struct {
98-
int fgetss_state;
99-
int pclose_ret;
100-
HashTable ht_fsock_keys;
101-
HashTable ht_fsock_socks;
102-
struct php_sockbuf *phpsockbuf;
103-
size_t def_chunk_size;
108+
int fgetss_state;
109+
int pclose_ret;
110+
HashTable ht_fsock_keys;
111+
HashTable ht_fsock_socks;
112+
struct php_sockbuf *phpsockbuf;
113+
size_t def_chunk_size;
104114
} php_file_globals;
105115

106116
#ifdef ZTS
@@ -123,3 +133,4 @@ extern php_file_globals file_globals;
123133

124134

125135
#endif /* FILE_H */
136+

0 commit comments

Comments
 (0)