Skip to content

Commit d5b1a7f

Browse files
author
Thies C. Arntzen
committed
tags need to be UTF8-decoded as well.
guys, case_folding can only work when the parser target encoding is equal to (or compatible with) the locale setting of the system, as we use toupper to "do it".
1 parent f5c5986 commit d5b1a7f

File tree

2 files changed

+94
-54
lines changed

2 files changed

+94
-54
lines changed

ext/xml/tests/007.phpt

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
--TEST--
2+
xml_parse_into_struct/umlauts in tags
3+
--SKIPIF--
4+
<?php include("skipif.inc"); ?>
5+
--FILE--
6+
<?php
7+
function startHandler($parser,$tag,$attr) { var_dump($tag,$attr); }
8+
function endHandler($parser,$tag) { var_dump($tag); }
9+
10+
$xmldata = '<?xml version="1.0" encoding="ISO-8859-1"?><äöü üäß="Üäß">ÄÖÜ</äöü>';
11+
$parser = xml_parser_create('ISO-8859-1');
12+
xml_set_element_handler($parser, "startHandler", "endHandler");
13+
xml_parse_into_struct($parser, $xmldata, $struct, $index);
14+
var_dump($struct);
15+
?>
16+
--GET--
17+
--POST--
18+
--EXPECT--
19+
string(3) "ÄÖÜ"
20+
array(1) {
21+
["ÜÄß"]=>
22+
string(3) "Üäß"
23+
}
24+
string(3) "ÄÖÜ"
25+
array(1) {
26+
[0]=>
27+
array(5) {
28+
["tag"]=>
29+
string(3) "ÄÖÜ"
30+
["type"]=>
31+
string(8) "complete"
32+
["level"]=>
33+
int(1)
34+
["attributes"]=>
35+
array(1) {
36+
["ÜÄß"]=>
37+
string(3) "Üäß"
38+
}
39+
["value"]=>
40+
string(3) "ÄÖÜ"
41+
}
42+
}

ext/xml/xml.c

Lines changed: 52 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ static zval *xml_call_handler(xml_parser *, zval *, int, zval **);
8585
static zval *_xml_xmlchar_zval(const XML_Char *, int, const XML_Char *);
8686
static int _xml_xmlcharlen(const XML_Char *);
8787
static void _xml_add_to_info(xml_parser *parser,char *name);
88-
88+
inline static char *_xml_decode_tag(xml_parser *parser, const char *tag);
8989

9090
void _xml_startElementHandler(void *, const char *, const char **);
9191
void _xml_endElementHandler(void *, const char *);
@@ -621,47 +621,55 @@ static void _xml_add_to_info(xml_parser *parser,char *name)
621621

622622
parser->curtag++;
623623
}
624+
/* }}} */
625+
/* {{{ _xml_decode_tag() */
626+
627+
/*
 * Decode a tag or attribute name for delivery to user-level handlers.
 *
 * parser: supplies target_encoding (passed to xml_utf8_decode()) and the
 *         case_folding flag.
 * tag:    NUL-terminated name; its length is taken with strlen().
 *
 * Returns the buffer produced by xml_utf8_decode(), upper-cased in place
 * with php_strtoupper() when parser->case_folding is set. The callers in
 * this file release the returned pointer with efree().
 */
static char *_xml_decode_tag(xml_parser *parser, const char *tag)
628+
{
629+
char *newstr;
630+
int out_len;
631+
632+
/* out_len receives the length of the decoded string. */
newstr = xml_utf8_decode(tag, strlen(tag), &out_len, parser->target_encoding);
633+
634+
/*
 * Folding is applied to the already-decoded bytes.
 * NOTE(review): php_strtoupper() presumably uses the locale-dependent
 * toupper(), so this is only reliable when the target encoding is
 * compatible with the system locale -- confirm.
 */
if (parser->case_folding) {
635+
php_strtoupper(newstr, out_len);
636+
}
637+
638+
return newstr;
639+
}
624640

625641
/* }}} */
626-
/* {{{ _xml_startElementHandler() */
642+
/* {{{ _xml_startElementHandler() */
627643

628-
void _xml_startElementHandler(void *userData, const char *name,
629-
const char **attributes)
644+
void _xml_startElementHandler(void *userData, const char *name, const char **attributes)
630645
{
631646
xml_parser *parser = (xml_parser *)userData;
632647
const char **attrs = attributes;
648+
char *tag_name;
649+
char *att, *val;
650+
int att_len, val_len;
651+
zval *retval, *args[3];
633652

634653
if (parser) {
635-
zval *retval, *args[3];
636-
637654
parser->level++;
638655

639-
if (parser->case_folding) {
640-
name = php_strtoupper(estrdup(name), strlen(name));
641-
}
656+
tag_name = _xml_decode_tag(parser, name);
642657

643658
if (parser->startElementHandler) {
644659
args[0] = _xml_resource_zval(parser->index);
645-
args[1] = _xml_string_zval(name);
660+
args[1] = _xml_string_zval(tag_name);
646661
MAKE_STD_ZVAL(args[2]);
647662
array_init(args[2]);
663+
648664
while (attributes && *attributes) {
649-
char *key = (char *)attributes[0];
650-
char *value = (char *)attributes[1];
651-
char *decoded_value;
652-
int decoded_len;
653-
if (parser->case_folding) {
654-
key = php_strtoupper(estrdup(key), strlen(key));
655-
}
656-
decoded_value = xml_utf8_decode(value, strlen(value),
657-
&decoded_len,
658-
parser->target_encoding);
659-
660-
add_assoc_stringl(args[2], key, decoded_value, decoded_len, 0);
661-
if (parser->case_folding) {
662-
efree(key);
663-
}
665+
att = _xml_decode_tag(parser, attributes[0]);
666+
val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding);
667+
668+
add_assoc_stringl(args[2], att, val, val_len, 0);
669+
664670
attributes += 2;
671+
672+
efree(att);
665673
}
666674

667675
if ((retval = xml_call_handler(parser, parser->startElementHandler, 3, args))) {
@@ -680,34 +688,27 @@ void _xml_startElementHandler(void *userData, const char *name,
680688
array_init(tag);
681689
array_init(atr);
682690

683-
_xml_add_to_info(parser,((char *) name) + parser->toffset);
691+
_xml_add_to_info(parser,((char *) tag_name) + parser->toffset);
684692

685-
add_assoc_string(tag,"tag",((char *) name) + parser->toffset,1); /* cast to avoid gcc-warning */
693+
add_assoc_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */
686694
add_assoc_string(tag,"type","open",1);
687695
add_assoc_long(tag,"level",parser->level);
688696

689-
parser->ltags[parser->level-1] = estrdup(name);
697+
parser->ltags[parser->level-1] = estrdup(tag_name);
690698
parser->lastwasopen = 1;
691699

692700
attributes = attrs;
701+
693702
while (attributes && *attributes) {
694-
char *key = (char *)attributes[0];
695-
char *value = (char *)attributes[1];
696-
char *decoded_value;
697-
int decoded_len;
698-
if (parser->case_folding) {
699-
key = php_strtoupper(estrdup(key), strlen(key));
700-
}
701-
decoded_value = xml_utf8_decode(value, strlen(value),
702-
&decoded_len,
703-
parser->target_encoding);
703+
att = _xml_decode_tag(parser, attributes[0]);
704+
val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding);
704705

705-
add_assoc_stringl(atr,key,decoded_value,decoded_len,0);
706+
add_assoc_stringl(atr,att,val,val_len,0);
707+
706708
atcnt++;
707-
if (parser->case_folding) {
708-
efree(key);
709-
}
710709
attributes += 2;
710+
711+
efree(att);
711712
}
712713

713714
if (atcnt) {
@@ -720,9 +721,7 @@ void _xml_startElementHandler(void *userData, const char *name,
720721
zend_hash_next_index_insert(parser->data->value.ht,&tag,sizeof(zval*),(void *) &parser->ctag);
721722
}
722723

723-
if (parser->case_folding) {
724-
efree((char *)name);
725-
}
724+
efree(tag_name);
726725
}
727726
}
728727

@@ -732,17 +731,16 @@ void _xml_startElementHandler(void *userData, const char *name,
732731
void _xml_endElementHandler(void *userData, const char *name)
733732
{
734733
xml_parser *parser = (xml_parser *)userData;
734+
char *tag_name;
735735

736736
if (parser) {
737737
zval *retval, *args[2];
738738

739-
if (parser->case_folding) {
740-
name = php_strtoupper(estrdup(name), strlen(name));
741-
}
739+
tag_name = _xml_decode_tag(parser, name);
742740

743741
if (parser->endElementHandler) {
744742
args[0] = _xml_resource_zval(parser->index);
745-
args[1] = _xml_string_zval(name);
743+
args[1] = _xml_string_zval(tag_name);
746744

747745
if ((retval = xml_call_handler(parser, parser->endElementHandler, 2, args))) {
748746
zval_dtor(retval);
@@ -760,9 +758,9 @@ void _xml_endElementHandler(void *userData, const char *name)
760758

761759
array_init(tag);
762760

763-
_xml_add_to_info(parser,((char *) name) + parser->toffset);
761+
_xml_add_to_info(parser,((char *) tag_name) + parser->toffset);
764762

765-
add_assoc_string(tag,"tag",((char *) name) + parser->toffset,1); /* cast to avoid gcc-warning */
763+
add_assoc_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */
766764
add_assoc_string(tag,"type","close",1);
767765
add_assoc_long(tag,"level",parser->level);
768766

@@ -772,12 +770,12 @@ void _xml_endElementHandler(void *userData, const char *name)
772770
parser->lastwasopen = 0;
773771
}
774772

775-
if (parser->case_folding) {
776-
efree((char *)name);
777-
}
773+
efree(tag_name);
774+
778775
if (parser->ltags) {
779776
efree(parser->ltags[parser->level-1]);
780777
}
778+
781779
parser->level--;
782780
}
783781
}

0 commit comments

Comments
 (0)