Skip to content

Commit be2f824

Browse files
taliseinkjellahl
authored and committed
parsers: Avoid std::getline for istream inputs
For streambuf implementations that can't set a public get area, getline falls back to a character-by-character implementation. Instead we can pass the stream to xmlCreateIOParserCtxt(). This simplifies the parse stream methods and makes them nearly identical to the other parse methods.
1 parent cc48aae commit be2f824

File tree

4 files changed

+196
-115
lines changed

4 files changed

+196
-115
lines changed

libxml++/parsers/domparser.cc

Lines changed: 23 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,8 @@ void DomParser::check_xinclude_and_finish_parsing()
153153
int options = xinclude_options_;
154154
// Turn on/off any xinclude options.
155155
options |= set_options;
156-
options &= ~clear_options;
157-
156+
options &= ~clear_options;
157+
158158
if (options & XML_PARSE_XINCLUDE)
159159
{
160160
const int n_substitutions = xmlXIncludeProcessFlags(context_->myDoc, options);
@@ -174,74 +174,40 @@ void DomParser::check_xinclude_and_finish_parsing()
174174
Parser::release_underlying();
175175
}
176176

177+
namespace {
extern "C" {
/** Input read callback passed to xmlCreateIOParserCtxt().
 *
 * Fills @a buffer with up to @a len bytes read from the std::istream that
 * @a context points to.
 *
 * @param context Pointer to the std::istream to read from.
 * @param buffer Destination buffer with room for at least @a len bytes.
 * @param len Maximum number of bytes to read.
 * @returns The number of bytes stored in @a buffer, 0 at end of input,
 *          or -1 if the arguments are invalid or the stream is in the
 *          bad state and nothing could be read.
 */
static int _io_read_callback(void * context,
                             char * buffer,
                             int len)
{
  auto in = static_cast<std::istream*>(context);
  if (!in || !buffer || len < 0)
    return -1;

  in->read(buffer, len);
  const std::streamsize n_read = in->gcount();

  // Reaching end-of-file sets eofbit/failbit, which is not an error here.
  // Only badbit denotes a real I/O failure. Bytes read before the failure
  // are still delivered; the next call then reports the error.
  if (n_read == 0 && in->bad())
    return -1;

  // n_read never exceeds len, so the conversion cannot overflow.
  return static_cast<int>(n_read);
}
}
}
189+
177190
void DomParser::parse_stream(std::istream& in)
178191
{
179192
release_underlying(); //Free any existing document.
180193

181194
KeepBlanks k(KeepBlanks::Default);
182195
xmlResetLastError();
183196

184-
context_ = xmlCreatePushParserCtxt(
185-
nullptr, // Setting those two parameters to nullptr force the parser
186-
nullptr, // to create a document while parsing.
187-
nullptr, // chunk
188-
0, // size
189-
nullptr); // no filename for fetching external entities
197+
context_ = xmlCreateIOParserCtxt(
198+
nullptr, // Setting those two parameters to nullptr force the parser
199+
nullptr, // to create a document while parsing.
200+
_io_read_callback,
201+
nullptr, // inputCloseCallback
202+
&in,
203+
XML_CHAR_ENCODING_NONE);
190204

191205
if(!context_)
192206
{
193207
throw internal_error("Could not create parser context\n" + format_xml_error());
194208
}
195209

196-
initialize_context();
197-
198-
// std::string or ustring?
199-
// Output from the XML parser is UTF-8 encoded.
200-
// But the istream "in" is input, i.e. an XML file. It can use any encoding.
201-
// If it's not UTF-8, the file itself must contain information about which
202-
// encoding it uses. See the XML specification. Thus use std::string.
203-
int firstParseError = XML_ERR_OK;
204-
std::string line;
205-
while(std::getline(in, line))
206-
{
207-
// since getline does not get the line separator, we have to add it since the parser cares
208-
// about layout in certain cases.
209-
line += '\n';
210-
211-
const int parseError = xmlParseChunk(context_, line.c_str(),
212-
line.size() /* This is a std::string, not a ustring, so this is the number of bytes. */, 0);
213-
214-
// Save the first error code if any, but read on.
215-
// More errors might be reported and then thrown by check_for_exception().
216-
if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
217-
firstParseError = parseError;
218-
}
219-
220-
const int parseError = xmlParseChunk(context_, nullptr, 0, 1 /* last chunk */);
221-
if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
222-
firstParseError = parseError;
223-
224-
try
225-
{
226-
check_for_exception();
227-
}
228-
catch (...)
229-
{
230-
release_underlying(); //Free doc_ and context_
231-
throw; // re-throw exception
232-
}
233-
234-
auto error_str = format_xml_parser_error(context_);
235-
if (error_str.empty() && firstParseError != XML_ERR_OK)
236-
error_str = "Error code from xmlParseChunk(): " + std::to_string(firstParseError);
237-
238-
if(!error_str.empty())
239-
{
240-
release_underlying(); //Free doc_ and context_
241-
throw parse_error(error_str);
242-
}
243-
244-
check_xinclude_and_finish_parsing();
210+
parse_context();
245211
}
246212

247213
void DomParser::release_underlying()

libxml++/parsers/saxparser.cc

Lines changed: 20 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,19 @@ void SaxParser::parse_memory(const ustring& contents)
209209
parse_memory_raw((const unsigned char*)contents.c_str(), contents.size());
210210
}
211211

212+
namespace {
extern "C" {
/** Input read callback passed to xmlCreateIOParserCtxt().
 *
 * Fills @a buffer with up to @a len bytes read from the std::istream that
 * @a context points to.
 *
 * @param context Pointer to the std::istream to read from.
 * @param buffer Destination buffer with room for at least @a len bytes.
 * @param len Maximum number of bytes to read.
 * @returns The number of bytes stored in @a buffer, 0 at end of input,
 *          or -1 if the arguments are invalid or the stream is in the
 *          bad state and nothing could be read.
 */
static int _io_read_callback(void * context,
                             char * buffer,
                             int len)
{
  auto in = static_cast<std::istream*>(context);
  if (!in || !buffer || len < 0)
    return -1;

  in->read(buffer, len);
  const std::streamsize n_read = in->gcount();

  // Reaching end-of-file sets eofbit/failbit, which is not an error here.
  // Only badbit denotes a real I/O failure. Bytes read before the failure
  // are still delivered; the next call then reports the error.
  if (n_read == 0 && in->bad())
    return -1;

  // n_read never exceeds len, so the conversion cannot overflow.
  return static_cast<int>(n_read);
}
}
}
224+
212225
void SaxParser::parse_stream(std::istream& in)
213226
{
214227
if(context_)
@@ -217,66 +230,15 @@ void SaxParser::parse_stream(std::istream& in)
217230
}
218231

219232
KeepBlanks k(KeepBlanks::Default);
220-
xmlResetLastError();
221233

222-
context_ = xmlCreatePushParserCtxt(
234+
context_ = xmlCreateIOParserCtxt(
223235
sax_handler_.get(),
224-
nullptr, // user_data
225-
nullptr, // chunk
226-
0, // size
227-
nullptr); // no filename for fetching external entities
228-
229-
if(!context_)
230-
{
231-
throw internal_error("Could not create parser context\n" + format_xml_error());
232-
}
233-
234-
initialize_context();
235-
236-
// std::string or ustring?
237-
// Output from the XML parser is UTF-8 encoded.
238-
// But the istream "in" is input, i.e. an XML file. It can use any encoding.
239-
// If it's not UTF-8, the file itself must contain information about which
240-
// encoding it uses. See the XML specification. Thus use std::string.
241-
int firstParseError = XML_ERR_OK;
242-
std::string line;
243-
while (!exception_ && std::getline(in, line))
244-
{
245-
// since getline does not get the line separator, we have to add it since the parser care
246-
// about layout in certain cases.
247-
line += '\n';
248-
249-
const int parseError = xmlParseChunk(context_, line.c_str(),
250-
line.size() /* This is a std::string, not a ustring, so this is the number of bytes. */,
251-
0 /* don't terminate */);
252-
253-
// Save the first error code if any, but read on.
254-
// More errors might be reported and then thrown by check_for_exception().
255-
if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
256-
firstParseError = parseError;
257-
}
258-
259-
if (!exception_)
260-
{
261-
//This is called just to terminate parsing.
262-
const int parseError = xmlParseChunk(context_, nullptr /* chunk */, 0 /* size */, 1 /* terminate (1 or 0) */);
263-
264-
if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
265-
firstParseError = parseError;
266-
}
267-
268-
auto error_str = format_xml_parser_error(context_);
269-
if (error_str.empty() && firstParseError != XML_ERR_OK)
270-
error_str = "Error code from xmlParseChunk(): " + std::to_string(firstParseError);
271-
272-
release_underlying(); // Free context_
273-
274-
check_for_exception();
275-
276-
if(!error_str.empty())
277-
{
278-
throw parse_error(error_str);
279-
}
236+
nullptr, // user_data
237+
_io_read_callback,
238+
nullptr, // inputCloseCallback
239+
&in,
240+
XML_CHAR_ENCODING_NONE);
241+
parse();
280242
}
281243

282244
void SaxParser::parse_chunk(const ustring& chunk)

tests/istream_ioparser/main.cc

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/* Copyright (C) 2022 The libxml++ development team
2+
*
3+
* This library is free software; you can redistribute it and/or
4+
* modify it under the terms of the GNU Lesser General Public
5+
* License as published by the Free Software Foundation; either
6+
* version 2.1 of the License, or (at your option) any later version.
7+
*
8+
* This library is distributed in the hope that it will be useful,
9+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11+
* Lesser General Public License for more details.
12+
*
13+
* You should have received a copy of the GNU Lesser General Public
14+
* License along with this library; if not, write to the Free Software
15+
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16+
*/
17+
18+
#include <libxml++/libxml++.h>
19+
20+
#include <cassert>
21+
#include <cstdlib>
22+
#include <cstring>
23+
#include <sstream>
24+
25+
/* Read-only streambuf over a small, fixed XML document.
 *
 * It simulates a streambuf implementation that never calls setg(), so it
 * exposes no get area: single-character reads go through underflow() or
 * uflow(), while bulk reads go through xsgetn(). The public counters record
 * how often the single-character paths were taken.
 */
class test_streambuf : public std::streambuf
{
public:
  test_streambuf() = default;

  int uflow_calls = 0;     // Number of calls to uflow().
  int underflow_calls = 0; // Number of calls to underflow().

protected:
  /* Returns the current character without consuming it, or eof. */
  int_type underflow() override final
  {
    ++underflow_calls;
    if (remaining() == 0)
      return traits_type::eof();
    return traits_type::to_int_type(buf[ofs]);
  }

  /* Returns the current character and advances past it, or eof. */
  int_type uflow() override final
  {
    ++uflow_calls;
    if (remaining() == 0)
      return traits_type::eof();
    return traits_type::to_int_type(buf[ofs++]);
  }

  /* Returns the number of unread characters, or -1 when none are left. */
  std::streamsize showmanyc() override final
  {
    const std::size_t left = remaining();
    if (left == 0)
      return -1;
    return static_cast<std::streamsize>(left);
  }

  /* Copies up to count unread characters to s and returns how many were
   * copied. A non-positive count copies nothing.
   */
  std::streamsize xsgetn(char_type* s, std::streamsize count) override final
  {
    const auto left = static_cast<std::streamsize>(remaining());
    const std::streamsize n = count < left ? count : left;
    if (n <= 0)
      return 0;

    std::memcpy(s, buf + ofs, static_cast<std::size_t>(n));
    ofs += static_cast<std::size_t>(n);
    return n;
  }

private:
  /* Number of characters not yet consumed; the trailing NUL is excluded. */
  std::size_t remaining() const
  {
    const std::size_t size = sizeof(buf) - 1;
    return ofs < size ? size - ofs : 0;
  }

  std::size_t ofs = 0;
  char buf[15] = "<root>\n</root>";
};
77+
78+
class MySaxParser : public xmlpp::SaxParser {
79+
public:
80+
bool saw_root = false;
81+
protected:
82+
virtual void on_start_document() override final
83+
{
84+
saw_root = false;
85+
}
86+
virtual void on_end_element(const xmlpp::ustring &name) override final
87+
{
88+
if (name == "root")
89+
saw_root = true;
90+
}
91+
};
92+
93+
int main()
94+
{
95+
{ // Check DomParser works well with normal and custom istreams
96+
xmlpp::DomParser parser;
97+
try
98+
{
99+
std::stringstream ss("<root></root>");
100+
parser.parse_stream(ss);
101+
}
102+
catch(...)
103+
{
104+
assert(false);
105+
}
106+
107+
{
108+
auto doc = parser.get_document();
109+
assert(doc->get_root_node()->get_name() == "root");
110+
}
111+
112+
{
113+
test_streambuf buf;
114+
try {
115+
std::istream is(&buf);
116+
parser.parse_stream(is);
117+
} catch (...) {
118+
assert(false);
119+
}
120+
assert(buf.underflow_calls + buf.uflow_calls < 3);
121+
auto doc = parser.get_document();
122+
assert(doc->get_root_node()->get_name() == "root");
123+
}
124+
}
125+
{ // Check SaxParser works well with normal and custom istreams.
126+
MySaxParser parser;
127+
try
128+
{
129+
std::stringstream ss("<root></root>");
130+
parser.parse_stream(ss);
131+
}
132+
catch(...)
133+
{
134+
assert(false);
135+
}
136+
assert(parser.saw_root);
137+
138+
{
139+
test_streambuf buf;
140+
try {
141+
std::istream is(&buf);
142+
parser.parse_stream(is);
143+
} catch (...) {
144+
assert(false);
145+
}
146+
assert(buf.underflow_calls + buf.uflow_calls < 3);
147+
assert(parser.saw_root);
148+
}
149+
}
150+
assert(true);
151+
return EXIT_SUCCESS;
152+
}

tests/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ test_programs = [
88
[['saxparser_chunk_parsing_inconsistent_state'], 'test', ['main.cc']],
99
[['saxparser_parse_double_free'], 'test', ['main.cc']],
1010
[['saxparser_parse_stream_inconsistent_state'], 'test', ['main.cc']],
11+
[['istream_ioparser'], 'test', ['main.cc']],
1112
]
1213

1314
foreach ex : test_programs

0 commit comments

Comments
 (0)