Skip to content

parsers: Avoid std::getline for istream inputs #28

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 23 additions & 57 deletions libxml++/parsers/domparser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ void DomParser::check_xinclude_and_finish_parsing()
int options = xinclude_options_;
// Turn on/off any xinclude options.
options |= set_options;
options &= ~clear_options;
options &= ~clear_options;

if (options & XML_PARSE_XINCLUDE)
{
const int n_substitutions = xmlXIncludeProcessFlags(context_->myDoc, options);
Expand All @@ -174,74 +174,40 @@ void DomParser::check_xinclude_and_finish_parsing()
Parser::release_underlying();
}

namespace {
extern "C" {
static int _io_read_callback(void * context,
char * buffer,
int len)
{
std::istream *in = static_cast<std::istream*>(context);
in->read(buffer, len);
return in->gcount();
}
}
}

void DomParser::parse_stream(std::istream& in)
{
release_underlying(); //Free any existing document.

KeepBlanks k(KeepBlanks::Default);
xmlResetLastError();

context_ = xmlCreatePushParserCtxt(
nullptr, // Setting those two parameters to nullptr force the parser
nullptr, // to create a document while parsing.
nullptr, // chunk
0, // size
nullptr); // no filename for fetching external entities
context_ = xmlCreateIOParserCtxt(
nullptr, // Setting those two parameters to nullptr force the parser
nullptr, // to create a document while parsing.
_io_read_callback,
nullptr, // inputCloseCallback
&in,
XML_CHAR_ENCODING_NONE);

if(!context_)
{
throw internal_error("Could not create parser context\n" + format_xml_error());
}

initialize_context();

// std::string or ustring?
// Output from the XML parser is UTF-8 encoded.
// But the istream "in" is input, i.e. an XML file. It can use any encoding.
// If it's not UTF-8, the file itself must contain information about which
// encoding it uses. See the XML specification. Thus use std::string.
int firstParseError = XML_ERR_OK;
std::string line;
while(std::getline(in, line))
{
// since getline does not get the line separator, we have to add it since the parser cares
// about layout in certain cases.
line += '\n';

const int parseError = xmlParseChunk(context_, line.c_str(),
line.size() /* This is a std::string, not a ustring, so this is the number of bytes. */, 0);

// Save the first error code if any, but read on.
// More errors might be reported and then thrown by check_for_exception().
if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
firstParseError = parseError;
}

const int parseError = xmlParseChunk(context_, nullptr, 0, 1 /* last chunk */);
if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
firstParseError = parseError;

try
{
check_for_exception();
}
catch (...)
{
release_underlying(); //Free doc_ and context_
throw; // re-throw exception
}

auto error_str = format_xml_parser_error(context_);
if (error_str.empty() && firstParseError != XML_ERR_OK)
error_str = "Error code from xmlParseChunk(): " + std::to_string(firstParseError);

if(!error_str.empty())
{
release_underlying(); //Free doc_ and context_
throw parse_error(error_str);
}

check_xinclude_and_finish_parsing();
parse_context();
}

void DomParser::release_underlying()
Expand Down
78 changes: 20 additions & 58 deletions libxml++/parsers/saxparser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,19 @@ void SaxParser::parse_memory(const ustring& contents)
parse_memory_raw((const unsigned char*)contents.c_str(), contents.size());
}

namespace {
extern "C" {
static int _io_read_callback(void * context,
char * buffer,
int len)
{
std::istream *in = static_cast<std::istream*>(context);
in->read(buffer, len);
return in->gcount();
}
}
}

void SaxParser::parse_stream(std::istream& in)
{
if(context_)
Expand All @@ -217,66 +230,15 @@ void SaxParser::parse_stream(std::istream& in)
}

KeepBlanks k(KeepBlanks::Default);
xmlResetLastError();

context_ = xmlCreatePushParserCtxt(
context_ = xmlCreateIOParserCtxt(
sax_handler_.get(),
nullptr, // user_data
nullptr, // chunk
0, // size
nullptr); // no filename for fetching external entities

if(!context_)
{
throw internal_error("Could not create parser context\n" + format_xml_error());
}

initialize_context();

// std::string or ustring?
// Output from the XML parser is UTF-8 encoded.
// But the istream "in" is input, i.e. an XML file. It can use any encoding.
// If it's not UTF-8, the file itself must contain information about which
// encoding it uses. See the XML specification. Thus use std::string.
int firstParseError = XML_ERR_OK;
std::string line;
while (!exception_ && std::getline(in, line))
{
// since getline does not get the line separator, we have to add it since the parser care
// about layout in certain cases.
line += '\n';

const int parseError = xmlParseChunk(context_, line.c_str(),
line.size() /* This is a std::string, not a ustring, so this is the number of bytes. */,
0 /* don't terminate */);

// Save the first error code if any, but read on.
// More errors might be reported and then thrown by check_for_exception().
if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
firstParseError = parseError;
}

if (!exception_)
{
//This is called just to terminate parsing.
const int parseError = xmlParseChunk(context_, nullptr /* chunk */, 0 /* size */, 1 /* terminate (1 or 0) */);

if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
firstParseError = parseError;
}

auto error_str = format_xml_parser_error(context_);
if (error_str.empty() && firstParseError != XML_ERR_OK)
error_str = "Error code from xmlParseChunk(): " + std::to_string(firstParseError);

release_underlying(); // Free context_

check_for_exception();

if(!error_str.empty())
{
throw parse_error(error_str);
}
nullptr, // user_data
_io_read_callback,
nullptr, // inputCloseCallback
&in,
XML_CHAR_ENCODING_NONE);
parse();
}

void SaxParser::parse_chunk(const ustring& chunk)
Expand Down
4 changes: 3 additions & 1 deletion tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ LDADD = $(top_builddir)/libxml++/libxml++-$(LIBXMLXX_API_VERSION).la $(LIBXMLXX_
check_PROGRAMS = \
saxparser_chunk_parsing_inconsistent_state/test \
saxparser_parse_double_free/test \
saxparser_parse_stream_inconsistent_state/test
saxparser_parse_stream_inconsistent_state/test \
istream_ioparser/test

TESTS = $(check_PROGRAMS)

saxparser_chunk_parsing_inconsistent_state_test_SOURCES = saxparser_chunk_parsing_inconsistent_state/main.cc
saxparser_parse_double_free_test_SOURCES = saxparser_parse_double_free/main.cc
saxparser_parse_stream_inconsistent_state_test_SOURCES = saxparser_parse_stream_inconsistent_state/main.cc
istream_ioparser_test_SOURCES = istream_ioparser/main.cc
152 changes: 152 additions & 0 deletions tests/istream_ioparser/main.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/* Copyright (C) 2022 The libxml++ development team
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

#include <libxml++/libxml++.h>

#include <cassert>
#include <cstdlib>
#include <cstring>
#include <sstream>

class test_streambuf : public std::streambuf
{
public:
test_streambuf() :
uflow_calls(0),
underflow_calls(0),
ofs(0),
buf("<root>\n</root>")
{
}

protected:
/* Simulate some kind of streambuf impl that doesn't setg() */
virtual int_type underflow() override final
{
++underflow_calls;
if (ofs >= (sizeof(buf)-1))
return traits_type::eof();
return traits_type::to_int_type(buf[ofs]);
}

virtual int_type uflow() override final
{
++uflow_calls;
if (ofs >= (sizeof(buf)-1))
return traits_type::eof();
return traits_type::to_int_type(buf[ofs++]);
}

virtual std::streamsize showmanyc() override final
{
if (ofs >= (sizeof(buf)-1))
return traits_type::eof();
return sizeof(buf)-1-ofs;
}

virtual std::streamsize xsgetn(char_type* s, std::streamsize count) override final
{
auto n = std::min(count, static_cast<std::streamsize>(sizeof(buf)-1-ofs));
memcpy(s, buf + ofs, n);
ofs += n;
return n;
}

public:
int uflow_calls;
int underflow_calls;

private:
size_t ofs;
char buf[15];
};

class MySaxParser : public xmlpp::SaxParser {
public:
bool saw_root = false;
protected:
virtual void on_start_document() override final
{
saw_root = false;
}
virtual void on_end_element(const xmlpp::ustring &name) override final
{
if (name == "root")
saw_root = true;
}
};

int main()
{
{ // Check DomParser works well with normal and custom istreams
xmlpp::DomParser parser;
try
{
std::stringstream ss("<root></root>");
parser.parse_stream(ss);
}
catch(...)
{
assert(false);
}

{
auto doc = parser.get_document();
assert(doc->get_root_node()->get_name() == "root");
}

{
test_streambuf buf;
try {
std::istream is(&buf);
parser.parse_stream(is);
} catch (...) {
assert(false);
}
assert(buf.underflow_calls + buf.uflow_calls < 3);
auto doc = parser.get_document();
assert(doc->get_root_node()->get_name() == "root");
}
}
{ // Check SaxParser works well with normal and custom istreams.
MySaxParser parser;
try
{
std::stringstream ss("<root></root>");
parser.parse_stream(ss);
}
catch(...)
{
assert(false);
}
assert(parser.saw_root);

{
test_streambuf buf;
try {
std::istream is(&buf);
parser.parse_stream(is);
} catch (...) {
assert(false);
}
assert(buf.underflow_calls + buf.uflow_calls < 3);
assert(parser.saw_root);
}
}
assert(true);
return EXIT_SUCCESS;
}
1 change: 1 addition & 0 deletions tests/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ test_programs = [
[['saxparser_chunk_parsing_inconsistent_state'], 'test', ['main.cc']],
[['saxparser_parse_double_free'], 'test', ['main.cc']],
[['saxparser_parse_stream_inconsistent_state'], 'test', ['main.cc']],
[['istream_ioparser'], 'test', ['main.cc']],
]

foreach ex : test_programs
Expand Down