Skip to content

Commit 4341325

Browse files
committed
Document, DomParser: Improve XInclude processing
* examples/Makefile.am:
* examples/dom_xinclude/example.xml: Changed due to moved include files.
* examples/dom_xinclude/include1.txt:
* examples/dom_xinclude/include2.xml: Moved to examples/dom_xinclude/xinclude/
* examples/dom_xinclude/main.cc: Test both Document::process_xinclude() and XInclude processing with DomParser::parse_file().
* libxml++/document.[cc|h]: Add fixup_base_uris parameter to process_xinclude().
* libxml++/parsers/domparser.[cc|h]: Add set/get_xinclude_options(). Add optional XInclude processing to the parse methods.
* libxml++/parsers/parser.[cc|h]: Add set/get_xinclude_options_internal().

Bug 781566
1 parent 2af973f commit 4341325

File tree

12 files changed

+171
-26
lines changed

12 files changed

+171
-26
lines changed

examples/Makefile.am

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,8 @@ dist_noinst_DATA = \
124124
dom_update_namespace/example1.xml \
125125
dom_update_namespace/example2.xml \
126126
dom_xinclude/example.xml \
127-
dom_xinclude/include1.txt \
128-
dom_xinclude/include2.xml \
127+
dom_xinclude/xinclude/include1.txt \
128+
dom_xinclude/xinclude/include2.xml \
129129
dom_xpath/example.xml \
130130
dtdvalidation/example.dtd \
131131
import_node/example1.xml \

examples/dom_xinclude/example.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<?xml version="1.0"?>
22
<document xmlns:xi="http://www.w3.org/2001/XInclude">
3-
<p><xi:include href="include1.txt" parse="text">
4-
<xi:fallback>Did not find include1.txt.</xi:fallback>
3+
<p><xi:include href="xinclude/include1.txt" parse="text">
4+
<xi:fallback>Did not find xinclude/include1.txt.</xi:fallback>
55
</xi:include></p>
6-
<xi:include href="include2.xml"/>
6+
<xi:include href="xinclude/include2.xml"/>
77
</document>

examples/dom_xinclude/include1.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

examples/dom_xinclude/main.cc

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ int main(int argc, char* argv[])
117117
bool throw_messages = false;
118118
bool substitute_entities = true;
119119
bool generate_xinclude_nodes = true;
120+
bool fixup_base_uris = true;
120121

121122
int argi = 1;
122123
while (argc > argi && *argv[argi] == '-') // option
@@ -140,13 +141,17 @@ int main(int argc, char* argv[])
140141
case 'X':
141142
generate_xinclude_nodes = false;
142143
break;
144+
case 'B':
145+
fixup_base_uris = false;
146+
break;
143147
default:
144-
std::cout << "Usage: " << argv[0] << " [-v] [-t] [-e] [-x] [filename]" << std::endl
148+
std::cout << "Usage: " << argv[0] << " [options]... [filename]" << std::endl
145149
<< " -v Validate" << std::endl
146150
<< " -t Throw messages in an exception" << std::endl
147151
<< " -e Write messages to stderr" << std::endl
148152
<< " -E Do not substitute entities" << std::endl
149-
<< " -X Do not generate XInclude nodes" << std::endl;
153+
<< " -X Do not generate XInclude nodes" << std::endl
154+
<< " -B Do not fix up base URIs" << std::endl;
150155
return EXIT_FAILURE;
151156
}
152157
argi++;
@@ -160,8 +165,7 @@ int main(int argc, char* argv[])
160165
try
161166
{
162167
xmlpp::DomParser parser;
163-
if (validate)
164-
parser.set_validate();
168+
parser.set_validate(validate);
165169
if (set_throw_messages)
166170
parser.set_throw_messages(throw_messages);
167171
//We can have the text resolved/unescaped automatically.
@@ -174,14 +178,31 @@ int main(int argc, char* argv[])
174178
print_node(pNode);
175179

176180
std::cout << std::endl << ">>>>> Number of XInclude substitutions: "
177-
<< parser.get_document()->process_xinclude(generate_xinclude_nodes)
181+
<< parser.get_document()->process_xinclude(
182+
generate_xinclude_nodes, fixup_base_uris)
183+
<< std::endl << std::endl;
184+
185+
std::cout << ">>>>> After XInclude processing with xmlpp::Document::process_xinclude(): "
178186
<< std::endl << std::endl;
179187
pNode = parser.get_document()->get_root_node();
180188
print_node(pNode);
181189

190+
// xmlpp::Document::write_to_string() does not write XIncludeStart and
191+
// XIncludeEnd nodes.
192+
const auto whole = parser.get_document()->write_to_string();
193+
std::cout << std::endl << whole << std::endl;
194+
}
195+
196+
parser.set_xinclude_options(true, generate_xinclude_nodes, fixup_base_uris);
197+
parser.parse_file(filepath);
198+
if (parser)
199+
{
200+
std::cout << ">>>>> After XInclude processing with xmlpp::DomParser::parse_file(): "
201+
<< std::endl << std::endl;
202+
print_node(parser.get_document()->get_root_node());
203+
182204
const auto whole = parser.get_document()->write_to_string();
183-
std::cout << std::endl << ">>>>> XML after XInclude processing: " << std::endl
184-
<< whole << std::endl;
205+
std::cout << std::endl << whole << std::endl;
185206
}
186207
}
187208
catch (const std::exception& ex)

examples/dom_xinclude/xinclude/include1.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
This is the contents of file xinclude/include1.txt.

examples/dom_xinclude/include2.xml → examples/dom_xinclude/xinclude/include2.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
<?xml version="1.0"?>
22
<chapter id="chapter-introduction">
3-
<p>This is the contents of file include2.xml.</p>
3+
<p>This is the contents of file xinclude/include2.xml.</p>
44
</chapter>

libxml++/document.cc

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
#include <libxml/tree.h>
1818
#include <libxml/xinclude.h>
19-
#include <libxml/parser.h> // XML_PARSE_NOXINCNODE
19+
#include <libxml/parser.h> // XML_PARSE_NOXINCNODE, XML_PARSE_NOBASEFIX
2020

2121
#include <iostream>
2222
#include <map>
@@ -428,6 +428,11 @@ void Document::set_entity_declaration(const Glib::ustring& name, XmlEntityType t
428428
}
429429

430430
int Document::process_xinclude(bool generate_xinclude_nodes)
431+
{
432+
return process_xinclude(generate_xinclude_nodes, true);
433+
}
434+
435+
int Document::process_xinclude(bool generate_xinclude_nodes, bool fixup_base_uris)
431436
{
432437
NodeMap node_map;
433438

@@ -436,8 +441,13 @@ int Document::process_xinclude(bool generate_xinclude_nodes)
436441
find_wrappers(root, node_map);
437442

438443
xmlResetLastError();
439-
const int n_substitutions = xmlXIncludeProcessTreeFlags(root,
440-
generate_xinclude_nodes ? 0 : XML_PARSE_NOXINCNODE);
444+
445+
int flags = 0;
446+
if (!generate_xinclude_nodes)
447+
flags |= XML_PARSE_NOXINCNODE;
448+
if (!fixup_base_uris)
449+
flags |= XML_PARSE_NOBASEFIX;
450+
const int n_substitutions = xmlXIncludeProcessTreeFlags(root, flags);
441451

442452
remove_found_wrappers(reinterpret_cast<xmlNode*>(impl_), node_map);
443453

libxml++/document.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@ class Document : public NonCopyable
236236
const Glib::ustring& publicId, const Glib::ustring& systemId,
237237
const Glib::ustring& content);
238238

239+
//TODO: When we can break ABI, remove the process_xinclude() with one parameter,
240+
// and add default values = true in the other process_xinclude()
239241
/** Perform XInclude substitution on the XML document.
240242
* XInclude substitution may both add and delete nodes in the document,
241243
* as well as change the type of some nodes. All pointers to deleted nodes
@@ -244,6 +246,9 @@ class Document : public NonCopyable
244246
* The type of a C++ wrapper can't change. The old wrapper is deleted, and a
245247
* new one is created if and when it's required.)
246248
*
249+
* Parser::set_parser_options() and DomParser::set_xinclude_options() do not
250+
* affect %Document::process_xinclude().
251+
*
247252
* @newin{2,36}
248253
*
249254
* @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
@@ -252,6 +257,27 @@ class Document : public NonCopyable
252257
*/
253258
int process_xinclude(bool generate_xinclude_nodes = true);
254259

260+
/** Perform XInclude substitution on the XML document.
261+
* XInclude substitution may both add and delete nodes in the document,
262+
* as well as change the type of some nodes. All pointers to deleted nodes
263+
* and nodes whose type is changed become invalid.
264+
* (The node type represented by an underlying xmlNode struct can change.
265+
* The type of a C++ wrapper can't change. The old wrapper is deleted, and a
266+
* new one is created if and when it's required.)
267+
*
268+
* Parser::set_parser_options() and DomParser::set_xinclude_options() do not
269+
* affect %Document::process_xinclude().
270+
*
271+
* @newin{3,2}
272+
*
273+
* @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
274+
* @param fixup_base_uris Add or replace xml:base attributes in included element
275+
* nodes, if necessary to preserve the target of relative URIs.
276+
* @returns The number of substitutions.
277+
* @throws xmlpp::exception
278+
*/
279+
int process_xinclude(bool generate_xinclude_nodes, bool fixup_base_uris);
280+
255281
///Access the underlying libxml implementation.
256282
_xmlDoc* cobj() noexcept;
257283

libxml++/parsers/domparser.cc

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "libxml++/keepblanks.h"
1313
#include "libxml++/exceptions/internal_error.h"
1414
#include <libxml/parserInternals.h>//For xmlCreateFileParserCtxt().
15+
#include <libxml/xinclude.h>
1516

1617
#include <sstream>
1718
#include <iostream>
@@ -38,6 +39,32 @@ DomParser::~DomParser()
3839
release_underlying();
3940
}
4041

42+
//TODO: When we can break ABI, remove set/get_xinclude_options_internal() in
43+
// Parser and move all XInclude stuff to DomParser.
44+
void DomParser::set_xinclude_options(bool process_xinclude,
45+
bool generate_xinclude_nodes, bool fixup_base_uris) noexcept
46+
{
47+
int xinclude_options = 0;
48+
if (process_xinclude)
49+
xinclude_options |= XML_PARSE_XINCLUDE;
50+
if (!generate_xinclude_nodes)
51+
xinclude_options |= XML_PARSE_NOXINCNODE;
52+
if (!fixup_base_uris)
53+
xinclude_options |= XML_PARSE_NOBASEFIX;
54+
55+
set_xinclude_options_internal(xinclude_options);
56+
}
57+
58+
void DomParser::get_xinclude_options(bool& process_xinclude,
59+
bool& generate_xinclude_nodes, bool& fixup_base_uris) const noexcept
60+
{
61+
const int xinclude_options = get_xinclude_options_internal();
62+
63+
process_xinclude = (xinclude_options & XML_PARSE_XINCLUDE) != 0;
64+
generate_xinclude_nodes = (xinclude_options & XML_PARSE_NOXINCNODE) == 0;
65+
fixup_base_uris = (xinclude_options & XML_PARSE_NOBASEFIX) == 0;
66+
}
67+
4168
void DomParser::parse_file(const std::string& filename)
4269
{
4370
release_underlying(); //Free any existing document.
@@ -120,6 +147,29 @@ void DomParser::parse_context()
120147
throw parse_error(error_str);
121148
}
122149

150+
check_xinclude_and_finish_parsing();
151+
}
152+
153+
void DomParser::check_xinclude_and_finish_parsing()
154+
{
155+
int set_options = 0;
156+
int clear_options = 0;
157+
get_parser_options(set_options, clear_options);
158+
159+
int options = get_xinclude_options_internal();
160+
// Turn on/off any xinclude options.
161+
options |= set_options;
162+
options &= ~clear_options;
163+
164+
if (options & XML_PARSE_XINCLUDE)
165+
{
166+
const int n_substitutions = xmlXIncludeProcessFlags(context_->myDoc, options);
167+
if (n_substitutions < 0)
168+
{
169+
throw parse_error("Couldn't process XInclude\n" + format_xml_error());
170+
}
171+
}
172+
123173
doc_ = new Document(context_->myDoc);
124174
// This is to indicate to release_underlying() that we took the
125175
// ownership on the doc.
@@ -197,14 +247,7 @@ void DomParser::parse_stream(std::istream& in)
197247
throw parse_error(error_str);
198248
}
199249

200-
doc_ = new Document(context_->myDoc);
201-
// This is to indicate to release_underlying() that we took the
202-
// ownership on the doc.
203-
context_->myDoc = nullptr;
204-
205-
// Free the parser context because it's not needed anymore,
206-
// but keep the document alive so people can navigate the DOM tree:
207-
Parser::release_underlying();
250+
check_xinclude_and_finish_parsing();
208251
}
209252

210253
void DomParser::release_underlying()

libxml++/parsers/domparser.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,31 @@ class DomParser : public Parser
3333
explicit DomParser(const std::string& filename, bool validate = false);
3434
~DomParser() override;
3535

36+
/** Set whether and how the parser will perform XInclude substitution.
37+
*
38+
* @newin{3,2}
39+
*
40+
* @param process_xinclude Do XInclude substitution on the XML document.
41+
* If <tt>false</tt>, the other parameters have no effect.
42+
* @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
43+
* @param fixup_base_uris Add or replace xml:base attributes in included element
44+
* nodes, if necessary to preserve the target of relative URIs.
45+
*/
46+
void set_xinclude_options(bool process_xinclude = true,
47+
bool generate_xinclude_nodes = true, bool fixup_base_uris = true) noexcept;
48+
49+
/** Get whether and how the parser will perform XInclude substitution.
50+
*
51+
* @newin{3,2}
52+
*
53+
* @param[out] process_xinclude Do XInclude substitution on the XML document.
54+
* @param[out] generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
55+
* @param[out] fixup_base_uris Add or replace xml:base attributes in included element
56+
* nodes, if necessary to preserve the target of relative URIs.
57+
*/
58+
void get_xinclude_options(bool& process_xinclude,
59+
bool& generate_xinclude_nodes, bool& fixup_base_uris) const noexcept;
60+
3661
/** Parse an XML document from a file.
3762
* If the parser already contains a document, that document and all its nodes
3863
* are deleted.
@@ -90,6 +115,7 @@ class DomParser : public Parser
90115

91116
protected:
92117
void parse_context();
118+
void check_xinclude_and_finish_parsing();
93119

94120
void release_underlying() override;
95121

libxml++/parsers/parser.cc

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,15 @@
1212
namespace xmlpp
1313
{
1414

15+
//TODO: When we can break ABI, remove Parser::Impl::xinclude_options_
16+
// and move all XInclude stuff to DomParser.
1517
struct Parser::Impl
1618
{
1719
Impl()
1820
:
1921
throw_messages_(true), validate_(false), substitute_entities_(false),
20-
include_default_attributes_(false), set_options_(0), clear_options_(0)
22+
include_default_attributes_(false), set_options_(0), clear_options_(0),
23+
xinclude_options_(0)
2124
{}
2225

2326
// Built gradually - used in an exception at the end of parsing.
@@ -32,6 +35,7 @@ struct Parser::Impl
3235
bool include_default_attributes_;
3336
int set_options_;
3437
int clear_options_;
38+
int xinclude_options_;
3539
};
3640

3741
Parser::Parser()
@@ -96,6 +100,16 @@ void Parser::get_parser_options(int& set_options, int& clear_options) noexcept
96100
clear_options = pimpl_->clear_options_;
97101
}
98102

103+
void Parser::set_xinclude_options_internal(int xinclude_options) noexcept
104+
{
105+
pimpl_->xinclude_options_ = xinclude_options;
106+
}
107+
108+
int Parser::get_xinclude_options_internal() const noexcept
109+
{
110+
return pimpl_->xinclude_options_;
111+
}
112+
99113
void Parser::initialize_context()
100114
{
101115
//Clear these temporary buffers:

libxml++/parsers/parser.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,11 @@ class Parser : public NonCopyable
196196
static void callback_error_or_warning(MsgType msg_type, void* ctx,
197197
const char* msg, va_list var_args);
198198

199+
//TODO: When we can break ABI, remove set/get_xinclude_options_internal()
200+
// and move all XInclude stuff to DomParser.
201+
void set_xinclude_options_internal(int xinclude_options) noexcept;
202+
int get_xinclude_options_internal() const noexcept;
203+
199204
_xmlParserCtxt* context_;
200205
std::unique_ptr<exception> exception_;
201206

0 commit comments

Comments
 (0)