From d65e27c765c1004f07b910c024f856eda549587d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 1 Aug 2024 11:54:33 +0900 Subject: [PATCH 01/21] Bump version --- lib/rexml/rexml.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 39e92a57..818bd01a 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -31,7 +31,7 @@ module REXML COPYRIGHT = "Copyright © 2001-2008 Sean Russell " DATE = "2008/019" - VERSION = "3.3.3" + VERSION = "3.3.4" REVISION = "" Copyright = COPYRIGHT From cb2137880df6e5906f67a0c3701ffac3eded798f Mon Sep 17 00:00:00 2001 From: takuya kodama Date: Thu, 1 Aug 2024 15:16:46 +0800 Subject: [PATCH 02/21] Add missing rexml/security require in rexml/parsers/baseparser.rb (#189) `REXML::Parser::BaseParser` uses `REXML::Security` since #187. But `rexml/parsers/baseparser.rb` doesn't require `rexml/security` explicitly. This doesn't cause a problem in normal usages because `require "rexml"` requires `rexml/security` implicitly. If an user requires specific parser such as `rexml/parsers/streamparser` explicitly, this causes a problem. We should require `rexml/security` explicitly in `rexml/parsers/baseparser.rb` explicitly because `REXML::Parser::BaseParser` uses `REXML::Security`. ## How to reproduce When `lib/rexml/parsers/baseparser.rb` is required directly, the `REXML::Security` module is not required. It causes the following error: ```ruby require "rexml/parsers/streamparser" require "rexml/streamlistener" class Listener include REXML::StreamListener end REXML::Parsers::StreamParser.new(">", Listener.new).parse ``` ```console $ ruby test.rb lib/rexml/parsers/baseparser.rb:558:in 'block in REXML::Parsers::BaseParser#unnormalize': uninitialized constant REXML::Parsers::BaseParser::Security (NameError) if sum > Security.entity_expansion_text_limit ^^^^^^^^ Did you mean? SecurityError from :54:in 'Array#each' from rexml/parsers/baseparser.rb:551:in 'REXML::Parsers::BaseParser#unnormalize' from rexml/parsers/streamparser.rb:39:in 'REXML::Parsers::StreamParser#parse' from test.rb:8:in '
' ``` --- lib/rexml/parsers/baseparser.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 44dc6580..28810bfa 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative '../parseexception' require_relative '../undefinednamespaceexception' +require_relative '../security' require_relative '../source' require 'set' require "strscan" From 911dca43f2a645bffbfcfb07d57f2aaf52d19733 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 1 Aug 2024 16:19:34 +0900 Subject: [PATCH 03/21] Add 3.3.4 entry --- NEWS.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/NEWS.md b/NEWS.md index 72318b7f..a924538e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,19 @@ # News +## 3.3.4 - 2024-08-01 {#version-3-3-4} + +### Fixes + + * Fixed a bug that `REXML::Security` isn't defined when + `REXML::Parsers::StreamParser` is used and + `rexml/parsers/streamparser` is only required. + * GH-189 + * Patch by takuya kodama. + +### Thanks + + * takuya kodama + ## 3.3.3 - 2024-08-01 {#version-3-3-3} ### Improvements From e3f747fb4fe30f5c890a4bea5b12dd72f595e6b3 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 1 Aug 2024 16:20:26 +0900 Subject: [PATCH 04/21] Bump version --- lib/rexml/rexml.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 818bd01a..bb804b0e 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -31,7 +31,7 @@ module REXML COPYRIGHT = "Copyright © 2001-2008 Sean Russell " DATE = "2008/019" - VERSION = "3.3.4" + VERSION = "3.3.5" REVISION = "" Copyright = COPYRIGHT From 1892770f3e32d75368ffad99b8e86d539786c213 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Mon, 12 Aug 2024 09:58:23 +0900 Subject: [PATCH 05/21] Fix calculation of Security.entity_expansion_text_limit in SAX/pull parsers (#195) GitHub: fix #193 ## [Why?] In SAX and pull parsers, the total value of rv.bytesize was checked, but the summing process was unnecessary. - Add Log ```patch diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 28810bf..5cfc089 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -556,6 +556,7 @@ module REXML re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/ rv.gsub!( re, entity_value ) sum += rv.bytesize +puts " rv.bytesize: #{rv.bytesize} sum: #{sum} > Security.entity_expansion_text_limit: #{Security.entity_expansion_text_limit} : #{rv}" if sum > Security.entity_expansion_text_limit raise "entity expansion has grown too large" end diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 7e0befe..cc68dbf 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -415,6 +415,7 @@ module REXML sum = 0 string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) { s = Text.expand($&, doctype, filter) +puts " s.bytesize: #{s.bytesize} sum + s.bytesize : #{sum + s.bytesize } > Security.entity_expansion_text_limit: #{Security.entity_expansion_text_limit} : #{s}" if sum + s.bytesize > Security.entity_expansion_text_limit raise "entity expansion has grown too large" else ``` - entity_expansion_text_limit.rb ```ruby $LOAD_PATH.unshift(File.expand_path("lib")) require 'rexml' require 'rexml/parsers/sax2parser' require 'rexml/parsers/pullparser' def dom_entity_expansion_count_check(xml) doc = REXML::Document.new(xml) doc.root.children.first.value puts "DOM: entity_expansion_count: #{doc.entity_expansion_count}" end def sax_entity_expansion_count_check(xml) sax = REXML::Parsers::SAX2Parser.new(xml) sax.parse puts "SAX: entity_expansion_count: #{sax.entity_expansion_count}" end def pull_entity_expansion_count_check(xml) parser = REXML::Parsers::PullParser.new(xml) while parser.has_next? parser.pull end puts "Pull: entity_expansion_count: #{parser.entity_expansion_count}" end xml = < ]> &a; XML dom_entity_expansion_count_check(xml) sax_entity_expansion_count_check(xml) pull_entity_expansion_count_check(xml) ``` ``` $ ruby entity_expansion_text_limit.rb s.bytesize: 10 sum + s.bytesize : 10 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx s.bytesize: 10 sum + s.bytesize : 20 > Security.entity_expansion_text_limit: 10240 : yyyyyyyyyy s.bytesize: 10 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : zzzzzzzzzz s.bytesize: 30 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz s.bytesize: 10 sum + s.bytesize : 10 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx s.bytesize: 10 sum + s.bytesize : 20 > Security.entity_expansion_text_limit: 10240 : yyyyyyyyyy s.bytesize: 10 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : zzzzzzzzzz s.bytesize: 30 sum + s.bytesize : 60 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz s.bytesize: 10 sum + s.bytesize : 10 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx s.bytesize: 10 sum + s.bytesize : 20 > Security.entity_expansion_text_limit: 10240 : yyyyyyyyyy s.bytesize: 10 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : zzzzzzzzzz s.bytesize: 30 sum + s.bytesize : 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz s.bytesize: 90 sum + s.bytesize : 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz DOM: entity_expansion_count: 13 rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 180 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 270 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz SAX: entity_expansion_count: 13 rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 180 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 270 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz Pull: entity_expansion_count: 13 ``` 90 bytes is the expected value, but SAX and Pull exceed 90 bytes due to unnecessary total processing. --- lib/rexml/parsers/baseparser.rb | 4 +--- test/test_document.rb | 20 ++++++++++++++++++++ test/test_pullparser.rb | 30 ++++++++++++++++++++++++++++++ test/test_sax.rb | 24 ++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 28810bfa..342f9482 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -548,15 +548,13 @@ def unnormalize( string, entities=nil, filter=nil ) } matches.collect!{|x|x[0]}.compact! if matches.size > 0 - sum = 0 matches.each do |entity_reference| unless filter and filter.include?(entity_reference) entity_value = entity( entity_reference, entities ) if entity_value re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/ rv.gsub!( re, entity_value ) - sum += rv.bytesize - if sum > Security.entity_expansion_text_limit + if rv.bytesize > Security.entity_expansion_text_limit raise "entity expansion has grown too large" end else diff --git a/test/test_document.rb b/test/test_document.rb index 0764631d..72ec3579 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -33,10 +33,12 @@ def test_new class EntityExpansionLimitTest < Test::Unit::TestCase def setup @default_entity_expansion_limit = REXML::Security.entity_expansion_limit + @default_entity_expansion_text_limit = REXML::Security.entity_expansion_text_limit end def teardown REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + REXML::Security.entity_expansion_text_limit = @default_entity_expansion_text_limit end class GeneralEntityTest < self @@ -126,6 +128,24 @@ def test_with_default_entity doc.root.children.first.value end end + + def test_entity_expansion_text_limit + xml = <<-XML + + + + + + +]> +&a; + XML + + REXML::Security.entity_expansion_text_limit = 90 + doc = REXML::Document.new(xml) + assert_equal(90, doc.root.children.first.value.bytesize) + end end class ParameterEntityTest < self diff --git a/test/test_pullparser.rb b/test/test_pullparser.rb index 55205af8..827fad1d 100644 --- a/test/test_pullparser.rb +++ b/test/test_pullparser.rb @@ -159,10 +159,12 @@ def test_peek class EntityExpansionLimitTest < Test::Unit::TestCase def setup @default_entity_expansion_limit = REXML::Security.entity_expansion_limit + @default_entity_expansion_text_limit = REXML::Security.entity_expansion_text_limit end def teardown REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + REXML::Security.entity_expansion_text_limit = @default_entity_expansion_text_limit end class GeneralEntityTest < self @@ -249,6 +251,34 @@ def test_with_default_entity end end end + + def test_entity_expansion_text_limit + source = <<-XML + + + + + +]> +&a; + XML + + REXML::Security.entity_expansion_text_limit = 90 + parser = REXML::Parsers::PullParser.new(source) + events = {} + element_name = '' + while parser.has_next? + event = parser.pull + case event.event_type + when :start_element + element_name = event[0] + when :text + events[element_name] = event[1] + end + end + assert_equal(90, events['member'].size) + end end end end diff --git a/test/test_sax.rb b/test/test_sax.rb index 5e3ad75b..f452de50 100644 --- a/test/test_sax.rb +++ b/test/test_sax.rb @@ -102,10 +102,12 @@ def test_sax2 class EntityExpansionLimitTest < Test::Unit::TestCase def setup @default_entity_expansion_limit = REXML::Security.entity_expansion_limit + @default_entity_expansion_text_limit = REXML::Security.entity_expansion_text_limit end def teardown REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + REXML::Security.entity_expansion_text_limit = @default_entity_expansion_text_limit end class GeneralEntityTest < self @@ -182,6 +184,28 @@ def test_with_default_entity sax.parse end end + + def test_entity_expansion_text_limit + source = <<-XML + + + + + +]> +&a; + XML + + REXML::Security.entity_expansion_text_limit = 90 + sax = REXML::Parsers::SAX2Parser.new(source) + text_size = nil + sax.listen(:characters, ["member"]) do |text| + text_size = text.size + end + sax.parse + assert_equal(90, text_size) + end end end From 21d90cbba9a029f85146acbd66c3ce8630b1a608 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 12 Aug 2024 10:02:56 +0900 Subject: [PATCH 06/21] Add 3.3.5 entry --- NEWS.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/NEWS.md b/NEWS.md index a924538e..165b1c76 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,22 @@ # News +## 3.3.5 - 2024-08-12 {#version-3-3-5} + +### Fixes + + * Fixed a bug that `REXML::Security.entity_expansion_text_limit` + check has wrong text size calculation in SAX and pull parsers. + * GH-193 + * GH-195 + * Reported by Viktor Ivarsson. + * Patch by NAITOH Jun. + +### Thanks + + * Viktor Ivarsson + + * NAITOH Jun + ## 3.3.4 - 2024-08-01 {#version-3-3-4} ### Fixes From e14847cee53d26eb162ad786ba12e3cd7a86fce0 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 12 Aug 2024 10:03:34 +0900 Subject: [PATCH 07/21] Bump version --- lib/rexml/rexml.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index bb804b0e..99d574b3 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -31,7 +31,7 @@ module REXML COPYRIGHT = "Copyright © 2001-2008 Sean Russell " DATE = "2008/019" - VERSION = "3.3.5" + VERSION = "3.3.6" REVISION = "" Copyright = COPYRIGHT From 2f019f9b33594b561e1ef39c42fab1f2fda51891 Mon Sep 17 00:00:00 2001 From: Viktor Ivarsson Date: Fri, 16 Aug 2024 03:47:25 +0200 Subject: [PATCH 08/21] Improve `BaseParser#unnormalize` (#194) The current implementation of `#unnormalize` iterates over matched entity references that already has been substituted. With these changes we will reduce the number of redundant calls to `rv.gsub!`. * Reject filtered matches earlier in the loop * Improve `#unnormalize` by removing redundant calls to `rv.gsub!` * Improve `entity_expansion_limit` tests --- Example: ```ruby require "rexml/parsers/baseparser" entity_less_than = "<" entitiy_length = 100 filler_text = "A" filler_length = 100 feed = "#{entity_less_than * entitiy_length}#{filler_text * filler_length}" base_parser = REXML::Parsers::BaseParser.new("") base_parser.unnormalize(feed) # => "<" * 100 + "A" * 100 ``` Before this PR, the example above would require 100 iterations. After this PR, 1 iteration. --------- Co-authored-by: Sutou Kouhei --- lib/rexml/parsers/baseparser.rb | 53 ++++++++++++++++++++++++--------- test/test_pullparser.rb | 14 +++++---- test/test_sax.rb | 12 ++++---- 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 342f9482..b471feff 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -8,6 +8,22 @@ module REXML module Parsers + unless [].respond_to?(:tally) + module EnumerableTally + refine Enumerable do + def tally + counts = {} + each do |item| + counts[item] ||= 0 + counts[item] += 1 + end + counts + end + end + end + using EnumerableTally + end + if StringScanner::Version < "3.0.8" module StringScannerCaptures refine StringScanner do @@ -547,20 +563,29 @@ def unnormalize( string, entities=nil, filter=nil ) [Integer(m)].pack('U*') } matches.collect!{|x|x[0]}.compact! + if filter + matches.reject! do |entity_reference| + filter.include?(entity_reference) + end + end if matches.size > 0 - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - entity_value = entity( entity_reference, entities ) - if entity_value - re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/ - rv.gsub!( re, entity_value ) - if rv.bytesize > Security.entity_expansion_text_limit - raise "entity expansion has grown too large" - end - else - er = DEFAULT_ENTITIES[entity_reference] - rv.gsub!( er[0], er[2] ) if er + matches.tally.each do |entity_reference, n| + entity_expansion_count_before = @entity_expansion_count + entity_value = entity( entity_reference, entities ) + if entity_value + if n > 1 + entity_expansion_count_delta = + @entity_expansion_count - entity_expansion_count_before + record_entity_expansion(entity_expansion_count_delta * (n - 1)) + end + re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/ + rv.gsub!( re, entity_value ) + if rv.bytesize > Security.entity_expansion_text_limit + raise "entity expansion has grown too large" end + else + er = DEFAULT_ENTITIES[entity_reference] + rv.gsub!( er[0], er[2] ) if er end end rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' ) @@ -570,8 +595,8 @@ def unnormalize( string, entities=nil, filter=nil ) private - def record_entity_expansion - @entity_expansion_count += 1 + def record_entity_expansion(delta=1) + @entity_expansion_count += delta if @entity_expansion_count > Security.entity_expansion_limit raise "number of entity expansions exceeded, processing aborted." end diff --git a/test/test_pullparser.rb b/test/test_pullparser.rb index 827fad1d..dbde8779 100644 --- a/test/test_pullparser.rb +++ b/test/test_pullparser.rb @@ -206,21 +206,23 @@ def test_empty_value XML + REXML::Security.entity_expansion_limit = 100000 parser = REXML::Parsers::PullParser.new(source) - assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do - while parser.has_next? - parser.pull - end + while parser.has_next? + parser.pull end + assert_equal(11111, parser.entity_expansion_count) - REXML::Security.entity_expansion_limit = 100 + REXML::Security.entity_expansion_limit = @default_entity_expansion_limit parser = REXML::Parsers::PullParser.new(source) assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do while parser.has_next? parser.pull end end - assert_equal(101, parser.entity_expansion_count) + assert do + parser.entity_expansion_count > @default_entity_expansion_limit + end end def test_with_default_entity diff --git a/test/test_sax.rb b/test/test_sax.rb index f452de50..d31de183 100644 --- a/test/test_sax.rb +++ b/test/test_sax.rb @@ -147,17 +147,19 @@ def test_empty_value XML + REXML::Security.entity_expansion_limit = 100000 sax = REXML::Parsers::SAX2Parser.new(source) - assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do - sax.parse - end + sax.parse + assert_equal(11111, sax.entity_expansion_count) - REXML::Security.entity_expansion_limit = 100 + REXML::Security.entity_expansion_limit = @default_entity_expansion_limit sax = REXML::Parsers::SAX2Parser.new(source) assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do sax.parse end - assert_equal(101, sax.entity_expansion_count) + assert do + sax.entity_expansion_count > @default_entity_expansion_limit + end end def test_with_default_entity From 1c76dbbb7c5f001f4b931b8055e8e2d7578da760 Mon Sep 17 00:00:00 2001 From: Viktor Ivarsson Date: Sat, 17 Aug 2024 04:09:54 +0200 Subject: [PATCH 09/21] Fix RuntimeError in `REXML::Parsers::BaseParser` for valid feeds (#199) GitHub: fix GH-198 Change `#entity` to not match against default entities After this change, the following example will not raise a RuntimeError: ```ruby # rexml/refactor_entity_example.rb $LOAD_PATH.unshift(File.expand_path("lib")) require "rexml/parsers/baseparser" valid_feed = "<p>#{'A' * 10_240}</p>" base_parser = REXML::Parsers::BaseParser.new("") base_parser.unnormalize(valid_feed) # => "

" + "A" * 10_240 + "

" ``` Default entities now gets substituted by this block instead https://github.com/ruby/rexml/blob/e14847cee53d26eb162ad786ba12e3cd7a86fce0/lib/rexml/parsers/baseparser.rb#L560-L563 --------- Co-authored-by: Sutou Kouhei Co-authored-by: NAITOH Jun --- lib/rexml/parsers/baseparser.rb | 16 +++++++------- test/test_pullparser.rb | 30 ++++++++++++++++++++++++++ test/test_sax.rb | 24 +++++++++++++++++++++ test/test_stream.rb | 37 +++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 9 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index b471feff..c03f375f 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -521,15 +521,13 @@ def pull_event private :pull_event def entity( reference, entities ) - value = nil - value = entities[ reference ] if entities - if value - record_entity_expansion - else - value = DEFAULT_ENTITIES[ reference ] - value = value[2] if value - end - unnormalize( value, entities ) if value + return unless entities + + value = entities[ reference ] + return if value.nil? + + record_entity_expansion + unnormalize( value, entities ) end # Escapes all possible entities diff --git a/test/test_pullparser.rb b/test/test_pullparser.rb index dbde8779..005a106a 100644 --- a/test/test_pullparser.rb +++ b/test/test_pullparser.rb @@ -254,6 +254,36 @@ def test_with_default_entity end end + def test_with_only_default_entities + member_value = "<p>#{'A' * @default_entity_expansion_text_limit}</p>" + source = <<-XML + + +#{member_value} + + XML + + parser = REXML::Parsers::PullParser.new(source) + events = {} + element_name = '' + while parser.has_next? + event = parser.pull + case event.event_type + when :start_element + element_name = event[0] + when :text + events[element_name] = event[1] + end + end + + expected_value = "

#{'A' * @default_entity_expansion_text_limit}

" + assert_equal(expected_value, events['member'].strip) + assert_equal(0, parser.entity_expansion_count) + assert do + events['member'].bytesize > @default_entity_expansion_text_limit + end + end + def test_entity_expansion_text_limit source = <<-XML + +#{member_value} + + XML + + sax = REXML::Parsers::SAX2Parser.new(source) + text_value = nil + sax.listen(:characters, ["member"]) do |text| + text_value = text + end + sax.parse + + expected_value = "

#{'A' * @default_entity_expansion_text_limit}

" + assert_equal(expected_value, text_value.strip) + assert_equal(0, sax.entity_expansion_count) + assert do + text_value.bytesize > @default_entity_expansion_text_limit + end + end + def test_entity_expansion_text_limit source = <<-XML + +#{member_value} + + XML + + listener = MyListener.new + class << listener + attr_accessor :text_value + def text(text) + @text_value << text + end + end + listener.text_value = "" + REXML::Document.parse_stream(source, listener) + + expected_value = "

#{'A' * @default_entity_expansion_text_limit}

" + assert_equal(expected_value, listener.text_value.strip) + assert do + listener.text_value.bytesize > @default_entity_expansion_text_limit + end + end + end # For test_listener class RequestReader From c8110b4830c990453e167ccca934e65858308fa1 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sat, 17 Aug 2024 11:25:28 +0900 Subject: [PATCH 10/21] Fix to not allow parameter entity references at internal subsets (#191) ## Why? In the internal subset of DTD, references to parameter entities are not allowed within markup declarations. See: https://www.w3.org/TR/xml/#wfc-PEinInternalSubset > Well-formedness constraint: PEs in Internal Subset > In the internal DTD subset, parameter-entity references MUST NOT occur within markup declarations; they may occur where markup declarations can occur. (This does not apply to references that occur in external parameter entities or to the external subset.) --- lib/rexml/entity.rb | 52 ++-------------- lib/rexml/parsers/baseparser.rb | 3 + test/test_document.rb | 50 ---------------- test/test_entity.rb | 102 +++++++++++++++++++++++++------- 4 files changed, 89 insertions(+), 118 deletions(-) diff --git a/lib/rexml/entity.rb b/lib/rexml/entity.rb index 573db691..12bbad3f 100644 --- a/lib/rexml/entity.rb +++ b/lib/rexml/entity.rb @@ -12,6 +12,7 @@ class Entity < Child EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))" NDATADECL = "\\s+NDATA\\s+#{NAME}" PEREFERENCE = "%#{NAME};" + PEREFERENCE_RE = /#{PEREFERENCE}/um ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))} PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})" ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))" @@ -19,7 +20,7 @@ class Entity < Child GEDECL = "" ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um - attr_reader :name, :external, :ref, :ndata, :pubid + attr_reader :name, :external, :ref, :ndata, :pubid, :value # Create a new entity. Simple entities can be constructed by passing a # name, value to the constructor; this creates a generic, plain entity @@ -68,14 +69,11 @@ def Entity::matches? string end # Evaluates to the unnormalized value of this entity; that is, replacing - # all entities -- both %ent; and &ent; entities. This differs from - # +value()+ in that +value+ only replaces %ent; entities. + # &ent; entities. def unnormalized document.record_entity_expansion unless document.nil? - v = value() - return nil if v.nil? - @unnormalized = Text::unnormalize(v, parent) - @unnormalized + return nil if @value.nil? + @unnormalized = Text::unnormalize(@value, parent) end #once :unnormalized @@ -121,46 +119,6 @@ def to_s write rv rv end - - PEREFERENCE_RE = /#{PEREFERENCE}/um - # Returns the value of this entity. At the moment, only internal entities - # are processed. If the value contains internal references (IE, - # %blah;), those are replaced with their values. IE, if the doctype - # contains: - # - # - # then: - # doctype.entity('yada').value #-> "nanoo bar nanoo" - def value - @resolved_value ||= resolve_value - end - - def parent=(other) - @resolved_value = nil - super - end - - private - def resolve_value - return nil if @value.nil? - return @value unless @value.match?(PEREFERENCE_RE) - - matches = @value.scan(PEREFERENCE_RE) - rv = @value.clone - if @parent - sum = 0 - matches.each do |entity_reference| - entity_value = @parent.entity( entity_reference[0] ) - if sum + entity_value.bytesize > Security.entity_expansion_text_limit - raise "entity expansion has grown too large" - else - sum += entity_value.bytesize - end - rv.gsub!( /%#{entity_reference.join};/um, entity_value ) - end - end - rv - end end # This is a set of entity constants -- the ones defined in the XML diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index c03f375f..e38ff86e 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -141,6 +141,7 @@ class BaseParser } module Private + PEREFERENCE_PATTERN = /#{PEREFERENCE}/um TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um @@ -350,6 +351,8 @@ def pull_event match[4] = match[4][1..-2] # HREF match.delete_at(5) if match.size > 5 # Chop out NDATA decl # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ] + elsif Private::PEREFERENCE_PATTERN.match?(match[2]) + raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source) else match[2] = match[2][1..-2] match.pop if match.size == 4 diff --git a/test/test_document.rb b/test/test_document.rb index 72ec3579..25a8828f 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -147,56 +147,6 @@ def test_entity_expansion_text_limit assert_equal(90, doc.root.children.first.value.bytesize) end end - - class ParameterEntityTest < self - def test_have_value - xml = < - - - - - - - -]> - -EOF - - assert_raise(REXML::ParseException) do - REXML::Document.new(xml) - end - REXML::Security.entity_expansion_limit = 100 - assert_equal(100, REXML::Security.entity_expansion_limit) - assert_raise(REXML::ParseException) do - REXML::Document.new(xml) - end - end - - def test_empty_value - xml = < - - - - - - - -]> - -EOF - - REXML::Document.new(xml) - REXML::Security.entity_expansion_limit = 90 - assert_equal(90, REXML::Security.entity_expansion_limit) - assert_raise(REXML::ParseException) do - REXML::Document.new(xml) - end - end - end end def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source diff --git a/test/test_entity.rb b/test/test_entity.rb index a2b262f7..89f83894 100644 --- a/test/test_entity.rb +++ b/test/test_entity.rb @@ -59,8 +59,7 @@ def test_parse_entity def test_constructor one = [ %q{}, - %q{}, - %q{}, + %q{}, '', '' ] source = %q{ - - + ', + "a", + "B", + "B", + "B", + ], + [ + entity.to_s, + entity.name, + entity.value, + entity.normalized, + entity.unnormalized, + ]) + end + + def test_readers_without_reference + entity = REXML::Entity.new([:entitydecl, "a", "&b;"]) + assert_equal([ + '', + "a", + "&b;", + "&b;", + "&b;", + ], + [ + entity.to_s, + entity.name, + entity.value, + entity.normalized, + entity.unnormalized, + ]) + end + + def test_readers_with_nested_references + doctype = REXML::DocType.new('root') + doctype.add(REXML::Entity.new([:entitydecl, "a", "&b;"])) + doctype.add(REXML::Entity.new([:entitydecl, "b", "X"])) + assert_equal([ + "a", + "&b;", + "&b;", + "X", + "b", + "X", + "X", + "X", + ], + [ + doctype.entities["a"].name, + doctype.entities["a"].value, + doctype.entities["a"].normalized, + doctype.entities["a"].unnormalized, + doctype.entities["b"].name, + doctype.entities["b"].value, + doctype.entities["b"].normalized, + doctype.entities["b"].unnormalized, + ]) + end + + def test_parameter_entity_reference_forbidden_by_internal_subset_in_parser + source = ' ]>' + parser = REXML::Parsers::BaseParser.new(source) + exception = assert_raise(REXML::ParseException) do + while parser.has_next? + parser.pull + end + end + assert_equal(<<-DETAIL, exception.to_s) +Parameter entity references forbidden in internal subset: "%a;" +Line: 1 +Position: 54 +Last 80 unconsumed characters: + DETAIL + end + def test_entity_string_limit template = ' ]> $' len = 5120 # 5k per entity @@ -122,22 +198,6 @@ def test_entity_string_limit end end - def test_entity_string_limit_for_parameter_entity - template = ' ]>' - len = 5120 # 5k per entity - template.sub!(/\^/, "B" * len) - - # 10k is OK - entities = '%a;' * 2 # 5k entity * 2 = 10k - REXML::Document.new(template.sub(/\$/, entities)) - - # above 10k explodes - entities = '%a;' * 3 # 5k entity * 2 = 15k - assert_raise(REXML::ParseException) do - REXML::Document.new(template.sub(/\$/, entities)) - end - end - def test_raw source = ' @@ -161,7 +221,7 @@ def test_lazy_evaluation def test_entity_replacement source = %q{ - ]> + ]> &WhatHeSaid;} d = REXML::Document.new( source ) From 790ad5c8530d1b6f6ad7445c085a7403119c5150 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 17 Aug 2024 15:54:14 +0900 Subject: [PATCH 11/21] test: split duplicated attribute case and namespace conflict case --- test/test_core.rb | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/test/test_core.rb b/test/test_core.rb index e1fba8a7..b079c203 100644 --- a/test/test_core.rb +++ b/test/test_core.rb @@ -114,22 +114,35 @@ def test_attribute name4='test4'/>).join(' '), e.to_s end - def test_attribute_namespace_conflict + def test_attribute_duplicated # https://www.w3.org/TR/xml-names/#uniqAttrs message = <<-MESSAGE.chomp Duplicate attribute "a" -Line: 4 -Position: 140 +Line: 2 +Position: 24 Last 80 unconsumed characters: /> MESSAGE assert_raise(REXML::ParseException.new(message)) do Document.new(<<-XML) + + + + XML + end + end + + def test_attribute_namespace_conflict + # https://www.w3.org/TR/xml-names/#uniqAttrs + message = <<-MESSAGE.chomp +Namespace conflict in adding attribute "a": Prefix "n1" = "http://www.w3.org" and prefix "n2" = "http://www.w3.org" + MESSAGE + assert_raise(REXML::ParseException.new(message)) do + Document.new(<<-XML) - - + xmlns:n2="http://www.w3.org"> + XML end From 6422fa34494fd4145d7bc68fbbe9525d42becf62 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 17 Aug 2024 16:10:05 +0900 Subject: [PATCH 12/21] Use loop instead of recursive call for Element#root It's for performance and avoiding stack level too deep. --- lib/rexml/element.rb | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index a5808d7c..27132926 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -441,9 +441,14 @@ def root_node # Related: #root_node, #document. # def root - return elements[1] if self.kind_of? Document - return self if parent.kind_of? Document or parent.nil? - return parent.root + target = self + while target + return target.elements[1] if target.kind_of? Document + parent = target.parent + return target if parent.kind_of? Document or parent.nil? + target = parent + end + nil end # :call-seq: From fdbffe744b38811be8b1cf6a9eec3eea4d71c412 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 17 Aug 2024 16:14:19 +0900 Subject: [PATCH 13/21] Use loop instead of recursive call for Element#namespace It's for performance and avoiding stack level too deep. --- lib/rexml/element.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index 27132926..eb802165 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -624,8 +624,12 @@ def namespace(prefix=nil) else prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns' end - ns = attributes[ prefix ] - ns = parent.namespace(prefix) if ns.nil? and parent + ns = nil + target = self + while ns.nil? and target + ns = target.attributes[prefix] + target = target.parent + end ns = '' if ns.nil? and prefix == 'xmlns' return ns end From df3a0cc83013f3cde7b7c2044e3ce00bcad321cb Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 17 Aug 2024 16:18:58 +0900 Subject: [PATCH 14/21] test: fix indent --- test/parser/test_sax2.rb | 276 ++++++++++++++++---------------- test/parser/test_tree.rb | 48 +++--- test/parser/test_ultra_light.rb | 94 +++++------ 3 files changed, 209 insertions(+), 209 deletions(-) diff --git a/test/parser/test_sax2.rb b/test/parser/test_sax2.rb index 91d135f5..9cc76ffb 100644 --- a/test/parser/test_sax2.rb +++ b/test/parser/test_sax2.rb @@ -4,200 +4,200 @@ require "rexml/sax2listener" module REXMLTests -class TestSAX2Parser < Test::Unit::TestCase - class TestDocumentTypeDeclaration < self - private - def xml(internal_subset) - <<-XML + class TestSAX2Parser < Test::Unit::TestCase + class TestDocumentTypeDeclaration < self + private + def xml(internal_subset) + <<-XML - XML - end + XML + end - class TestEntityDeclaration < self - class Listener - include REXML::SAX2Listener - attr_reader :entity_declarations - def initialize - @entity_declarations = [] - end + class TestEntityDeclaration < self + class Listener + include REXML::SAX2Listener + attr_reader :entity_declarations + def initialize + @entity_declarations = [] + end - def entitydecl(declaration) - super - @entity_declarations << declaration + def entitydecl(declaration) + super + @entity_declarations << declaration + end end - end - private - def parse(internal_subset) - listener = Listener.new - parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset)) - parser.listen(listener) - parser.parse - listener.entity_declarations - end + private + def parse(internal_subset) + listener = Listener.new + parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset)) + parser.listen(listener) + parser.parse + listener.entity_declarations + end - class TestGeneralEntity < self - class TestValue < self - def test_double_quote - assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET)) + class TestGeneralEntity < self + class TestValue < self + def test_double_quote + assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET)) - INTERNAL_SUBSET - end + INTERNAL_SUBSET + end - def test_single_quote - assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET)) + def test_single_quote + assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET)) - INTERNAL_SUBSET + INTERNAL_SUBSET + end end - end - class TestExternlID < self - class TestSystem < self - def test_with_ndata - declaration = [ - "name", - "SYSTEM", "system-literal", - "NDATA", "ndata-name", - ] - assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) + class TestExternlID < self + class TestSystem < self + def test_with_ndata + declaration = [ + "name", + "SYSTEM", "system-literal", + "NDATA", "ndata-name", + ] + assert_equal([declaration], + parse(<<-INTERNAL_SUBSET)) + INTERNAL_SUBSET + end + + def test_without_ndata + declaration = [ + "name", + "SYSTEM", "system-literal", + ] + assert_equal([declaration], + parse(<<-INTERNAL_SUBSET)) + + INTERNAL_SUBSET + end + end + + class TestPublic < self + def test_with_ndata + declaration = [ + "name", + "PUBLIC", "public-literal", "system-literal", + "NDATA", "ndata-name", + ] + assert_equal([declaration], + parse(<<-INTERNAL_SUBSET)) + + INTERNAL_SUBSET + end + + def test_without_ndata + declaration = [ + "name", + "PUBLIC", "public-literal", "system-literal", + ] + assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) + + INTERNAL_SUBSET + end + end + end + end + + class TestParameterEntity < self + class TestValue < self + def test_double_quote + assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET)) + INTERNAL_SUBSET end - def test_without_ndata - declaration = [ - "name", - "SYSTEM", "system-literal", - ] - assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) - + def test_single_quote + assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET)) + INTERNAL_SUBSET end end - class TestPublic < self - def test_with_ndata + class TestExternlID < self + def test_system declaration = [ + "%", "name", - "PUBLIC", "public-literal", "system-literal", - "NDATA", "ndata-name", + "SYSTEM", "system-literal", ] assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) - + parse(<<-INTERNAL_SUBSET)) + INTERNAL_SUBSET end - def test_without_ndata + def test_public declaration = [ + "%", "name", "PUBLIC", "public-literal", "system-literal", ] assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) - + INTERNAL_SUBSET end end end end - class TestParameterEntity < self - class TestValue < self - def test_double_quote - assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET)) - - INTERNAL_SUBSET + class TestNotationDeclaration < self + class Listener + include REXML::SAX2Listener + attr_reader :notation_declarations + def initialize + @notation_declarations = [] end - def test_single_quote - assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET)) - - INTERNAL_SUBSET + def notationdecl(*declaration) + super + @notation_declarations << declaration end end + private + def parse(internal_subset) + listener = Listener.new + parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset)) + parser.listen(listener) + parser.parse + listener.notation_declarations + end + class TestExternlID < self def test_system - declaration = [ - "%", - "name", - "SYSTEM", "system-literal", - ] + declaration = ["name", "SYSTEM", nil, "system-literal"] assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) - - INTERNAL_SUBSET - end - - def test_public - declaration = [ - "%", - "name", - "PUBLIC", "public-literal", "system-literal", - ] - assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) - - INTERNAL_SUBSET - end - end - end - end - - class TestNotationDeclaration < self - class Listener - include REXML::SAX2Listener - attr_reader :notation_declarations - def initialize - @notation_declarations = [] - end - - def notationdecl(*declaration) - super - @notation_declarations << declaration - end - end - - private - def parse(internal_subset) - listener = Listener.new - parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset)) - parser.listen(listener) - parser.parse - listener.notation_declarations - end - - class TestExternlID < self - def test_system - declaration = ["name", "SYSTEM", nil, "system-literal"] - assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) + parse(<<-INTERNAL_SUBSET)) - INTERNAL_SUBSET - end + INTERNAL_SUBSET + end - def test_public - declaration = ["name", "PUBLIC", "public-literal", "system-literal"] - assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) + def test_public + declaration = ["name", "PUBLIC", "public-literal", "system-literal"] + assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) - INTERNAL_SUBSET + INTERNAL_SUBSET + end end - end - class TestPublicID < self - def test_literal - declaration = ["name", "PUBLIC", "public-literal", nil] - assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) + class TestPublicID < self + def test_literal + declaration = ["name", "PUBLIC", "public-literal", nil] + assert_equal([declaration], + parse(<<-INTERNAL_SUBSET)) - INTERNAL_SUBSET + INTERNAL_SUBSET + end end end end end end -end diff --git a/test/parser/test_tree.rb b/test/parser/test_tree.rb index 8a5d9d12..88bc075c 100644 --- a/test/parser/test_tree.rb +++ b/test/parser/test_tree.rb @@ -4,40 +4,40 @@ require "rexml/parsers/treeparser" module REXMLTests -class TestTreeParser < Test::Unit::TestCase - class TestInvalid < self - def test_unmatched_close_tag - xml = "" - exception = assert_raise(REXML::ParseException) do - parse(xml) - end - assert_equal(<<-MESSAGE, exception.to_s) + class TestTreeParser < Test::Unit::TestCase + class TestInvalid < self + def test_unmatched_close_tag + xml = "" + exception = assert_raise(REXML::ParseException) do + parse(xml) + end + assert_equal(<<-MESSAGE, exception.to_s) Missing end tag for 'root' (got 'not-root') Line: 1 Position: #{xml.bytesize} Last 80 unconsumed characters: - MESSAGE - end - - def test_no_close_tag - xml = "" - exception = assert_raise(REXML::ParseException) do - parse(xml) + MESSAGE end - assert_equal(<<-MESSAGE, exception.to_s) + + def test_no_close_tag + xml = "" + exception = assert_raise(REXML::ParseException) do + parse(xml) + end + assert_equal(<<-MESSAGE, exception.to_s) No close tag for /root Line: 1 Position: #{xml.bytesize} Last 80 unconsumed characters: - MESSAGE - end + MESSAGE + end - private - def parse(xml) - document = REXML::Document.new - parser = REXML::Parsers::TreeParser.new(xml, document) - parser.parse + private + def parse(xml) + document = REXML::Document.new + parser = REXML::Parsers::TreeParser.new(xml, document) + parser.parse + end end end end -end diff --git a/test/parser/test_ultra_light.rb b/test/parser/test_ultra_light.rb index b3f576ff..d1364d6a 100644 --- a/test/parser/test_ultra_light.rb +++ b/test/parser/test_ultra_light.rb @@ -3,66 +3,66 @@ require "rexml/parsers/ultralightparser" module REXMLTests -class TestUltraLightParser < Test::Unit::TestCase - class TestDocumentTypeDeclaration < self - def test_entity_declaration - assert_equal([ - [ - :start_doctype, - :parent, - "root", - "SYSTEM", - "urn:x-test", - nil, - [:entitydecl, "name", "value"] + class TestUltraLightParser < Test::Unit::TestCase + class TestDocumentTypeDeclaration < self + def test_entity_declaration + assert_equal([ + [ + :start_doctype, + :parent, + "root", + "SYSTEM", + "urn:x-test", + nil, + [:entitydecl, "name", "value"] + ], + [:start_element, :parent, "root", {}], ], - [:start_element, :parent, "root", {}], - ], - parse(<<-INTERNAL_SUBSET)) + parse(<<-INTERNAL_SUBSET)) - INTERNAL_SUBSET - end + INTERNAL_SUBSET + end - private - def xml(internal_subset) - <<-XML + private + def xml(internal_subset) + <<-XML - XML - end + XML + end - def parse(internal_subset) - parser = REXML::Parsers::UltraLightParser.new(xml(internal_subset)) - normalize(parser.parse) - end + def parse(internal_subset) + parser = REXML::Parsers::UltraLightParser.new(xml(internal_subset)) + normalize(parser.parse) + end - def normalize(root) - root.collect do |child| - normalize_child(child) + def normalize(root) + root.collect do |child| + normalize_child(child) + end end - end - def normalize_child(child) - tag = child.first - case tag - when :start_doctype - normalized_parent = :parent - normalized_doctype = child.dup - normalized_doctype[1] = normalized_parent - normalized_doctype - when :start_element - tag, _parent, name, attributes, *children = child - normalized_parent = :parent - normalized_children = children.collect do |sub_child| - normalize_child(sub_child) + def normalize_child(child) + tag = child.first + case tag + when :start_doctype + normalized_parent = :parent + normalized_doctype = child.dup + normalized_doctype[1] = normalized_parent + normalized_doctype + when :start_element + tag, _parent, name, attributes, *children = child + normalized_parent = :parent + normalized_children = children.collect do |sub_child| + normalize_child(sub_child) + end + [tag, normalized_parent, name, attributes, *normalized_children] + else + child end - [tag, normalized_parent, name, attributes, *normalized_children] - else - child end end end end -end From 6e00a14daf2f901df535eafe96cc94d43a957ffe Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 17 Aug 2024 16:20:50 +0900 Subject: [PATCH 15/21] test: fix indent --- test/parser/test_sax2.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/parser/test_sax2.rb b/test/parser/test_sax2.rb index 9cc76ffb..c2548907 100644 --- a/test/parser/test_sax2.rb +++ b/test/parser/test_sax2.rb @@ -177,12 +177,12 @@ def test_system assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) - INTERNAL_SUBSET - end + INTERNAL_SUBSET + end - def test_public - declaration = ["name", "PUBLIC", "public-literal", "system-literal"] - assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) + def test_public + declaration = ["name", "PUBLIC", "public-literal", "system-literal"] + assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) INTERNAL_SUBSET end From 35e1681a179c28d5b6ec97d4ab1c110e5ac00303 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 17 Aug 2024 16:21:19 +0900 Subject: [PATCH 16/21] test tree-parser: move common method to base class --- test/parser/test_tree.rb | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/test/parser/test_tree.rb b/test/parser/test_tree.rb index 88bc075c..cdd28d2c 100644 --- a/test/parser/test_tree.rb +++ b/test/parser/test_tree.rb @@ -5,6 +5,12 @@ module REXMLTests class TestTreeParser < Test::Unit::TestCase + private def parse(xml) + document = REXML::Document.new + parser = REXML::Parsers::TreeParser.new(xml, document) + parser.parse + end + class TestInvalid < self def test_unmatched_close_tag xml = "" @@ -31,13 +37,6 @@ def test_no_close_tag Last 80 unconsumed characters: MESSAGE end - - private - def parse(xml) - document = REXML::Document.new - parser = REXML::Parsers::TreeParser.new(xml, document) - parser.parse - end end end end From 2b47b161db19c38c5e45e36c2008c045543e976e Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 17 Aug 2024 17:05:29 +0900 Subject: [PATCH 17/21] parser: move duplicated end tag check to BaseParser --- lib/rexml/parsers/baseparser.rb | 4 ++++ lib/rexml/parsers/streamparser.rb | 8 -------- lib/rexml/parsers/treeparser.rb | 7 ------- test/parser/test_tree.rb | 2 +- 4 files changed, 5 insertions(+), 16 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index e38ff86e..093af36a 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -249,6 +249,10 @@ def pull_event if @document_status == :in_doctype raise ParseException.new("Malformed DOCTYPE: unclosed", @source) end + unless @tags.empty? + path = "/" + @tags.join("/") + raise ParseException.new("Missing end tag for '#{path}'", @source) + end return [ :end_document ] end return @stack.shift if @stack.size > 0 diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index fa3ac496..e2da2a7d 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -7,7 +7,6 @@ class StreamParser def initialize source, listener @listener = listener @parser = BaseParser.new( source ) - @tag_stack = [] end def add_listener( listener ) @@ -20,21 +19,14 @@ def parse event = @parser.pull case event[0] when :end_document - unless @tag_stack.empty? - tag_path = "/" + @tag_stack.join("/") - raise ParseException.new("Missing end tag for '#{tag_path}'", - @parser.source) - end return when :start_element - @tag_stack << event[1] attrs = event[2].each do |n, v| event[2][n] = @parser.unnormalize( v ) end @listener.tag_start( event[1], attrs ) when :end_element @listener.tag_end( event[1] ) - @tag_stack.pop when :text unnormalized = @parser.unnormalize( event[1] ) @listener.text( unnormalized ) diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index 0cb6f7cc..4565a406 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -15,7 +15,6 @@ def add_listener( listener ) end def parse - tag_stack = [] entities = nil begin while true @@ -23,19 +22,13 @@ def parse #STDERR.puts "TREEPARSER GOT #{event.inspect}" case event[0] when :end_document - unless tag_stack.empty? - raise ParseException.new("No close tag for #{@build_context.xpath}", - @parser.source, @parser) - end return when :start_element - tag_stack.push(event[1]) el = @build_context = @build_context.add_element( event[1] ) event[2].each do |key, value| el.attributes[key]=Attribute.new(key,value,self) end when :end_element - tag_stack.pop @build_context = @build_context.parent when :text if @build_context[-1].instance_of? Text diff --git a/test/parser/test_tree.rb b/test/parser/test_tree.rb index cdd28d2c..315be9c2 100644 --- a/test/parser/test_tree.rb +++ b/test/parser/test_tree.rb @@ -31,7 +31,7 @@ def test_no_close_tag parse(xml) end assert_equal(<<-MESSAGE, exception.to_s) -No close tag for /root +Missing end tag for '/root' Line: 1 Position: #{xml.bytesize} Last 80 unconsumed characters: From cb158582f18cebb3bf7b3f21f230e2fb17d435aa Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 17 Aug 2024 17:39:14 +0900 Subject: [PATCH 18/21] parser: keep the current namespaces instead of stack of Set It improves namespace resolution performance for deep element. --- lib/rexml/parsers/baseparser.rb | 45 +++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 093af36a..9ed032d3 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -181,7 +181,8 @@ def stream=( source ) @tags = [] @stack = [] @entities = [] - @nsstack = [] + @namespaces = {} + @namespaces_restore_stack = [] end def position @@ -285,7 +286,6 @@ def pull_event @source.position = start_position raise REXML::ParseException.new(message, @source) end - @nsstack.unshift(Set.new) name = parse_name(base_error_message) if @source.match(/\s*\[/um, true) id = [nil, nil, nil] @@ -379,7 +379,7 @@ def pull_event val = attdef[4] if val == "#FIXED " pairs[attdef[0]] = val if attdef[0] =~ /^xmlns:(.*)/ - @nsstack[0] << $1 + @namespaces[$1] = val end end end @@ -432,7 +432,7 @@ def pull_event # here explicitly. @source.ensure_buffer if @source.match("/", true) - @nsstack.shift + @namespaces_restore_stack.pop last_tag = @tags.pop md = @source.match(Private::CLOSE_PATTERN, true) if md and !last_tag @@ -477,18 +477,18 @@ def pull_event @document_status = :in_element @prefixes.clear @prefixes << md[2] if md[2] - @nsstack.unshift(curr_ns=Set.new) - attributes, closed = parse_attributes(@prefixes, curr_ns) + push_namespaces_restore + attributes, closed = parse_attributes(@prefixes) # Verify that all of the prefixes have been defined for prefix in @prefixes - unless @nsstack.find{|k| k.member?(prefix)} + unless @namespaces.key?(prefix) raise UndefinedNamespaceException.new(prefix,@source,self) end end if closed @closed = tag - @nsstack.shift + pop_namespaces_restore else if @tags.empty? and @have_root raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source) @@ -599,6 +599,31 @@ def unnormalize( string, entities=nil, filter=nil ) end private + def add_namespace(prefix, uri) + @namespaces_restore_stack.last[prefix] = @namespaces[prefix] + if uri.nil? + @namespaces.delete(prefix) + else + @namespaces[prefix] = uri + end + end + + def push_namespaces_restore + namespaces_restore = {} + @namespaces_restore_stack.push(namespaces_restore) + namespaces_restore + end + + def pop_namespaces_restore + namespaces_restore = @namespaces_restore_stack.pop + namespaces_restore.each do |prefix, uri| + if uri.nil? + @namespaces.delete(prefix) + else + @namespaces[prefix] = uri + end + end + end def record_entity_expansion(delta=1) @entity_expansion_count += delta @@ -727,7 +752,7 @@ def process_instruction [:processing_instruction, name, content] end - def parse_attributes(prefixes, curr_ns) + def parse_attributes(prefixes) attributes = {} closed = false while true @@ -770,7 +795,7 @@ def parse_attributes(prefixes, curr_ns) "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" raise REXML::ParseException.new( msg, @source, self) end - curr_ns << local_part + add_namespace(local_part, value) elsif prefix prefixes << prefix unless prefix == "xml" end From 6109e0183cecf4f8b587d76209716cb1bbcd6bd5 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Wed, 21 Aug 2024 15:23:00 +0900 Subject: [PATCH 19/21] Fix a bug that Stream parser doesn't expand the user-defined entity references for "text" (#200) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Why? Pull parser expands character references and predefined entity references, but doesn't expand user-defined entity references. ## Change - text_stream_unnormalize.rb ``` $LOAD_PATH.unshift(File.expand_path("lib")) require 'rexml/document' require 'rexml/parsers/sax2parser' require 'rexml/parsers/pullparser' require 'rexml/parsers/streamparser' require 'rexml/streamlistener' xml = < ]>&la;&lala;<P> <I> <B> Text </B> </I>test™ EOS class StListener include REXML::StreamListener def text(text) puts text end end puts "REXML(DOM)" REXML::Document.new(xml).elements.each("/root/*") {|element| puts element.text} puts "" puts "REXML(Pull)" parser = REXML::Parsers::PullParser.new(xml) while parser.has_next? event = parser.pull case event.event_type when :text puts event[1] end end puts "" puts "REXML(Stream)" parser = REXML::Parsers::StreamParser.new(xml, StListener.new).parse puts "" puts "REXML(SAX)" sax = REXML::Parsers::SAX2Parser.new(xml) sax.listen(:characters) {|x| puts x } sax.parse ``` ## Before (master) ``` $ ruby text_stream_unnormalize.rb REXML(DOM) 1234 --1234--

Text test™ REXML(Pull) 1234 --1234--

Text test™ REXML(Stream) &la; #<= This &lala; #<= This

Text test™ REXML(SAX) 1234 --1234--

Text test™ ``` ## After(This PR) ``` $ ruby text_stream_unnormalize.rb REXML(DOM) 1234 --1234--

Text test™ REXML(Pull) 1234 --1234--

Text test™ REXML(Stream) 1234 --1234--

Text test™ REXML(SAX) 1234 --1234--

Text test™ ``` --- lib/rexml/parsers/streamparser.rb | 8 +- test/test_stream.rb | 141 +++++++++++++++++++++++++++++- 2 files changed, 147 insertions(+), 2 deletions(-) diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index e2da2a7d..7781fe44 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -7,12 +7,17 @@ class StreamParser def initialize source, listener @listener = listener @parser = BaseParser.new( source ) + @entities = {} end def add_listener( listener ) @parser.add_listener( listener ) end + def entity_expansion_count + @parser.entity_expansion_count + end + def parse # entity string while true @@ -28,7 +33,7 @@ def parse when :end_element @listener.tag_end( event[1] ) when :text - unnormalized = @parser.unnormalize( event[1] ) + unnormalized = @parser.unnormalize( event[1], @entities ) @listener.text( unnormalized ) when :processing_instruction @listener.instruction( *event[1,2] ) @@ -40,6 +45,7 @@ def parse when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl @listener.send( event[0].to_s, *event[1..-1] ) when :entitydecl, :notationdecl + @entities[ event[1] ] = event[2] if event.size == 3 @listener.send( event[0].to_s, event[1..-1] ) when :externalentity entity_reference = event[1] diff --git a/test/test_stream.rb b/test/test_stream.rb index 615d497f..782066c2 100644 --- a/test/test_stream.rb +++ b/test/test_stream.rb @@ -87,6 +87,42 @@ def entity(content) assert_equal(["ISOLat2"], listener.entities) end + + def test_entity_replacement + source = <<-XML + + + +]>&la;&lala; + XML + + listener = MyListener.new + class << listener + attr_accessor :text_values + def text(text) + @text_values << text + end + end + listener.text_values = [] + REXML::Document.parse_stream(source, listener) + assert_equal(["1234", "--1234--"], listener.text_values) + end + + def test_characters_predefined_entities + source = '<P> <I> <B> Text </B> </I>' + + listener = MyListener.new + class << listener + attr_accessor :text_value + def text(text) + @text_value << text + end + end + listener.text_value = "" + REXML::Document.parse_stream(source, listener) + assert_equal("

Text ", listener.text_value) + end end class EntityExpansionLimitTest < Test::Unit::TestCase @@ -100,6 +136,81 @@ def teardown REXML::Security.entity_expansion_text_limit = @default_entity_expansion_text_limit end + def test_have_value + source = <<-XML + + + + + + +]> + +&a; + + XML + + assert_raise(RuntimeError.new("entity expansion has grown too large")) do + REXML::Document.parse_stream(source, MyListener.new) + end + end + + def test_empty_value + source = <<-XML + + + + + + +]> + +&a; + + XML + + listener = MyListener.new + REXML::Security.entity_expansion_limit = 100000 + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.parse + assert_equal(11111, parser.entity_expansion_count) + + REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + parser = REXML::Parsers::StreamParser.new( source, listener ) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + parser.parse + end + assert do + parser.entity_expansion_count > @default_entity_expansion_limit + end + end + + def test_with_default_entity + source = <<-XML + + + +]> + +&a; +&a2; +< + + XML + + listener = MyListener.new + REXML::Security.entity_expansion_limit = 4 + REXML::Document.parse_stream(source, listener) + + REXML::Security.entity_expansion_limit = 3 + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + REXML::Document.parse_stream(source, listener) + end + end + def test_with_only_default_entities member_value = "<p>#{'A' * @default_entity_expansion_text_limit}</p>" source = <<-XML @@ -117,14 +228,42 @@ def text(text) end end listener.text_value = "" - REXML::Document.parse_stream(source, listener) + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.parse expected_value = "

#{'A' * @default_entity_expansion_text_limit}

" assert_equal(expected_value, listener.text_value.strip) + assert_equal(0, parser.entity_expansion_count) assert do listener.text_value.bytesize > @default_entity_expansion_text_limit end end + + def test_entity_expansion_text_limit + source = <<-XML + + + + + +]> +&a; + XML + + listener = MyListener.new + class << listener + attr_accessor :text_value + def text(text) + @text_value << text + end + end + listener.text_value = "" + REXML::Security.entity_expansion_text_limit = 90 + REXML::Document.parse_stream(source, listener) + + assert_equal(90, listener.text_value.size) + end end # For test_listener From 7cb5eaeb221c322b9912f724183294d8ce96bae3 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 17 Aug 2024 17:45:52 +0900 Subject: [PATCH 20/21] parser tree: improve namespace conflicted attribute check performance It was slow for deep element. Reported by l33thaxor. Thanks!!! --- lib/rexml/element.rb | 11 ----------- lib/rexml/parsers/baseparser.rb | 15 +++++++++++++++ test/parse/test_element.rb | 14 ++++++++++++++ test/test_core.rb | 4 ++++ 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index eb802165..4e3a60b9 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -2384,17 +2384,6 @@ def []=( name, value ) elsif old_attr.kind_of? Hash old_attr[value.prefix] = value elsif old_attr.prefix != value.prefix - # Check for conflicting namespaces - if value.prefix != "xmlns" and old_attr.prefix != "xmlns" - old_namespace = old_attr.namespace - new_namespace = value.namespace - if old_namespace == new_namespace - raise ParseException.new( - "Namespace conflict in adding attribute \"#{value.name}\": "+ - "Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+ - "prefix \"#{value.prefix}\" = \"#{new_namespace}\"") - end - end store value.name, {old_attr.prefix => old_attr, value.prefix => value} else diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 9ed032d3..d11c2766 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -754,6 +754,7 @@ def process_instruction def parse_attributes(prefixes) attributes = {} + expanded_names = {} closed = false while true if @source.match(">", true) @@ -805,6 +806,20 @@ def parse_attributes(prefixes) raise REXML::ParseException.new(msg, @source, self) end + unless prefix == "xmlns" + uri = @namespaces[prefix] + expanded_name = [uri, local_part] + existing_prefix = expanded_names[expanded_name] + if existing_prefix + message = "Namespace conflict in adding attribute " + + "\"#{local_part}\": " + + "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " + + "prefix \"#{prefix}\" = \"#{uri}\"" + raise REXML::ParseException.new(message, @source, self) + end + expanded_names[expanded_name] = prefix + end + attributes[name] = value else message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>" diff --git a/test/parse/test_element.rb b/test/parse/test_element.rb index 2b0746ea..ab4818da 100644 --- a/test/parse/test_element.rb +++ b/test/parse/test_element.rb @@ -131,5 +131,19 @@ def test_linear_performance_attribute_value_gt REXML::Document.new('" * n + '">') end end + + def test_linear_performance_deep_same_name_attributes + seq = [100, 500, 1000, 1500, 2000] + assert_linear_performance(seq, rehearsal: 10) do |n| + xml = <<-XML + + +#{"\n" * n} +#{"\n" * n} + + XML + REXML::Document.new(xml) + end + end end end diff --git a/test/test_core.rb b/test/test_core.rb index b079c203..48666c86 100644 --- a/test/test_core.rb +++ b/test/test_core.rb @@ -136,6 +136,10 @@ def test_attribute_namespace_conflict # https://www.w3.org/TR/xml-names/#uniqAttrs message = <<-MESSAGE.chomp Namespace conflict in adding attribute "a": Prefix "n1" = "http://www.w3.org" and prefix "n2" = "http://www.w3.org" +Line: 4 +Position: 140 +Last 80 unconsumed characters: +/> MESSAGE assert_raise(REXML::ParseException.new(message)) do Document.new(<<-XML) From 95871f399eda642a022b03550479b7994895c742 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 22 Aug 2024 09:54:49 +0900 Subject: [PATCH 21/21] Add 3.3.6 entry --- NEWS.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/NEWS.md b/NEWS.md index 165b1c76..6c290678 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,44 @@ # News +## 3.3.6 - 2024-08-22 {#version-3-3-6} + +### Improvements + + * Removed duplicated entity expansions for performance. + * GH-194 + * Patch by Viktor Ivarsson. + + * Improved namespace conflicted attribute check performance. It was + too slow for deep elements. + * Reported by l33thaxor. + +### Fixes + + * Fixed a bug that default entity expansions are counted for + security check. Default entity expansions should not be counted + because they don't have a security risk. + * GH-198 + * GH-199 + * Patch Viktor Ivarsson + + * Fixed a parser bug that parameter entity references in internal + subsets are expanded. It's not allowed in the XML specification. + * GH-191 + * Patch by NAITOH Jun. + + * Fixed a stream parser bug that user-defined entity references in + text aren't expanded. + * GH-200 + * Patch by NAITOH Jun. + +### Thanks + + * Viktor Ivarsson + + * NAITOH Jun + + * l33thaxor + ## 3.3.5 - 2024-08-12 {#version-3-3-5} ### Fixes