diff --git a/src/constants/string_mappings.js b/src/constants/string_mappings.js
deleted file mode 100644
index 0fbaee7d74e..00000000000
--- a/src/constants/string_mappings.js
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
-* Copyright 2012-2018, Plotly, Inc.
-* All rights reserved.
-*
-* This source code is licensed under the MIT license found in the
-* LICENSE file in the root directory of this source tree.
-*/
-
-
-'use strict';
-
-// N.B. HTML entities are listed without the leading '&' and trailing ';'
-// https://www.freeformatter.com/html-entities.html
-
-module.exports = {
- entityToUnicode: {
- 'mu': 'μ',
- '#956': 'μ',
-
- 'amp': '&',
- '#28': '&',
-
- 'lt': '<',
- '#60': '<',
-
- 'gt': '>',
- '#62': '>',
-
- 'nbsp': ' ',
- '#160': ' ',
-
- 'times': '×',
- '#215': '×',
-
- 'plusmn': '±',
- '#177': '±',
-
- 'deg': '°',
- '#176': '°'
- }
-};
diff --git a/src/lib/html2unicode.js b/src/lib/html2unicode.js
index 2b030fa1155..a9ec30b2a4f 100644
--- a/src/lib/html2unicode.js
+++ b/src/lib/html2unicode.js
@@ -10,7 +10,7 @@
'use strict';
var toSuperScript = require('superscript-text');
-var stringMappings = require('../constants/string_mappings');
+var fixEntities = require('./svg_text_utils').convertEntities;
function fixSuperScript(x) {
var idx = 0;
@@ -33,28 +33,6 @@ function stripTags(x) {
return x.replace(/\<.*\>/g, '');
}
-function fixEntities(x) {
- var entityToUnicode = stringMappings.entityToUnicode;
- var idx = 0;
-
- while((idx = x.indexOf('&', idx)) >= 0) {
- var nidx = x.indexOf(';', idx);
- if(nidx < idx) {
- idx += 1;
- continue;
- }
-
- var entity = entityToUnicode[x.slice(idx + 1, nidx)];
- if(entity) {
- x = x.slice(0, idx) + entity + x.slice(nidx + 1);
- } else {
- x = x.slice(0, idx) + x.slice(nidx + 1);
- }
- }
-
- return x;
-}
-
function convertHTMLToUnicode(html) {
return '' +
fixEntities(
diff --git a/src/lib/svg_text_utils.js b/src/lib/svg_text_utils.js
index fc3f718db16..b320c560183 100644
--- a/src/lib/svg_text_utils.js
+++ b/src/lib/svg_text_utils.js
@@ -15,7 +15,6 @@ var d3 = require('d3');
var Lib = require('../lib');
var xmlnsNamespaces = require('../constants/xmlns_namespaces');
-var stringMappings = require('../constants/string_mappings');
var LINE_SPACING = require('../constants/alignment').LINE_SPACING;
// text converter
@@ -223,13 +222,6 @@ var PROTOCOLS = ['http:', 'https:', 'mailto:', '', undefined, ':'];
+// Matches the opening or closing form of any tag named in TAG_STYLES,
+// with optional attributes and an optional self-closing slash.
 var STRIP_TAGS = new RegExp('</?(' + Object.keys(TAG_STYLES).join('|') + ')( [^>]*)?/?>', 'g');
-var ENTITY_TO_UNICODE = Object.keys(stringMappings.entityToUnicode).map(function(k) {
- return {
- regExp: new RegExp('&' + k + ';', 'g'),
- sub: stringMappings.entityToUnicode[k]
- };
-});
-
var NEWLINES = /(\r\n?|\n)/g;
var SPLIT_TAGS = /(<[^<>]*>)/;
@@ -254,6 +246,14 @@ var BR_TAG = /<br(\s+.*)?>/i;
*
* Because we hack in other attributes with style (sub & sup), drop any trailing
* semicolon in user-supplied styles so we can consistently append the tag-dependent style
+ *
+ * These are for tag attributes; Chrome anyway will convert entities in
+ * attribute values, but not in attribute names
+ * you can test this by for example:
+ * > p = document.createElement('p')
+ * > p.innerHTML = '<span styl&#x65;="font-color:r&#x65;d;">Hi</span>'
+ * > p.innerHTML
+ * <- '<span styl&#x65;="font-color:red;">Hi</span>'
*/
var STYLEMATCH = /(^|[\s"'])style\s*=\s*("([^"]*);?"|'([^']*);?')/i;
var HREFMATCH = /(^|[\s"'])href\s*=\s*("([^"]*)"|'([^']*)')/i;
@@ -265,7 +265,8 @@ var POPUPMATCH = /(^|[\s"'])popup\s*=\s*("([\w=,]*)"|'([\w=,]*)')/i;
+/*
+ * Extract a quoted attribute value from the inside of a tag.
+ *
+ * @param {string} _str: the tag contents to search; falsy input gives null
+ * @param {RegExp} re: pattern whose 3rd capture group holds a double-quoted
+ *   value and whose 4th holds a single-quoted one (as the *MATCH patterns do)
+ * @returns the attribute value with HTML entities decoded, or a falsy value
+ *   when the attribute is absent or empty
+ */
 function getQuotedMatch(_str, re) {
     if(!_str) return null;
     var match = _str.match(re);
-    return match && (match[3] || match[4]);
+    var quoted = match && (match[3] || match[4]);
+
+    // nothing captured: hand back the falsy value untouched
+    if(!quoted) return quoted;
+
+    // attribute values get their entities decoded, as browsers do
+    return convertEntities(quoted);
 }
var COLORMATCH = /(^|;)\s*color:/;
@@ -276,19 +277,70 @@ exports.plainText = function(_str) {
return (_str || '').replace(STRIP_TAGS, ' ');
};
-function replaceFromMapObject(_str, list) {
- if(!_str) return '';
+/*
+ * Named HTML entities we decode, mapped to the character each one stands for.
+ *
+ * N.B. HTML entities are listed without the leading '&' and trailing ';'
+ * https://www.freeformatter.com/html-entities.html
+ *
+ * FWIW if we wanted to support the full set, it has 2261 entries:
+ * https://www.w3.org/TR/html5/entities.json
+ * though I notice that some of these are duplicates and/or are missing ";"
+ * eg: "&amp;", "&amp", "&AMP;", and "&AMP" all map to "&"
+ * We no longer need to include numeric entities here, these are now handled
+ * by String.fromCodePoint/fromCharCode
+ *
+ * Anyway the only ones that are really important to allow are the HTML special
+ * chars <, >, and &, because these ones can trigger special processing if not
+ * replaced by the corresponding entity.
+ */
+var entityToUnicode = {
+    mu: 'μ',
+    amp: '&',
+    lt: '<',
+    gt: '>',
+    // non-breaking space, written as an escape so it cannot be mistaken for
+    // (or silently normalized into) an ordinary space
+    nbsp: '\u00a0',
+    times: '×',
+    plusmn: '±',
+    deg: '°'
+};
- for(var i = 0; i < list.length; i++) {
- var item = list[i];
- _str = _str.replace(item.regExp, item.sub);
- }
+// NOTE: in general entities can contain uppercase too (so [a-zA-Z]) but all the
+// ones we support use only lowercase. If we ever change that, update the regex.
+// Numeric entities accept both "&#x" and "&#X" as the hex marker, as HTML does.
+var ENTITY_MATCH = /&(#\d+|#[xX][\da-fA-F]+|[a-z]+);/g;
+
+/*
+ * Replace HTML entities in a string with the characters they stand for.
+ *
+ * Decimal (&#956;) and hex (&#x3bc;) numeric entities are decoded through
+ * fromCodePoint; named entities are decoded only if listed in entityToUnicode.
+ * Anything that cannot be decoded is left exactly as it was typed.
+ *
+ * @param {string} _str: text that may contain entities
+ * @returns {string} the text with every recognized entity decoded
+ */
+function convertEntities(_str) {
+    return _str.replace(ENTITY_MATCH, function(fullMatch, innerMatch) {
+        var outChar;
+        if(innerMatch.charAt(0) === '#') {
+            var radixMarker = innerMatch.charAt(1);
+            // cannot use String.fromCodePoint in IE
+            outChar = fromCodePoint(
+                (radixMarker === 'x' || radixMarker === 'X') ?
+                    parseInt(innerMatch.substr(2), 16) :
+                    parseInt(innerMatch.substr(1), 10)
+            );
+        }
+        // Own-property lookup only: a bare entityToUnicode[innerMatch] also
+        // finds inherited members, so "&constructor;" would be replaced by the
+        // stringified Object function instead of being left alone.
+        else if(Object.prototype.hasOwnProperty.call(entityToUnicode, innerMatch)) {
+            outChar = entityToUnicode[innerMatch];
+        }
-    return _str;
+
+        // as in regular HTML, if we didn't decode the entity just
+        // leave the raw text in place.
+        return outChar || fullMatch;
+    });
 }
-
-function convertEntities(_str) {
- return replaceFromMapObject(_str, ENTITY_TO_UNICODE);
+exports.convertEntities = convertEntities;
+
+/*
+ * Turn a Unicode code point into the string that represents it.
+ *
+ * @param {number} code: the code point
+ * @returns {string|undefined} the character, or undefined when `code` lies
+ *   beyond the last Unicode code point (0x10FFFF)
+ */
+function fromCodePoint(code) {
+    // Past the end of Unicode. Chrome renders � for these, but declining to
+    // convert at all seems more useful.
+    if(code > 0x10FFFF) return;
+
+    // looked up on every call rather than cached at load time
+    var nativeFromCodePoint = String.fromCodePoint;
+    if(nativeFromCodePoint) return nativeFromCodePoint(code);
+
+    // IE doesn't have String.fromCodePoint, so assemble UTF-16 code units by hand
+    // see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint
+    var fromCharCode = String.fromCharCode;
+
+    // a single code unit is enough inside the Basic Multilingual Plane
+    if(code <= 0xFFFF) return fromCharCode(code);
+
+    // everything above it needs a surrogate pair
+    var highSurrogate = (code >> 10) + 0xD7C0;
+    var lowSurrogate = (code % 0x400) + 0xDC00;
+    return fromCharCode(highSurrogate, lowSurrogate);
 }
/*
@@ -302,15 +354,14 @@ function convertEntities(_str) {
* somewhat differently if it does, so just keep track of this when it happens.
*/
function buildSVGText(containerNode, str) {
- str = convertEntities(str)
- /*
- * Normalize behavior between IE and others wrt newlines and whitespace:pre
- * this combination makes IE barf https://github.com/plotly/plotly.js/issues/746
- * Chrome and FF display \n, \r, or \r\n as a space in this mode.
- * I feel like at some point we turned these into <br> but currently we don't so
- * I'm just going to cement what we do now in Chrome and FF
- */
- .replace(NEWLINES, ' ');
+ /*
+ * Normalize behavior between IE and others wrt newlines and whitespace:pre
+ * this combination makes IE barf https://github.com/plotly/plotly.js/issues/746
+ * Chrome and FF display \n, \r, or \r\n as a space in this mode.
+ * I feel like at some point we turned these into <br> but currently we don't so
+ * I'm just going to cement what we do now in Chrome and FF
+ */
+ str = str.replace(NEWLINES, ' ');
var hasLink = false;
@@ -435,7 +486,7 @@ function buildSVGText(containerNode, str) {
newLine();
}
else if(tagStyle === undefined) {
- addTextNode(currentNode, parti);
+ addTextNode(currentNode, convertEntities(parti));
}
else {
// tag - open or close
diff --git a/test/jasmine/tests/svg_text_utils_test.js b/test/jasmine/tests/svg_text_utils_test.js
index c950f728add..4720c27369a 100644
--- a/test/jasmine/tests/svg_text_utils_test.js
+++ b/test/jasmine/tests/svg_text_utils_test.js
@@ -8,6 +8,16 @@ describe('svg+text utils', function() {
describe('convertToTspans', function() {
+ // Holds the environment's own String.fromCodePoint so that a test which
+ // overwrites it (the "IE case" below sets it to undefined) can be undone.
+ var stringFromCodePoint;
+
+ // capture the original once, before any test in this suite runs
+ beforeAll(function() {
+ stringFromCodePoint = String.fromCodePoint;
+ });
+
+ // put the original back after every test, whether or not it was replaced
+ afterEach(function() {
+ String.fromCodePoint = stringFromCodePoint;
+ });
+
function mockTextSVGElement(txt) {
return d3.select('body')
.append('svg')
@@ -300,16 +310,78 @@ describe('svg+text utils', function() {
'100 × 20 ± 0.5 °'
);
- expect(node.text()).toEqual('100μ & < 10 > 0 100 × 20 ± 0.5 °');
+ expect(node.text()).toBe('100μ & < 10 > 0 100 × 20 ± 0.5 °');
});
it('decodes some HTML entities in text (number case)', function() {
var node = mockTextSVGElement(
- '100μ < 10 > 0 ' +
+ '100μ & < 10 > 0 ' +
'100 × 20 ± 0.5 °'
);
- expect(node.text()).toEqual('100μ & < 10 > 0 100 × 20 ± 0.5 °');
+ expect(node.text()).toBe('100μ & < 10 > 0 100 × 20 ± 0.5 °');
+ });
+
+        it('decodes arbitrary decimal and hex number entities', function() {
+            var i = 0;
+            for(var n = 33; n < 0x10FFFF; n = Math.round(n * 1.03)) {
+                // the same code point written three ways: lowercase hex,
+                // decimal, and uppercase hex
+                var node = mockTextSVGElement(
+                    '&#x' + n.toString(16) +
+                    '; = &#' + n.toString() +
+                    '; = &#x' + n.toString(16).toUpperCase() + ';'
+                );
+                var char = String.fromCodePoint(n);
+                expect(node.text()).toBe(char + ' = ' + char + ' = ' + char, n);
+                i++;
+            }
+            // not really necessary to assert this, but we tested 355 characters,
+            // weighted toward the low end but continuing all the way to the
+            // end of the unicode definition
+            expect(i).toBe(355);
+        });
+
+        it('decodes arbitrary decimal and hex number entities (IE case)', function() {
+            // IE does not have String.fromCodePoint
+            String.fromCodePoint = undefined;
+            expect(String.fromCodePoint).toBeUndefined();
+
+            var i = 0;
+            for(var n = 33; n < 0x10FFFF; n = Math.round(n * 1.03)) {
+                // the same code point written three ways: lowercase hex,
+                // decimal, and uppercase hex
+                var node = mockTextSVGElement(
+                    '&#x' + n.toString(16) +
+                    '; = &#' + n.toString() +
+                    '; = &#x' + n.toString(16).toUpperCase() + ';'
+                );
+                // expected value comes from the saved native implementation,
+                // since the global one is stubbed out for this test
+                var char = stringFromCodePoint(n);
+                expect(node.text()).toBe(char + ' = ' + char + ' = ' + char, n);
+                i++;
+            }
+            // not really necessary to assert this, but we tested 355 characters,
+            // weighted toward the low end but continuing all the way to the
+            // end of the unicode definition
+            expect(i).toBe(355);
+        });
+
+ it('does not decode entities prematurely', function() {
+ // Each input below is rendered and its node.html() is compared against
+ // one shared expected value; a separate control input follows.
+ // NOTE(review): these literals look like they lost their entity and tag
+ // markup somewhere (the second case is an empty string, yet every case
+ // is held to the same non-empty expectation) - confirm against the
+ // repository copy before relying on this test.
+ var testCases = [
+ '<b>not boldnot bold</b>',
+ '',
+ '<b>not bold</b>'
+ ];
+ testCases.forEach(function(testCase) {
+ var node = mockTextSVGElement(testCase);
+
+ expect(node.html()).toBe(
+ '<b>not bold</b>', testCase
+ );
+ });
+
+ // control input, checked against its own expected html
+ var controlNode = mockTextSVGElement('bold');
+ expect(controlNode.html()).toBe(
+ 'bold'
+ );
 });
it('supports superscript by itself', function() {