Skip to content

Commit eb8f4e8

Browse files
committed
Handle some illegal characters in hex string
Do not throw an exception when hex strings are in the wrong format. Currently pdf.js throws an exception for the following hex string: `<7 0 2 15 5 2 2 2 4 3 2 4>`. The issue is that the 15 is not a valid hex character, so pdf.js ends up throwing an exception. This diff changes the parser to process the above hex string as follows: `70 21 55 22 24 32` (note: the final 4 of the hex string is ignored), replicating the behaviour of MuPDF, and no longer throws an exception.
1 parent 7d9938d commit eb8f4e8

File tree

2 files changed

+35
-18
lines changed

2 files changed

+35
-18
lines changed

src/parser.js

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -460,28 +460,34 @@ var Lexer = (function LexerClosure() {
460460
getHexString: function Lexer_getHexString(ch) {
461461
var str = '';
462462
var stream = this.stream;
463-
for (;;) {
463+
var isFirstHex = true;
464+
var firstDigit;
465+
var secondDigit;
466+
while (true) {
464467
ch = stream.getChar();
465-
if (ch == '>') {
466-
break;
467-
}
468468
if (!ch) {
469469
warn('Unterminated hex string');
470470
break;
471-
}
472-
if (specialChars[ch.charCodeAt(0)] != 1) {
473-
var x, x2;
474-
if ((x = toHexDigit(ch)) == -1)
475-
error('Illegal character in hex string: ' + ch);
476-
477-
ch = stream.getChar();
478-
while (specialChars[ch.charCodeAt(0)] == 1)
479-
ch = stream.getChar();
480-
481-
if ((x2 = toHexDigit(ch)) == -1)
482-
error('Illegal character in hex string: ' + ch);
483-
484-
str += String.fromCharCode((x << 4) | x2);
471+
} else if (ch === '>') {
472+
break;
473+
} else if (specialChars[ch.charCodeAt(0)] === 1) {
474+
continue;
475+
} else {
476+
if (isFirstHex) {
477+
firstDigit = toHexDigit(ch);
478+
if (firstDigit === -1) {
479+
warn("Ignoring invalid character '" + ch + "' in hex string");
480+
continue;
481+
}
482+
} else {
483+
secondDigit = toHexDigit(ch);
484+
if (secondDigit === -1) {
485+
warn("Ignoring invalid character '" + ch + "' in hex string");
486+
continue;
487+
}
488+
str += String.fromCharCode((firstDigit << 4) | secondDigit);
489+
}
490+
isFirstHex = !isFirstHex;
485491
}
486492
}
487493
return str;

test/unit/parser_spec.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,17 @@ describe('parser', function() {
1212

1313
expect(result).toEqual(11.234);
1414
});
15+
16+
it('should not throw exception on bad input', function() {
17+
// '7 0 2 15 5 2 2 2 4 3 2 4'
18+
// should be parsed as
19+
// '70 21 55 22 24 32'
20+
var input = new StringStream('7 0 2 15 5 2 2 2 4 3 2 4>');
21+
var lexer = new Lexer(input);
22+
var result = lexer.getHexString('<');
23+
24+
expect(result).toEqual('p!U"$2');
25+
});
1526
});
1627
});
1728

0 commit comments

Comments
 (0)