Skip to content

Commit aac622a

Browse files
committed
Merge pull request mozilla#2567 from vyv03354/master
Supports 'H/V', 'EUC-H/V' and '90ms*' CMaps
2 parents a3dd009 + 2ef50c5 commit aac622a

File tree

6 files changed

+81
-5
lines changed

6 files changed

+81
-5
lines changed

src/fonts.js

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,48 @@ var symbolsFonts = {
401401
'Dingbats': true, 'Symbol': true, 'ZapfDingbats': true
402402
};
403403

404+
// Lookup table from a predefined CMap name to the function that converts a
// string in that CMap's byte encoding into a Unicode string:
//   'H' / 'V'                 -> jis7ToUnicode
//   'EUC-H' / 'EUC-V'         -> eucjpToUnicode
//   '90ms-RKSJ-*' / '90msp-*' -> sjisToUnicode
// The converters are function declarations that appear later in the file;
// they are hoisted, so referencing them here is valid.
// NOTE(review): this is a plain object literal, so indexing it with a name
// such as 'constructor' would return an inherited property rather than
// undefined -- confirm that only real CMap names are ever used as keys.
var CMapConverterList = {
405+
'H': jis7ToUnicode,
406+
'V': jis7ToUnicode,
407+
'EUC-H': eucjpToUnicode,
408+
'EUC-V': eucjpToUnicode,
409+
'90ms-RKSJ-H': sjisToUnicode,
410+
'90ms-RKSJ-V': sjisToUnicode,
411+
'90msp-RKSJ-H': sjisToUnicode,
412+
'90msp-RKSJ-V': sjisToUnicode
413+
};
414+
415+
// decodeBytes(bytes, encoding) decodes a byte array into a string using the
// named character encoding (the callers below pass 'euc-jp' or 'shift_jis').
// The implementation is chosen once, when this code is evaluated:
//   1. TextDecoder, when the global exists;
//   2. otherwise FileReaderSync, by wrapping the bytes in a Blob and reading
//      the Blob back as text in the requested encoding;
//   3. otherwise decodeBytes stays undefined and CMapConverterList is
//      replaced by an empty object, so no converter can be looked up and
//      decodeBytes is never reached.
var decodeBytes;
416+
if (typeof TextDecoder !== 'undefined') {
417+
decodeBytes = function(bytes, encoding) {
418+
return new TextDecoder(encoding).decode(bytes);
419+
};
420+
} else if (typeof FileReaderSync !== 'undefined') {
421+
decodeBytes = function(bytes, encoding) {
422+
return new FileReaderSync().readAsText(new Blob([bytes]), encoding);
423+
};
424+
} else {
425+
// Clear the list so that decodeBytes will never be called.
426+
CMapConverterList = {};
427+
}
428+
429+
// Converter for the 'H' and 'V' CMaps (see CMapConverterList).
// Turns `str` into bytes with stringToBytes, sets the high bit (0x80) of
// every byte in place, and decodes the result as 'euc-jp' via decodeBytes.
// Setting the high bit is what lets the 7-bit JIS input be handed to the
// EUC-JP decoder.
// NOTE(review): assumes stringToBytes (defined elsewhere) returns a fresh,
// mutable byte array with one entry per character of `str` -- confirm.
// Returns whatever decodeBytes returns for the modified bytes.
function jis7ToUnicode(str) {
430+
var bytes = stringToBytes(str);
431+
var length = bytes.length;
432+
for (var i = 0; i < length; ++i) {
433+
bytes[i] |= 0x80;
434+
}
435+
return decodeBytes(bytes, 'euc-jp');
436+
}
437+
438+
// Converter for the 'EUC-H' and 'EUC-V' CMaps (see CMapConverterList).
// Passes the bytes of `str` (obtained with stringToBytes) unchanged to
// decodeBytes with the 'euc-jp' encoding label and returns the result.
function eucjpToUnicode(str) {
439+
return decodeBytes(stringToBytes(str), 'euc-jp');
440+
}
441+
442+
// Converter for the '90ms-RKSJ-*' and '90msp-RKSJ-*' CMaps (see
// CMapConverterList).
// Passes the bytes of `str` (obtained with stringToBytes) unchanged to
// decodeBytes with the 'shift_jis' encoding label and returns the result.
function sjisToUnicode(str) {
443+
return decodeBytes(stringToBytes(str), 'shift_jis');
444+
}
445+
404446
// Some characters, e.g. copyrightserif, mapped to the private use area and
405447
// might not be displayed using standard fonts. Mapping/hacking well-known chars
406448
// to the similar equivalents in the normal characters range.
@@ -2282,6 +2324,7 @@ var Font = (function FontClosure() {
22822324

22832325
// Trying to fix encoding using glyph CIDSystemInfo.
22842326
this.loadCidToUnicode(properties);
2327+
this.cidEncoding = properties.cidEncoding;
22852328

22862329
if (properties.toUnicode)
22872330
this.toUnicode = properties.toUnicode;
@@ -4128,8 +4171,8 @@ var Font = (function FontClosure() {
41284171
}
41294172

41304173
var cidEncoding = properties.cidEncoding;
4131-
if (cidEncoding && cidEncoding.indexOf('Uni') === 0) {
4132-
// input is already Unicode for Uni* CMap encodings.
4174+
if (cidEncoding && cidEncoding.indexOf('Identity-') !== 0) {
4175+
// input is already Unicode for non-Identity CMap encodings.
41334176
// However, Unicode-to-CID conversion is needed
41344177
// regardless of the CMap encoding. So we can't reset
41354178
// unicodeToCID.
@@ -4304,8 +4347,20 @@ var Font = (function FontClosure() {
43044347
charsCache = this.charsCache = Object.create(null);
43054348

43064349
glyphs = [];
4307-
4308-
if (this.wideChars) {
4350+
var charsCacheKey = chars;
4351+
4352+
var converter;
4353+
var cidEncoding = this.cidEncoding;
4354+
if (cidEncoding) {
4355+
converter = CMapConverterList[cidEncoding];
4356+
if (converter) {
4357+
chars = converter(chars);
4358+
} else if (cidEncoding.indexOf('Uni') !== 0 &&
4359+
cidEncoding.indexOf('Identity-') !== 0) {
4360+
warn('Unsupported CMap: ' + cidEncoding);
4361+
}
4362+
}
4363+
if (!converter && this.wideChars) {
43094364
// composite fonts have multi-byte strings convert the string from
43104365
// single-byte to multi-byte
43114366
// XXX assuming CIDFonts are two-byte - later need to extract the
@@ -4332,7 +4387,7 @@ var Font = (function FontClosure() {
43324387
}
43334388

43344389
// Enter the translated string into the cache
4335-
return (charsCache[chars] = glyphs);
4390+
return (charsCache[charsCacheKey] = glyphs);
43364391
}
43374392
};
43384393

test/pdfs/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,7 @@
3939
!mixedfonts.pdf
4040
!shading_extend.pdf
4141
!noembed-identity.pdf
42+
!noembed-jis7.pdf
43+
!noembed-eucjp.pdf
44+
!noembed-sjis.pdf
4245
!issue2099-1.pdf

test/pdfs/noembed-eucjp.pdf

1.44 KB
Binary file not shown.

test/pdfs/noembed-jis7.pdf

14.9 KB
Binary file not shown.

test/pdfs/noembed-sjis.pdf

1.44 KB
Binary file not shown.

test/test_manifest.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,24 @@
817817
"rounds": 1,
818818
"type": "eq"
819819
},
820+
{ "id": "noembed-jis7",
821+
"file": "pdfs/noembed-jis7.pdf",
822+
"md5": "a0f6cf5a830f23d0c35994a6aaf92b3d",
823+
"rounds": 1,
824+
"type": "eq"
825+
},
826+
{ "id": "noembed-eucjp",
827+
"file": "pdfs/noembed-eucjp.pdf",
828+
"md5": "d270f2d46db99b70235b4d37cbc313ad",
829+
"rounds": 1,
830+
"type": "eq"
831+
},
832+
{ "id": "noembed-sjis",
833+
"file": "pdfs/noembed-sjis.pdf",
834+
"md5": "51f9d150bf4afe498019b3029d451072",
835+
"rounds": 1,
836+
"type": "eq"
837+
},
820838
{ "id": "issue2099-1",
821839
"file": "pdfs/issue2099-1.pdf",
822840
"md5": "c7eca682d70a976dfc4b7e64d3e9f1ce",

0 commit comments

Comments
 (0)