/*
* Copyright 2011 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkPDFMakeToUnicodeCmap.h"
#include "SkPDFUtils.h"
#include "SkTo.h"
#include "SkUTF.h"
// Writes the fixed preamble of a ToUnicode CMap to |cmap|: the PostScript
// prologue, the /CIDSystemInfo dictionary, the CMap name and type, and one
// codespace range. |multibyte| selects the two-byte codespace
// (<0000>-<FFFF>); otherwise the one-byte codespace (<00>-<FF>) is written.
static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
                                    bool multibyte) {
    // "12 dict begin": 12 is the value suggested by Adobe and must not be
    // changed. It keeps old versions of Adobe Reader from malfunctioning.
    cmap->writeText("/CIDInit /ProcSet findresource begin\n"
                    "12 dict begin\n"
                    "begincmap\n");

    // This /CIDSystemInfo has to stay consistent with the one written by
    // SkPDFFont::populateCIDFont(). The system info object cannot simply be
    // passed in here because the format differs: this is an inline
    // dictionary, not a reference object.
    cmap->writeText("/CIDSystemInfo\n"
                    "<< /Registry (Adobe)\n"
                    "/Ordering (UCS)\n"
                    "/Supplement 0\n"
                    ">> def\n");

    // The CMapName has to stay consistent with the /CIDSystemInfo above.
    // /CMapType 2 means ToUnicode.
    cmap->writeText("/CMapName /Adobe-Identity-UCS def\n"
                    "/CMapType 2 def\n"
                    "1 begincodespacerange\n");

    // The codespace range only tells the PDF processor which codes are valid.
    cmap->writeText(multibyte ? "<0000> <FFFF>\n" : "<00> <FF>\n");
    cmap->writeText("endcodespacerange\n");
}
// Writes the CMap trailer to |cmap|: ends the cmap, defines it as a /CMap
// resource and closes the two dictionaries. No newline follows the last "end".
static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
    cmap->writeText("endcmap\n"
                    "CMapName currentdict /CMap defineresource pop\n"
                    "end\n"
                    "end");
}
namespace {
// One entry of a "beginbfchar" list: a single source code mapped to a single
// Unicode value.
struct BFChar {
// Source code written on the left-hand side of the entry. In single-byte mode
// this holds the offset code used in the CMap rather than the raw glyph ID.
SkGlyphID fGlyphId;
// Unicode value written (as UTF-16BE hex) on the right-hand side of the entry.
SkUnichar fUnicode;
};
// One entry of a "beginbfrange" list: a run of consecutive source codes whose
// Unicode values are consecutive as well.
struct BFRange {
// First source code of the run.
SkGlyphID fStart;
// Last source code of the run (inclusive).
SkGlyphID fEnd;
// Unicode value that fStart maps to; later codes in the run map to the
// following Unicode values.
SkUnichar fUnicode;
};
} // namespace
// Writes the source code |gid| to |cmap| through SkPDFUtils: as a 16-bit
// big-endian value when |multiByte| is set, otherwise as a single byte after
// narrowing it with SkToU8.
static void write_glyph(SkDynamicMemoryWStream* cmap,
                        bool multiByte,
                        SkGlyphID gid) {
    if (!multiByte) {
        SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
        return;
    }
    SkPDFUtils::WriteUInt16BE(cmap, gid);
}
static void append_bfchar_section(const std::vector<BFChar>& bfchar,
bool multiByte,
SkDynamicMemoryWStream* cmap) {
// PDF spec defines that every bf* list can have at most 100 entries.
for (size_t i = 0; i < bfchar.size(); i += 100) {
int count = SkToInt(bfchar.size() - i);
count = SkMin32(count, 100);
cmap->writeDecAsText(count);
cmap->writeText(" beginbfchar\n");
for (int j = 0; j < count; ++j) {
cmap->writeText("<");
write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
cmap->writeText("> <");
SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
cmap->writeText(">\n");
}
cmap->writeText("endbfchar\n");
}
}
static void append_bfrange_section(const std::vector<BFRange>& bfrange,
bool multiByte,
SkDynamicMemoryWStream* cmap) {
// PDF spec defines that every bf* list can have at most 100 entries.
for (size_t i = 0; i < bfrange.size(); i += 100) {
int count = SkToInt(bfrange.size() - i);
count = SkMin32(count, 100);
cmap->writeDecAsText(count);
cmap->writeText(" beginbfrange\n");
for (int j = 0; j < count; ++j) {
cmap->writeText("<");
write_glyph(cmap, multiByte, bfrange[i + j].fStart);
cmap->writeText("> <");
write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
cmap->writeText("> <");
SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
cmap->writeText(">\n");
}
cmap->writeText("endbfrange\n");
}
}
// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
// Technote 5014.
// The function is not static so we can test it in unit tests.
//
// Current implementation guarantees bfchar and bfrange entries do not overlap.
//
// Current implementation does not attempt aggressive optimizations for the
// following case because the specification is not clear.
//
//   4 beginbfchar          1 beginbfchar
//   <0003> <0013>          <0020> <0014>
//   <0005> <0015>    to    endbfchar
//   <0007> <0017>          1 beginbfrange
//   <0020> <0014>          <0003> <0007> <0013>
//   endbfchar              endbfrange
//
// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
// overlap, but succeeding maps supersede preceding maps."
//
// When searching text in a PDF, bfrange will have higher precedence, so
// typing char id 0x0014 in the search box will get glyph id 0x0004 first.
// However, the spec does not say how this kind of conflict is resolved.
//
// For the worst case (having 65536 contiguous Unicode values and using every
// other one of them), the possible saving from aggressive optimization is
// 416KB pre-compressed, which does not provide enough motivation to
// implement it.
void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
const SkPDFGlyphUse* subset,
SkDynamicMemoryWStream* cmap,
bool multiByteGlyphs,
SkGlyphID firstGlyphID,
SkGlyphID lastGlyphID) {
int glyphOffset = 0;
if (!multiByteGlyphs) {
glyphOffset = firstGlyphID - 1;
}
std::vector<BFChar> bfcharEntries;
std::vector<BFRange> bfrangeEntries;
BFRange currentRangeEntry = {0, 0, 0};
bool rangeEmpty = true;
const int limit = (int)lastGlyphID + 1 - glyphOffset;
for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
SkGlyphID gid = i + glyphOffset;
bool inSubset = i < limit && (subset == nullptr || subset->has(gid));
if (!rangeEmpty) {
// PDF spec requires bfrange not changing the higher byte,
// e.g. <1035> <10FF> <2222> is ok, but
// <1035> <1100> <2222> is no good
bool inRange =
i == currentRangeEntry.fEnd + 1 &&
i >> 8 == currentRangeEntry.fStart >> 8 &&
i < limit &&
glyphToUnicode[gid] ==
currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
if (!inSubset || !inRange) {
if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
bfrangeEntries.push_back(currentRangeEntry);
} else {
bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
}
rangeEmpty = true;
}
}
if (inSubset) {
currentRangeEntry.fEnd = i;
if (rangeEmpty) {
currentRangeEntry.fStart = i;
currentRangeEntry.fUnicode = glyphToUnicode[gid];
rangeEmpty = false;
}
}
}
// The spec requires all bfchar entries for a font must come before bfrange
// entries.
append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
}
std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
const SkUnichar* glyphToUnicode,
const SkPDFGlyphUse* subset,
bool multiByteGlyphs,
SkGlyphID firstGlyphID,
SkGlyphID lastGlyphID) {
SkDynamicMemoryWStream cmap;
append_tounicode_header(&cmap, multiByteGlyphs);
SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
firstGlyphID, lastGlyphID);
append_cmap_footer(&cmap);
return cmap.detachAsStream();
}