1 /*
2  * Copyright 2011 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/pdf/SkPDFMakeToUnicodeCmap.h"
9 
10 #include "include/private/SkTo.h"
11 #include "src/pdf/SkPDFUtils.h"
12 #include "src/utils/SkUTF.h"
13 
append_tounicode_header(SkDynamicMemoryWStream * cmap,bool multibyte)14 static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
15                                     bool multibyte) {
16     // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
17     // It's there to prevent old version Adobe Readers from malfunctioning.
18     const char* kHeader =
19         "/CIDInit /ProcSet findresource begin\n"
20         "12 dict begin\n"
21         "begincmap\n";
22     cmap->writeText(kHeader);
23 
24     // The /CIDSystemInfo must be consistent to the one in
25     // SkPDFFont::populateCIDFont().
26     // We can not pass over the system info object here because the format is
27     // different. This is not a reference object.
28     const char* kSysInfo =
29         "/CIDSystemInfo\n"
30         "<<  /Registry (Adobe)\n"
31         "/Ordering (UCS)\n"
32         "/Supplement 0\n"
33         ">> def\n";
34     cmap->writeText(kSysInfo);
35 
36     // The CMapName must be consistent to /CIDSystemInfo above.
37     // /CMapType 2 means ToUnicode.
38     // Codespace range just tells the PDF processor the valid range.
39     const char* kTypeInfoHeader =
40         "/CMapName /Adobe-Identity-UCS def\n"
41         "/CMapType 2 def\n"
42         "1 begincodespacerange\n";
43     cmap->writeText(kTypeInfoHeader);
44     if (multibyte) {
45         cmap->writeText("<0000> <FFFF>\n");
46     } else {
47         cmap->writeText("<00> <FF>\n");
48     }
49     cmap->writeText("endcodespacerange\n");
50 }
51 
append_cmap_footer(SkDynamicMemoryWStream * cmap)52 static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
53     const char kFooter[] =
54         "endcmap\n"
55         "CMapName currentdict /CMap defineresource pop\n"
56         "end\n"
57         "end";
58     cmap->writeText(kFooter);
59 }
60 
61 namespace {
62 struct BFChar {
63     SkGlyphID fGlyphId;
64     SkUnichar fUnicode;
65 };
66 
67 struct BFRange {
68     SkGlyphID fStart;
69     SkGlyphID fEnd;
70     SkUnichar fUnicode;
71 };
72 }  // namespace
73 
write_glyph(SkDynamicMemoryWStream * cmap,bool multiByte,SkGlyphID gid)74 static void write_glyph(SkDynamicMemoryWStream* cmap,
75                         bool multiByte,
76                         SkGlyphID gid) {
77     if (multiByte) {
78         SkPDFUtils::WriteUInt16BE(cmap, gid);
79     } else {
80         SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
81     }
82 }
83 
append_bfchar_section(const std::vector<BFChar> & bfchar,bool multiByte,SkDynamicMemoryWStream * cmap)84 static void append_bfchar_section(const std::vector<BFChar>& bfchar,
85                                   bool multiByte,
86                                   SkDynamicMemoryWStream* cmap) {
87     // PDF spec defines that every bf* list can have at most 100 entries.
88     for (size_t i = 0; i < bfchar.size(); i += 100) {
89         int count = SkToInt(bfchar.size() - i);
90         count = SkMin32(count, 100);
91         cmap->writeDecAsText(count);
92         cmap->writeText(" beginbfchar\n");
93         for (int j = 0; j < count; ++j) {
94             cmap->writeText("<");
95             write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
96             cmap->writeText("> <");
97             SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
98             cmap->writeText(">\n");
99         }
100         cmap->writeText("endbfchar\n");
101     }
102 }
103 
append_bfrange_section(const std::vector<BFRange> & bfrange,bool multiByte,SkDynamicMemoryWStream * cmap)104 static void append_bfrange_section(const std::vector<BFRange>& bfrange,
105                                    bool multiByte,
106                                    SkDynamicMemoryWStream* cmap) {
107     // PDF spec defines that every bf* list can have at most 100 entries.
108     for (size_t i = 0; i < bfrange.size(); i += 100) {
109         int count = SkToInt(bfrange.size() - i);
110         count = SkMin32(count, 100);
111         cmap->writeDecAsText(count);
112         cmap->writeText(" beginbfrange\n");
113         for (int j = 0; j < count; ++j) {
114             cmap->writeText("<");
115             write_glyph(cmap, multiByte, bfrange[i + j].fStart);
116             cmap->writeText("> <");
117             write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
118             cmap->writeText("> <");
119             SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
120             cmap->writeText(">\n");
121         }
122         cmap->writeText("endbfrange\n");
123     }
124 }
125 
126 // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
127 // Technote 5014.
128 // The function is not static so we can test it in unit tests.
129 //
130 // Current implementation guarantees bfchar and bfrange entries do not overlap.
131 //
132 // Current implementation does not attempt aggressive optimizations against
133 // following case because the specification is not clear.
134 //
135 // 4 beginbfchar          1 beginbfchar
136 // <0003> <0013>          <0020> <0014>
137 // <0005> <0015>    to    endbfchar
138 // <0007> <0017>          1 beginbfrange
139 // <0020> <0014>          <0003> <0007> <0013>
140 // endbfchar              endbfrange
141 //
142 // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
143 // overlap, but succeeding maps supersede preceding maps."
144 //
// When searching text in a PDF, the bfrange would take higher precedence, so
// typing char id 0x0014 in the search box would match glyph id 0x0004 first.
// However, the spec does not say how this kind of conflict is resolved.
148 //
149 // For the worst case (having 65536 continuous unicode and we use every other
150 // one of them), the possible savings by aggressive optimization is 416KB
151 // pre-compressed and does not provide enough motivation for implementation.
SkPDFAppendCmapSections(const SkUnichar * glyphToUnicode,const SkPDFGlyphUse * subset,SkDynamicMemoryWStream * cmap,bool multiByteGlyphs,SkGlyphID firstGlyphID,SkGlyphID lastGlyphID)152 void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
153                              const SkPDFGlyphUse* subset,
154                              SkDynamicMemoryWStream* cmap,
155                              bool multiByteGlyphs,
156                              SkGlyphID firstGlyphID,
157                              SkGlyphID lastGlyphID) {
158     int glyphOffset = 0;
159     if (!multiByteGlyphs) {
160         glyphOffset = firstGlyphID - 1;
161     }
162 
163     std::vector<BFChar> bfcharEntries;
164     std::vector<BFRange> bfrangeEntries;
165 
166     BFRange currentRangeEntry = {0, 0, 0};
167     bool rangeEmpty = true;
168     const int limit = (int)lastGlyphID + 1 - glyphOffset;
169 
170     for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
171         SkGlyphID gid = i + glyphOffset;
172         bool inSubset = i < limit && (subset == nullptr || subset->has(gid));
173         if (!rangeEmpty) {
174             // PDF spec requires bfrange not changing the higher byte,
175             // e.g. <1035> <10FF> <2222> is ok, but
176             //      <1035> <1100> <2222> is no good
177             bool inRange =
178                 i == currentRangeEntry.fEnd + 1 &&
179                 i >> 8 == currentRangeEntry.fStart >> 8 &&
180                 i < limit &&
181                 glyphToUnicode[gid] ==
182                     currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
183             if (!inSubset || !inRange) {
184                 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
185                     bfrangeEntries.push_back(currentRangeEntry);
186                 } else {
187                     bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
188                 }
189                 rangeEmpty = true;
190             }
191         }
192         if (inSubset) {
193             currentRangeEntry.fEnd = i;
194             if (rangeEmpty) {
195               currentRangeEntry.fStart = i;
196               currentRangeEntry.fUnicode = glyphToUnicode[gid];
197               rangeEmpty = false;
198             }
199         }
200     }
201 
202     // The spec requires all bfchar entries for a font must come before bfrange
203     // entries.
204     append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
205     append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
206 }
207 
SkPDFMakeToUnicodeCmap(const SkUnichar * glyphToUnicode,const SkPDFGlyphUse * subset,bool multiByteGlyphs,SkGlyphID firstGlyphID,SkGlyphID lastGlyphID)208 std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
209         const SkUnichar* glyphToUnicode,
210         const SkPDFGlyphUse* subset,
211         bool multiByteGlyphs,
212         SkGlyphID firstGlyphID,
213         SkGlyphID lastGlyphID) {
214     SkDynamicMemoryWStream cmap;
215     append_tounicode_header(&cmap, multiByteGlyphs);
216     SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
217                             firstGlyphID, lastGlyphID);
218     append_cmap_footer(&cmap);
219     return cmap.detachAsStream();
220 }
221