1 #include "mupdf/fitz.h"
2 #include "mupdf/pdf.h"
3
4 #include <string.h>
5
6 pdf_cmap *
pdf_load_embedded_cmap(fz_context * ctx,pdf_document * doc,pdf_obj * stmobj)7 pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj)
8 {
9 fz_stream *file = NULL;
10 pdf_cmap *cmap = NULL;
11 pdf_cmap *usecmap = NULL;
12 pdf_obj *obj;
13
14 fz_var(file);
15 fz_var(cmap);
16 fz_var(usecmap);
17
18 if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL)
19 return cmap;
20
21 fz_try(ctx)
22 {
23 file = pdf_open_stream(ctx, stmobj);
24 cmap = pdf_load_cmap(ctx, file);
25
26 obj = pdf_dict_get(ctx, stmobj, PDF_NAME(WMode));
27 if (pdf_is_int(ctx, obj))
28 pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj));
29
30 obj = pdf_dict_get(ctx, stmobj, PDF_NAME(UseCMap));
31 if (pdf_is_name(ctx, obj))
32 {
33 usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj));
34 pdf_set_usecmap(ctx, cmap, usecmap);
35 }
36 else if (pdf_is_indirect(ctx, obj))
37 {
38 if (pdf_mark_obj(ctx, obj))
39 fz_throw(ctx, FZ_ERROR_GENERIC, "recursive CMap");
40 fz_try(ctx)
41 usecmap = pdf_load_embedded_cmap(ctx, doc, obj);
42 fz_always(ctx)
43 pdf_unmark_obj(ctx, obj);
44 fz_catch(ctx)
45 fz_rethrow(ctx);
46 pdf_set_usecmap(ctx, cmap, usecmap);
47 }
48
49 pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap));
50 }
51 fz_always(ctx)
52 {
53 fz_drop_stream(ctx, file);
54 pdf_drop_cmap(ctx, usecmap);
55 }
56 fz_catch(ctx)
57 {
58 pdf_drop_cmap(ctx, cmap);
59 fz_rethrow(ctx);
60 }
61
62 return cmap;
63 }
64
65 pdf_cmap *
pdf_new_identity_cmap(fz_context * ctx,int wmode,int bytes)66 pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
67 {
68 pdf_cmap *cmap = pdf_new_cmap(ctx);
69 fz_try(ctx)
70 {
71 unsigned int high = (1 << (bytes * 8)) - 1;
72 if (wmode)
73 fz_strlcpy(cmap->cmap_name, "Identity-V", sizeof cmap->cmap_name);
74 else
75 fz_strlcpy(cmap->cmap_name, "Identity-H", sizeof cmap->cmap_name);
76 pdf_add_codespace(ctx, cmap, 0, high, bytes);
77 pdf_map_range_to_range(ctx, cmap, 0, high, 0);
78 pdf_sort_cmap(ctx, cmap);
79 pdf_set_cmap_wmode(ctx, cmap, wmode);
80 }
81 fz_catch(ctx)
82 {
83 pdf_drop_cmap(ctx, cmap);
84 fz_rethrow(ctx);
85 }
86 return cmap;
87 }
88
89 #ifdef NO_CJK
90
91 pdf_cmap *
pdf_load_builtin_cmap(fz_context * ctx,const char * name)92 pdf_load_builtin_cmap(fz_context *ctx, const char *name)
93 {
94 if (!strcmp(name, "Identity-H")) return pdf_new_identity_cmap(ctx, 0, 2);
95 if (!strcmp(name, "Identity-V")) return pdf_new_identity_cmap(ctx, 1, 2);
96 return NULL;
97 }
98
99 #else
100
101 /* To regenerate this list: :r !bash scripts/runcmapdump.sh */
102
103 #include "cmaps/83pv-RKSJ-H.h"
104 #include "cmaps/90ms-RKSJ-H.h"
105 #include "cmaps/90ms-RKSJ-V.h"
106 #include "cmaps/90msp-RKSJ-H.h"
107 #include "cmaps/90msp-RKSJ-V.h"
108 #include "cmaps/90pv-RKSJ-H.h"
109 #include "cmaps/Add-RKSJ-H.h"
110 #include "cmaps/Add-RKSJ-V.h"
111 #include "cmaps/Adobe-CNS1-UCS2.h"
112 #include "cmaps/Adobe-GB1-UCS2.h"
113 #include "cmaps/Adobe-Japan1-UCS2.h"
114 #include "cmaps/Adobe-Korea1-UCS2.h"
115 #include "cmaps/B5pc-H.h"
116 #include "cmaps/B5pc-V.h"
117 #include "cmaps/CNS-EUC-H.h"
118 #include "cmaps/CNS-EUC-V.h"
119 #include "cmaps/ETen-B5-H.h"
120 #include "cmaps/ETen-B5-V.h"
121 #include "cmaps/ETenms-B5-H.h"
122 #include "cmaps/ETenms-B5-V.h"
123 #include "cmaps/EUC-H.h"
124 #include "cmaps/EUC-V.h"
125 #include "cmaps/Ext-RKSJ-H.h"
126 #include "cmaps/Ext-RKSJ-V.h"
127 #include "cmaps/GB-EUC-H.h"
128 #include "cmaps/GB-EUC-V.h"
129 #include "cmaps/GBK-EUC-H.h"
130 #include "cmaps/GBK-EUC-V.h"
131 #include "cmaps/GBK-X.h"
132 #include "cmaps/GBK2K-H.h"
133 #include "cmaps/GBK2K-V.h"
134 #include "cmaps/GBKp-EUC-H.h"
135 #include "cmaps/GBKp-EUC-V.h"
136 #include "cmaps/GBpc-EUC-H.h"
137 #include "cmaps/GBpc-EUC-V.h"
138 #include "cmaps/H.h"
139 #include "cmaps/HKscs-B5-H.h"
140 #include "cmaps/HKscs-B5-V.h"
141 #include "cmaps/Identity-H.h"
142 #include "cmaps/Identity-V.h"
143 #include "cmaps/KSC-EUC-H.h"
144 #include "cmaps/KSC-EUC-V.h"
145 #include "cmaps/KSCms-UHC-H.h"
146 #include "cmaps/KSCms-UHC-HW-H.h"
147 #include "cmaps/KSCms-UHC-HW-V.h"
148 #include "cmaps/KSCms-UHC-V.h"
149 #include "cmaps/KSCpc-EUC-H.h"
150 #include "cmaps/UniCNS-UCS2-H.h"
151 #include "cmaps/UniCNS-UCS2-V.h"
152 #include "cmaps/UniCNS-UTF16-H.h"
153 #include "cmaps/UniCNS-UTF16-V.h"
154 #include "cmaps/UniCNS-X.h"
155 #include "cmaps/UniGB-UCS2-H.h"
156 #include "cmaps/UniGB-UCS2-V.h"
157 #include "cmaps/UniGB-UTF16-H.h"
158 #include "cmaps/UniGB-UTF16-V.h"
159 #include "cmaps/UniGB-X.h"
160 #include "cmaps/UniJIS-UCS2-H.h"
161 #include "cmaps/UniJIS-UCS2-HW-H.h"
162 #include "cmaps/UniJIS-UCS2-HW-V.h"
163 #include "cmaps/UniJIS-UCS2-V.h"
164 #include "cmaps/UniJIS-UTF16-H.h"
165 #include "cmaps/UniJIS-UTF16-V.h"
166 #include "cmaps/UniJIS-X.h"
167 #include "cmaps/UniKS-UCS2-H.h"
168 #include "cmaps/UniKS-UCS2-V.h"
169 #include "cmaps/UniKS-UTF16-H.h"
170 #include "cmaps/UniKS-UTF16-V.h"
171 #include "cmaps/UniKS-X.h"
172 #include "cmaps/V.h"
173
/*
	Built-in CMaps, one entry per header included above.

	pdf_load_builtin_cmap binary-searches this array, comparing the
	requested name against each entry's cmap_name with strcmp. The
	entries must therefore stay sorted by cmap_name in strcmp (byte
	value) order; an out-of-place entry can become unfindable.
	NOTE(review): assumes each cmap_name matches the identifier it is
	listed under (with '-' in place of '_') -- confirm in the generated
	headers.
*/
static pdf_cmap *table[] = {
	&cmap_83pv_RKSJ_H,
	&cmap_90ms_RKSJ_H,
	&cmap_90ms_RKSJ_V,
	&cmap_90msp_RKSJ_H,
	&cmap_90msp_RKSJ_V,
	&cmap_90pv_RKSJ_H,
	&cmap_Add_RKSJ_H,
	&cmap_Add_RKSJ_V,
	&cmap_Adobe_CNS1_UCS2,
	&cmap_Adobe_GB1_UCS2,
	&cmap_Adobe_Japan1_UCS2,
	&cmap_Adobe_Korea1_UCS2,
	&cmap_B5pc_H,
	&cmap_B5pc_V,
	&cmap_CNS_EUC_H,
	&cmap_CNS_EUC_V,
	&cmap_ETen_B5_H,
	&cmap_ETen_B5_V,
	&cmap_ETenms_B5_H,
	&cmap_ETenms_B5_V,
	&cmap_EUC_H,
	&cmap_EUC_V,
	&cmap_Ext_RKSJ_H,
	&cmap_Ext_RKSJ_V,
	&cmap_GB_EUC_H,
	&cmap_GB_EUC_V,
	&cmap_GBK_EUC_H,
	&cmap_GBK_EUC_V,
	&cmap_GBK_X,
	&cmap_GBK2K_H,
	&cmap_GBK2K_V,
	&cmap_GBKp_EUC_H,
	&cmap_GBKp_EUC_V,
	&cmap_GBpc_EUC_H,
	&cmap_GBpc_EUC_V,
	&cmap_H,
	&cmap_HKscs_B5_H,
	&cmap_HKscs_B5_V,
	&cmap_Identity_H,
	&cmap_Identity_V,
	&cmap_KSC_EUC_H,
	&cmap_KSC_EUC_V,
	&cmap_KSCms_UHC_H,
	&cmap_KSCms_UHC_HW_H,
	&cmap_KSCms_UHC_HW_V,
	&cmap_KSCms_UHC_V,
	&cmap_KSCpc_EUC_H,
	&cmap_UniCNS_UCS2_H,
	&cmap_UniCNS_UCS2_V,
	&cmap_UniCNS_UTF16_H,
	&cmap_UniCNS_UTF16_V,
	&cmap_UniCNS_X,
	&cmap_UniGB_UCS2_H,
	&cmap_UniGB_UCS2_V,
	&cmap_UniGB_UTF16_H,
	&cmap_UniGB_UTF16_V,
	&cmap_UniGB_X,
	&cmap_UniJIS_UCS2_H,
	&cmap_UniJIS_UCS2_HW_H,
	&cmap_UniJIS_UCS2_HW_V,
	&cmap_UniJIS_UCS2_V,
	&cmap_UniJIS_UTF16_H,
	&cmap_UniJIS_UTF16_V,
	&cmap_UniJIS_X,
	&cmap_UniKS_UCS2_H,
	&cmap_UniKS_UCS2_V,
	&cmap_UniKS_UTF16_H,
	&cmap_UniKS_UTF16_V,
	&cmap_UniKS_X,
	&cmap_V,
};
246
247 pdf_cmap *
pdf_load_builtin_cmap(fz_context * ctx,const char * name)248 pdf_load_builtin_cmap(fz_context *ctx, const char *name)
249 {
250 int r = nelem(table)-1;
251 int l = 0;
252 while (l <= r)
253 {
254 int m = (l + r) >> 1;
255 int c = strcmp(name, table[m]->cmap_name);
256 if (c < 0)
257 r = m - 1;
258 else if (c > 0)
259 l = m + 1;
260 else
261 return table[m];
262 }
263 return NULL;
264 }
265
266 #endif
267
268 pdf_cmap *
pdf_load_system_cmap(fz_context * ctx,const char * cmap_name)269 pdf_load_system_cmap(fz_context *ctx, const char *cmap_name)
270 {
271 pdf_cmap *usecmap;
272 pdf_cmap *cmap;
273
274 cmap = pdf_load_builtin_cmap(ctx, cmap_name);
275 if (!cmap)
276 fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap_name);
277
278 if (cmap->usecmap_name[0] && !cmap->usecmap)
279 {
280 usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name);
281 if (!usecmap)
282 fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap->usecmap_name);
283 pdf_set_usecmap(ctx, cmap, usecmap);
284 }
285
286 return cmap;
287 }
288