1 #include "mupdf/fitz.h"
2 #include "mupdf/pdf.h"
3 
4 #include <string.h>
5 
6 pdf_cmap *
pdf_load_embedded_cmap(fz_context * ctx,pdf_document * doc,pdf_obj * stmobj)7 pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj)
8 {
9 	fz_stream *file = NULL;
10 	pdf_cmap *cmap = NULL;
11 	pdf_cmap *usecmap = NULL;
12 	pdf_obj *obj;
13 
14 	fz_var(file);
15 	fz_var(cmap);
16 	fz_var(usecmap);
17 
18 	if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL)
19 		return cmap;
20 
21 	fz_try(ctx)
22 	{
23 		file = pdf_open_stream(ctx, stmobj);
24 		cmap = pdf_load_cmap(ctx, file);
25 
26 		obj = pdf_dict_get(ctx, stmobj, PDF_NAME(WMode));
27 		if (pdf_is_int(ctx, obj))
28 			pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj));
29 
30 		obj = pdf_dict_get(ctx, stmobj, PDF_NAME(UseCMap));
31 		if (pdf_is_name(ctx, obj))
32 		{
33 			usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj));
34 			pdf_set_usecmap(ctx, cmap, usecmap);
35 		}
36 		else if (pdf_is_indirect(ctx, obj))
37 		{
38 			if (pdf_mark_obj(ctx, obj))
39 				fz_throw(ctx, FZ_ERROR_GENERIC, "recursive CMap");
40 			fz_try(ctx)
41 				usecmap = pdf_load_embedded_cmap(ctx, doc, obj);
42 			fz_always(ctx)
43 				pdf_unmark_obj(ctx, obj);
44 			fz_catch(ctx)
45 				fz_rethrow(ctx);
46 			pdf_set_usecmap(ctx, cmap, usecmap);
47 		}
48 
49 		pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap));
50 	}
51 	fz_always(ctx)
52 	{
53 		fz_drop_stream(ctx, file);
54 		pdf_drop_cmap(ctx, usecmap);
55 	}
56 	fz_catch(ctx)
57 	{
58 		pdf_drop_cmap(ctx, cmap);
59 		fz_rethrow(ctx);
60 	}
61 
62 	return cmap;
63 }
64 
65 pdf_cmap *
pdf_new_identity_cmap(fz_context * ctx,int wmode,int bytes)66 pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
67 {
68 	pdf_cmap *cmap = pdf_new_cmap(ctx);
69 	fz_try(ctx)
70 	{
71 		unsigned int high = (1 << (bytes * 8)) - 1;
72 		if (wmode)
73 			fz_strlcpy(cmap->cmap_name, "Identity-V", sizeof cmap->cmap_name);
74 		else
75 			fz_strlcpy(cmap->cmap_name, "Identity-H", sizeof cmap->cmap_name);
76 		pdf_add_codespace(ctx, cmap, 0, high, bytes);
77 		pdf_map_range_to_range(ctx, cmap, 0, high, 0);
78 		pdf_sort_cmap(ctx, cmap);
79 		pdf_set_cmap_wmode(ctx, cmap, wmode);
80 	}
81 	fz_catch(ctx)
82 	{
83 		pdf_drop_cmap(ctx, cmap);
84 		fz_rethrow(ctx);
85 	}
86 	return cmap;
87 }
88 
89 #ifdef NO_CJK
90 
91 pdf_cmap *
pdf_load_builtin_cmap(fz_context * ctx,const char * name)92 pdf_load_builtin_cmap(fz_context *ctx, const char *name)
93 {
94 	if (!strcmp(name, "Identity-H")) return pdf_new_identity_cmap(ctx, 0, 2);
95 	if (!strcmp(name, "Identity-V")) return pdf_new_identity_cmap(ctx, 1, 2);
96 	return NULL;
97 }
98 
99 #else
100 
101 /* To regenerate this list: :r !bash scripts/runcmapdump.sh */
102 
103 #include "cmaps/83pv-RKSJ-H.h"
104 #include "cmaps/90ms-RKSJ-H.h"
105 #include "cmaps/90ms-RKSJ-V.h"
106 #include "cmaps/90msp-RKSJ-H.h"
107 #include "cmaps/90msp-RKSJ-V.h"
108 #include "cmaps/90pv-RKSJ-H.h"
109 #include "cmaps/Add-RKSJ-H.h"
110 #include "cmaps/Add-RKSJ-V.h"
111 #include "cmaps/Adobe-CNS1-UCS2.h"
112 #include "cmaps/Adobe-GB1-UCS2.h"
113 #include "cmaps/Adobe-Japan1-UCS2.h"
114 #include "cmaps/Adobe-Korea1-UCS2.h"
115 #include "cmaps/B5pc-H.h"
116 #include "cmaps/B5pc-V.h"
117 #include "cmaps/CNS-EUC-H.h"
118 #include "cmaps/CNS-EUC-V.h"
119 #include "cmaps/ETen-B5-H.h"
120 #include "cmaps/ETen-B5-V.h"
121 #include "cmaps/ETenms-B5-H.h"
122 #include "cmaps/ETenms-B5-V.h"
123 #include "cmaps/EUC-H.h"
124 #include "cmaps/EUC-V.h"
125 #include "cmaps/Ext-RKSJ-H.h"
126 #include "cmaps/Ext-RKSJ-V.h"
127 #include "cmaps/GB-EUC-H.h"
128 #include "cmaps/GB-EUC-V.h"
129 #include "cmaps/GBK-EUC-H.h"
130 #include "cmaps/GBK-EUC-V.h"
131 #include "cmaps/GBK-X.h"
132 #include "cmaps/GBK2K-H.h"
133 #include "cmaps/GBK2K-V.h"
134 #include "cmaps/GBKp-EUC-H.h"
135 #include "cmaps/GBKp-EUC-V.h"
136 #include "cmaps/GBpc-EUC-H.h"
137 #include "cmaps/GBpc-EUC-V.h"
138 #include "cmaps/H.h"
139 #include "cmaps/HKscs-B5-H.h"
140 #include "cmaps/HKscs-B5-V.h"
141 #include "cmaps/Identity-H.h"
142 #include "cmaps/Identity-V.h"
143 #include "cmaps/KSC-EUC-H.h"
144 #include "cmaps/KSC-EUC-V.h"
145 #include "cmaps/KSCms-UHC-H.h"
146 #include "cmaps/KSCms-UHC-HW-H.h"
147 #include "cmaps/KSCms-UHC-HW-V.h"
148 #include "cmaps/KSCms-UHC-V.h"
149 #include "cmaps/KSCpc-EUC-H.h"
150 #include "cmaps/UniCNS-UCS2-H.h"
151 #include "cmaps/UniCNS-UCS2-V.h"
152 #include "cmaps/UniCNS-UTF16-H.h"
153 #include "cmaps/UniCNS-UTF16-V.h"
154 #include "cmaps/UniCNS-X.h"
155 #include "cmaps/UniGB-UCS2-H.h"
156 #include "cmaps/UniGB-UCS2-V.h"
157 #include "cmaps/UniGB-UTF16-H.h"
158 #include "cmaps/UniGB-UTF16-V.h"
159 #include "cmaps/UniGB-X.h"
160 #include "cmaps/UniJIS-UCS2-H.h"
161 #include "cmaps/UniJIS-UCS2-HW-H.h"
162 #include "cmaps/UniJIS-UCS2-HW-V.h"
163 #include "cmaps/UniJIS-UCS2-V.h"
164 #include "cmaps/UniJIS-UTF16-H.h"
165 #include "cmaps/UniJIS-UTF16-V.h"
166 #include "cmaps/UniJIS-X.h"
167 #include "cmaps/UniKS-UCS2-H.h"
168 #include "cmaps/UniKS-UCS2-V.h"
169 #include "cmaps/UniKS-UTF16-H.h"
170 #include "cmaps/UniKS-UTF16-V.h"
171 #include "cmaps/UniKS-X.h"
172 #include "cmaps/V.h"
173 
174 static pdf_cmap *table[] = {
175 	&cmap_83pv_RKSJ_H,
176 	&cmap_90ms_RKSJ_H,
177 	&cmap_90ms_RKSJ_V,
178 	&cmap_90msp_RKSJ_H,
179 	&cmap_90msp_RKSJ_V,
180 	&cmap_90pv_RKSJ_H,
181 	&cmap_Add_RKSJ_H,
182 	&cmap_Add_RKSJ_V,
183 	&cmap_Adobe_CNS1_UCS2,
184 	&cmap_Adobe_GB1_UCS2,
185 	&cmap_Adobe_Japan1_UCS2,
186 	&cmap_Adobe_Korea1_UCS2,
187 	&cmap_B5pc_H,
188 	&cmap_B5pc_V,
189 	&cmap_CNS_EUC_H,
190 	&cmap_CNS_EUC_V,
191 	&cmap_ETen_B5_H,
192 	&cmap_ETen_B5_V,
193 	&cmap_ETenms_B5_H,
194 	&cmap_ETenms_B5_V,
195 	&cmap_EUC_H,
196 	&cmap_EUC_V,
197 	&cmap_Ext_RKSJ_H,
198 	&cmap_Ext_RKSJ_V,
199 	&cmap_GB_EUC_H,
200 	&cmap_GB_EUC_V,
201 	&cmap_GBK_EUC_H,
202 	&cmap_GBK_EUC_V,
203 	&cmap_GBK_X,
204 	&cmap_GBK2K_H,
205 	&cmap_GBK2K_V,
206 	&cmap_GBKp_EUC_H,
207 	&cmap_GBKp_EUC_V,
208 	&cmap_GBpc_EUC_H,
209 	&cmap_GBpc_EUC_V,
210 	&cmap_H,
211 	&cmap_HKscs_B5_H,
212 	&cmap_HKscs_B5_V,
213 	&cmap_Identity_H,
214 	&cmap_Identity_V,
215 	&cmap_KSC_EUC_H,
216 	&cmap_KSC_EUC_V,
217 	&cmap_KSCms_UHC_H,
218 	&cmap_KSCms_UHC_HW_H,
219 	&cmap_KSCms_UHC_HW_V,
220 	&cmap_KSCms_UHC_V,
221 	&cmap_KSCpc_EUC_H,
222 	&cmap_UniCNS_UCS2_H,
223 	&cmap_UniCNS_UCS2_V,
224 	&cmap_UniCNS_UTF16_H,
225 	&cmap_UniCNS_UTF16_V,
226 	&cmap_UniCNS_X,
227 	&cmap_UniGB_UCS2_H,
228 	&cmap_UniGB_UCS2_V,
229 	&cmap_UniGB_UTF16_H,
230 	&cmap_UniGB_UTF16_V,
231 	&cmap_UniGB_X,
232 	&cmap_UniJIS_UCS2_H,
233 	&cmap_UniJIS_UCS2_HW_H,
234 	&cmap_UniJIS_UCS2_HW_V,
235 	&cmap_UniJIS_UCS2_V,
236 	&cmap_UniJIS_UTF16_H,
237 	&cmap_UniJIS_UTF16_V,
238 	&cmap_UniJIS_X,
239 	&cmap_UniKS_UCS2_H,
240 	&cmap_UniKS_UCS2_V,
241 	&cmap_UniKS_UTF16_H,
242 	&cmap_UniKS_UTF16_V,
243 	&cmap_UniKS_X,
244 	&cmap_V,
245 };
246 
247 pdf_cmap *
pdf_load_builtin_cmap(fz_context * ctx,const char * name)248 pdf_load_builtin_cmap(fz_context *ctx, const char *name)
249 {
250 	int r = nelem(table)-1;
251 	int l = 0;
252 	while (l <= r)
253 	{
254 		int m = (l + r) >> 1;
255 		int c = strcmp(name, table[m]->cmap_name);
256 		if (c < 0)
257 			r = m - 1;
258 		else if (c > 0)
259 			l = m + 1;
260 		else
261 			return table[m];
262 	}
263 	return NULL;
264 }
265 
266 #endif
267 
268 pdf_cmap *
pdf_load_system_cmap(fz_context * ctx,const char * cmap_name)269 pdf_load_system_cmap(fz_context *ctx, const char *cmap_name)
270 {
271 	pdf_cmap *usecmap;
272 	pdf_cmap *cmap;
273 
274 	cmap = pdf_load_builtin_cmap(ctx, cmap_name);
275 	if (!cmap)
276 		fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap_name);
277 
278 	if (cmap->usecmap_name[0] && !cmap->usecmap)
279 	{
280 		usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name);
281 		if (!usecmap)
282 			fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap->usecmap_name);
283 		pdf_set_usecmap(ctx, cmap, usecmap);
284 	}
285 
286 	return cmap;
287 }
288