1 #ifndef MUPDF_FITZ_TEXT_H
2 #define MUPDF_FITZ_TEXT_H
3 
4 #include "mupdf/fitz/system.h"
5 #include "mupdf/fitz/context.h"
6 #include "mupdf/fitz/font.h"
7 #include "mupdf/fitz/path.h"
8 #include "mupdf/fitz/bidi.h"
9 
/**
	Text buffer.

	The trm field contains the a, b, c and d coefficients.
	The e and f coefficients come from the individual elements;
	together they form the transform matrix for the glyph.

	Glyphs are referenced by glyph ID.
	The Unicode text equivalent is stored alongside the glyph ID
	in each element. A glyph ID of -1 or a Unicode value of -1
	is used where the mapping between glyphs and Unicode values
	is not one-to-one.
*/
21 
/**
	One element of a text span: a glyph ID, its Unicode
	equivalent and a position.
*/
typedef struct
{
	float x;	/* Supplies the e coefficient of the glyph transform */
	float y;	/* Supplies the f coefficient of the glyph transform */
	int gid;	/* Glyph ID; -1 for one gid to many ucs mappings */
	int ucs;	/* Unicode value; -1 for one ucs to many gid mappings */
} fz_text_item;
28 
/*
	Pack a two or three letter lowercase language tag into an
	integer. Each letter is mapped to 1..26 ('a' is 1) and used
	as a base 27 digit, with the first letter least significant.

	Every parameter is parenthesized so that an argument which is
	itself an expression (for instance a conditional) is evaluated
	as a unit rather than being split by operator precedence.
*/
#define FZ_LANG_TAG2(c1,c2) (((c1)-'a'+1) + (((c2)-'a'+1)*27))
#define FZ_LANG_TAG3(c1,c2,c3) (((c1)-'a'+1) + (((c2)-'a'+1)*27) + (((c3)-'a'+1)*27*27))
31 
32 typedef enum
33 {
34 	FZ_LANG_UNSET = 0,
35 	FZ_LANG_ur = FZ_LANG_TAG2('u','r'),
36 	FZ_LANG_urd = FZ_LANG_TAG3('u','r','d'),
37 	FZ_LANG_ko = FZ_LANG_TAG2('k','o'),
38 	FZ_LANG_ja = FZ_LANG_TAG2('j','a'),
39 	FZ_LANG_zh = FZ_LANG_TAG2('z','h'),
40 	FZ_LANG_zh_Hans = FZ_LANG_TAG3('z','h','s'),
41 	FZ_LANG_zh_Hant = FZ_LANG_TAG3('z','h','t'),
42 } fz_text_language;
43 
44 typedef struct fz_text_span
45 {
46 	fz_font *font;
47 	fz_matrix trm;
48 	unsigned wmode : 1;		/* 0 horizontal, 1 vertical */
49 	unsigned bidi_level : 7;	/* The bidirectional level of text */
50 	unsigned markup_dir : 2;	/* The direction of text as marked in the original document */
51 	unsigned language : 15;		/* The language as marked in the original document */
52 	int len, cap;
53 	fz_text_item *items;
54 	struct fz_text_span *next;
55 } fz_text_span;
56 
57 typedef struct
58 {
59 	int refs;
60 	fz_text_span *head, *tail;
61 } fz_text;
62 
63 /**
64 	Create a new empty fz_text object.
65 
66 	Throws exception on failure to allocate.
67 */
68 fz_text *fz_new_text(fz_context *ctx);
69 
70 /**
71 	Increment the reference count for the text object. The same
72 	pointer is returned.
73 
74 	Never throws exceptions.
75 */
76 fz_text *fz_keep_text(fz_context *ctx, const fz_text *text);
77 
78 /**
79 	Decrement the reference count for the text object. When the
80 	reference count hits zero, the text object is freed.
81 
82 	Never throws exceptions.
83 */
84 void fz_drop_text(fz_context *ctx, const fz_text *text);
85 
86 /**
87 	Add a glyph/unicode value to a text object.
88 
89 	text: Text object to add to.
90 
91 	font: The font the glyph should be added in.
92 
93 	trm: The transform to use for the glyph.
94 
95 	glyph: The glyph id to add.
96 
97 	unicode: The unicode character for the glyph.
98 
99 	wmode: 1 for vertical mode, 0 for horizontal.
100 
101 	bidi_level: The bidirectional level for this glyph.
102 
103 	markup_dir: The direction of the text as specified in the
104 	markup.
105 
106 	language: The language in use (if known, 0 otherwise)
107 	(e.g. FZ_LANG_zh_Hans).
108 
109 	Throws exception on failure to allocate.
110 */
111 void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int glyph, int unicode, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
112 
113 /**
114 	Add a UTF8 string to a text object.
115 
116 	text: Text object to add to.
117 
118 	font: The font the string should be added in.
119 
120 	trm: The transform to use.
121 
122 	s: The utf-8 string to add.
123 
124 	wmode: 1 for vertical mode, 0 for horizontal.
125 
126 	bidi_level: The bidirectional level for this glyph.
127 
128 	markup_dir: The direction of the text as specified in the markup.
129 
130 	language: The language in use (if known, 0 otherwise)
131 		(e.g. FZ_LANG_zh_Hans).
132 
133 	Returns the transform updated with the advance width of the
134 	string.
135 */
136 fz_matrix fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
137 
138 /**
139 	Measure the advance width of a UTF8 string should it be added to a text object.
140 
141 	This uses the same layout algorithms as fz_show_string, and can be used
142 	to calculate text alignment adjustments.
143 */
144 fz_matrix
145 fz_measure_string(fz_context *ctx, fz_font *user_font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
146 
147 /**
148 	Find the bounds of a given text object.
149 
150 	text: The text object to find the bounds of.
151 
152 	stroke: Pointer to the stroke attributes (for stroked
153 	text), or NULL (for filled text).
154 
155 	ctm: The matrix in use.
156 
157 	r: pointer to storage for the bounds.
158 
159 	Returns a pointer to r, which is updated to contain the
160 	bounding box for the text object.
161 */
162 fz_rect fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm);
163 
164 /**
165 	Convert ISO 639 (639-{1,2,3,5}) language specification
166 	strings losslessly to a 15 bit fz_text_language code.
167 
168 	No validation is carried out. Obviously invalid (out
169 	of spec) codes will be mapped to FZ_LANG_UNSET, but
170 	well-formed (but undefined) codes will be blithely
171 	accepted.
172 */
173 fz_text_language fz_text_language_from_string(const char *str);
174 
175 /**
176 	Recover ISO 639 (639-{1,2,3,5}) language specification
177 	strings losslessly from a 15 bit fz_text_language code.
178 
179 	No validation is carried out. See note above.
180 */
181 char *fz_string_from_text_language(char str[8], fz_text_language lang);
182 
183 #endif
184