#ifndef SOURCE_HTML_IMP_H
#define SOURCE_HTML_IMP_H

/*
	Forward typedefs for the structures defined later in this header,
	so that they can refer to one another by pointer.
*/

/* HTML tree, boxes, flow nodes and fonts. */
typedef struct fz_html_font_face_s fz_html_font_face;
typedef struct fz_html_font_set_s fz_html_font_set;
typedef struct fz_html_s fz_html;
typedef struct fz_html_box_s fz_html_box;
typedef struct fz_html_flow_s fz_html_flow;
typedef struct fz_css_style_splay_s fz_css_style_splay;

/* Style sheets, rules, matches and computed styles. */
typedef struct fz_css_s fz_css;
typedef struct fz_css_rule_s fz_css_rule;
typedef struct fz_css_match_s fz_css_match;
typedef struct fz_css_style_s fz_css_style;

/* Parsed CSS syntax nodes and value types. */
typedef struct fz_css_selector_s fz_css_selector;
typedef struct fz_css_condition_s fz_css_condition;
typedef struct fz_css_property_s fz_css_property;
typedef struct fz_css_value_s fz_css_value;
typedef struct fz_css_number_s fz_css_number;
typedef struct fz_css_color_s fz_css_color;

23 struct fz_html_font_face_s
24 {
25 	char *family;
26 	int is_bold;
27 	int is_italic;
28 	int is_small_caps;
29 	fz_font *font;
30 	char *src;
31 	fz_html_font_face *next;
32 };
33 
34 struct fz_html_font_set_s
35 {
36 	fz_font *fonts[12]; /* Times, Helvetica, Courier in R,I,B,BI */
37 	fz_html_font_face *custom;
38 };
39 
/*
	CSS_* type codes. Numbering starts at 256 and increases by one
	per entry (CSS_URI is 262).

	NOTE(review): presumably 256 keeps these codes clear of plain
	single-character codes, and they are what fz_css_value_s.type
	holds -- confirm against the CSS parser.
*/
enum
{
	CSS_KEYWORD = 256,
	CSS_HASH,
	CSS_STRING,
	CSS_NUMBER,
	CSS_LENGTH,
	CSS_PERCENT,
	CSS_URI,
};

51 struct fz_css_s
52 {
53 	fz_pool *pool;
54 	fz_css_rule *rule;
55 };
56 
57 struct fz_css_rule_s
58 {
59 	fz_css_selector *selector;
60 	fz_css_property *declaration;
61 	fz_css_rule *next;
62 	int loaded;
63 };
64 
65 struct fz_css_selector_s
66 {
67 	char *name;
68 	int combine;
69 	fz_css_condition *cond;
70 	fz_css_selector *left;
71 	fz_css_selector *right;
72 	fz_css_selector *next;
73 };
74 
75 struct fz_css_condition_s
76 {
77 	int type;
78 	char *key;
79 	char *val;
80 	fz_css_condition *next;
81 };
82 
83 struct fz_css_property_s
84 {
85 	int name;
86 	fz_css_value *value;
87 	short spec;
88 	short important;
89 	fz_css_property *next;
90 };
91 
92 struct fz_css_value_s
93 {
94 	int type;
95 	char *data;
96 	fz_css_value *args; /* function arguments */
97 	fz_css_value *next;
98 };
99 
/*
	Property codes.

	The codes before NUM_PROPERTIES are the real (long-hand)
	properties, numbered from 0; NUM_PROPERTIES is therefore their
	count and is used to size the arrays in fz_css_match_s.
	The codes after NUM_PROPERTIES are short-hand properties.
*/
enum
{
	PRO_BACKGROUND_COLOR,
	PRO_BORDER_BOTTOM_COLOR,
	PRO_BORDER_BOTTOM_STYLE,
	PRO_BORDER_BOTTOM_WIDTH,
	PRO_BORDER_LEFT_COLOR,
	PRO_BORDER_LEFT_STYLE,
	PRO_BORDER_LEFT_WIDTH,
	PRO_BORDER_RIGHT_COLOR,
	PRO_BORDER_RIGHT_STYLE,
	PRO_BORDER_RIGHT_WIDTH,
	PRO_BORDER_TOP_COLOR,
	PRO_BORDER_TOP_STYLE,
	PRO_BORDER_TOP_WIDTH,
	PRO_COLOR,
	PRO_DIRECTION,
	PRO_DISPLAY,
	PRO_FONT_FAMILY,
	PRO_FONT_SIZE,
	PRO_FONT_STYLE,
	PRO_FONT_VARIANT,
	PRO_FONT_WEIGHT,
	PRO_HEIGHT,
	PRO_LETTER_SPACING,
	PRO_LINE_HEIGHT,
	PRO_LIST_STYLE_IMAGE,
	PRO_LIST_STYLE_POSITION,
	PRO_LIST_STYLE_TYPE,
	PRO_MARGIN_BOTTOM,
	PRO_MARGIN_LEFT,
	PRO_MARGIN_RIGHT,
	PRO_MARGIN_TOP,
	PRO_ORPHANS,
	PRO_PADDING_BOTTOM,
	PRO_PADDING_LEFT,
	PRO_PADDING_RIGHT,
	PRO_PADDING_TOP,
	PRO_PAGE_BREAK_AFTER,
	PRO_PAGE_BREAK_BEFORE,
	PRO_QUOTES,
	PRO_SRC,
	PRO_TEXT_ALIGN,
	PRO_TEXT_INDENT,
	PRO_TEXT_TRANSFORM,
	PRO_VERTICAL_ALIGN,
	PRO_VISIBILITY,
	PRO_WHITE_SPACE,
	PRO_WIDOWS,
	PRO_WIDTH,
	PRO_WORD_SPACING,

	/* Number of real properties. */
	NUM_PROPERTIES,

	/* Short-hand properties (always expanded when applied, never used as is): */
	PRO_BORDER,
	PRO_BORDER_BOTTOM,
	PRO_BORDER_COLOR,
	PRO_BORDER_LEFT,
	PRO_BORDER_RIGHT,
	PRO_BORDER_STYLE,
	PRO_BORDER_TOP,
	PRO_BORDER_WIDTH,
	PRO_LIST_STYLE,
	PRO_MARGIN,
	PRO_PADDING,
};

169 struct fz_css_match_s
170 {
171 	fz_css_match *up;
172 	short spec[NUM_PROPERTIES];
173 	fz_css_value *value[NUM_PROPERTIES];
174 };
175 
/*
	Small enumerations, each numbered from 0.

	NOTE(review): judging by the prefixes these are the values of
	'display' (DIS_), 'position' (POS_), 'text-align' (TA_),
	'vertical-align' (VA_), border style (BS_), 'visibility' (V_)
	and page-break (PB_) settings -- confirm against the code that
	fills in fz_css_style.
*/
enum { DIS_NONE, DIS_BLOCK, DIS_INLINE, DIS_LIST_ITEM, DIS_INLINE_BLOCK, DIS_TABLE, DIS_TABLE_ROW, DIS_TABLE_CELL };
enum { POS_STATIC, POS_RELATIVE, POS_ABSOLUTE, POS_FIXED };
enum { TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY };
enum { VA_BASELINE, VA_SUB, VA_SUPER, VA_TOP, VA_BOTTOM, VA_TEXT_TOP, VA_TEXT_BOTTOM };
enum { BS_NONE, BS_SOLID };
enum { V_VISIBLE, V_HIDDEN, V_COLLAPSE };
enum { PB_AUTO, PB_ALWAYS, PB_AVOID, PB_LEFT, PB_RIGHT };

/*
	White-space handling, expressed as three independent flag bits
	and the named combinations built from them. All values fit in
	three bits.
*/
enum {
	/* Flag bits. */
	WS_COLLAPSE = 1,
	WS_ALLOW_BREAK_SPACE = 2,
	WS_FORCE_BREAK_NEWLINE = 4,

	/* Named combinations of the flag bits above. */
	WS_NORMAL = WS_COLLAPSE | WS_ALLOW_BREAK_SPACE,
	WS_PRE = WS_FORCE_BREAK_NEWLINE,
	WS_NOWRAP = WS_COLLAPSE,
	WS_PRE_WRAP = WS_ALLOW_BREAK_SPACE | WS_FORCE_BREAK_NEWLINE,
	WS_PRE_LINE = WS_COLLAPSE | WS_ALLOW_BREAK_SPACE | WS_FORCE_BREAK_NEWLINE
};

/*
	LST_* codes, numbered 0 (LST_NONE) to 15 (LST_GEORGIAN); every
	value fits in four bits.

	NOTE(review): presumably the values of 'list-style-type' as
	stored in fz_css_style.list_style_type -- confirm.
*/
enum {
	LST_NONE,
	LST_DISC, LST_CIRCLE, LST_SQUARE,
	LST_DECIMAL, LST_DECIMAL_ZERO,
	LST_LC_ROMAN, LST_UC_ROMAN,
	LST_LC_GREEK, LST_UC_GREEK,
	LST_LC_LATIN, LST_UC_LATIN,
	LST_LC_ALPHA, LST_UC_ALPHA,
	LST_ARMENIAN, LST_GEORGIAN,
};

/*
	N_* unit codes; each is a printable character constant.

	NOTE(review): presumably the values held in fz_css_number.unit
	-- confirm against the code that builds fz_css_number values.
*/
enum { N_NUMBER='u', N_LENGTH='p', N_SCALE='m', N_PERCENT='%', N_AUTO='a' };

/*
	A numeric CSS quantity: a magnitude together with a unit code.
	Passed by value to fz_from_css_number() and
	fz_from_css_number_scale().
*/
struct fz_css_number_s
{
	float value;
	int unit; /* NOTE(review): presumably one of the N_* codes -- confirm */
};

/* A colour stored as four 8-bit components: r, g, b and a. */
struct fz_css_color_s
{
	unsigned char r, g, b, a;
};

219 struct fz_css_style_s
220 {
221 	fz_css_number font_size;
222 	fz_css_number width, height;
223 	fz_css_number margin[4];
224 	fz_css_number padding[4];
225 	fz_css_number border_width[4];
226 	fz_css_number text_indent;
227 	unsigned int visibility : 2;
228 	unsigned int white_space : 3;
229 	unsigned int text_align : 2;
230 	unsigned int vertical_align : 3;
231 	unsigned int list_style_type : 4;
232 	unsigned int page_break_before : 3;
233 	unsigned int page_break_after : 3;
234 	unsigned int border_style_0 : 1;
235 	unsigned int border_style_1 : 1;
236 	unsigned int border_style_2 : 1;
237 	unsigned int border_style_3 : 1;
238 	unsigned int small_caps : 1;
239 	/* Ensure the extra bits in the bitfield are copied
240 	 * on structure copies. */
241 	unsigned int blank : 6;
242 	fz_css_number line_height;
243 	fz_css_color background_color;
244 	fz_css_color border_color[4];
245 	fz_css_color color;
246 	fz_font *font;
247 };
248 
249 struct fz_css_style_splay_s {
250 	fz_css_style style;
251 	fz_css_style_splay *lt;
252 	fz_css_style_splay *gt;
253 	fz_css_style_splay *up;
254 };
255 
/*
	Box types, numbered from 0. Six values, so they fit in the
	three-bit fz_html_box_s.type field.
*/
enum
{
	BOX_BLOCK,	/* block-level: contains block, break, flow, and table boxes */
	BOX_FLOW,	/* block-level: contains only inline boxes */
	BOX_INLINE,	/* inline-level: contains only inline boxes */
	BOX_TABLE,	/* table: contains table-row */
	BOX_TABLE_ROW,	/* table-row: contains table-cell */
	BOX_TABLE_CELL,	/* table-cell: contains block */
};

266 struct fz_html_s
267 {
268 	fz_storable storable;
269 	fz_pool *pool; /* pool allocator for this html tree */
270 	float page_w, page_h;
271 	float layout_w, layout_h, layout_em;
272 	float page_margin[4];
273 	fz_html_box *root;
274 	char *title;
275 };
276 
277 struct fz_html_box_s
278 {
279 	unsigned int type : 3;
280 	unsigned int is_first_flow : 1; /* for text-indent */
281 	unsigned int markup_dir : 2;
282 	unsigned int heading : 3; /* h1..h6 */
283 	unsigned int list_item : 23;
284 	float x, y, w, b; /* content */
285 	float em;
286 	/* During construction, 'next' plays double duty; as well
287 	 * as its normal meaning of 'next sibling', the last sibling
288 	 * has next meaning "the last of my children". We correct
289 	 * this as a post-processing pass after construction. */
290 	fz_html_box *up, *down, *next;
291 	fz_html_flow *flow_head, **flow_tail;
292 	char *id, *href;
293 	const fz_css_style *style;
294 	/* Only BOX_{BLOCK,TABLE,TABLE_ROW,TABLE_CELL} actually use the following */
295 	float padding[4];
296 	float margin[4];
297 	float border[4];
298 };
299 
300 static inline int
fz_html_box_has_boxes(fz_html_box * box)301 fz_html_box_has_boxes(fz_html_box *box)
302 {
303 	return (box->type == BOX_BLOCK || box->type == BOX_TABLE || box->type == BOX_TABLE_ROW || box->type == BOX_TABLE_CELL);
304 }
305 
/*
	Flow node types, explicitly numbered 0..6 so that they fit in
	the three-bit fz_html_flow_s.type field.
*/
enum
{
	FLOW_WORD = 0,
	FLOW_SPACE = 1,
	FLOW_BREAK = 2,
	FLOW_IMAGE = 3,
	FLOW_SBREAK = 4,
	FLOW_SHYPHEN = 5,
	FLOW_ANCHOR = 6
};

317 struct fz_html_flow_s
318 {
319 	/* What type of node */
320 	unsigned int type : 3;
321 
322 	/* Whether this should expand during justification */
323 	unsigned int expand : 1;
324 
325 	/* Whether this node is currently taken as a line break */
326 	unsigned int breaks_line : 1;
327 
328 	/* Direction setting for text - UAX#9 says 125 is the max */
329 	unsigned int bidi_level : 7;
330 
331 	/* The script detected by the bidi code. */
332 	unsigned int script : 8;
333 
334 	/* Whether the markup specifies a given language. */
335 	unsigned int markup_lang : 15;
336 
337 	float x, y, w, h;
338 	fz_html_box *box; /* for style and em */
339 	fz_html_flow *next;
340 	union {
341 		char text[1];
342 		fz_image *image;
343 	} content;
344 };
345 
346 
347 fz_css *fz_new_css(fz_context *ctx);
348 void fz_parse_css(fz_context *ctx, fz_css *css, const char *source, const char *file);
349 fz_css_property *fz_parse_css_properties(fz_context *ctx, fz_pool *pool, const char *source);
350 void fz_drop_css(fz_context *ctx, fz_css *css);
351 void fz_debug_css(fz_context *ctx, fz_css *css);
352 const char *fz_css_property_name(int name);
353 
354 void fz_match_css(fz_context *ctx, fz_css_match *match, fz_css_match *up, fz_css *css, fz_xml *node);
355 void fz_match_css_at_page(fz_context *ctx, fz_css_match *match, fz_css *css);
356 
357 int fz_get_css_match_display(fz_css_match *node);
358 void fz_default_css_style(fz_context *ctx, fz_css_style *style);
359 void fz_apply_css_style(fz_context *ctx, fz_html_font_set *set, fz_css_style *style, fz_css_match *match);
360 
361 /*
362 	Lookup style in the splay tree, returning a pointer
363 	to the found instance if there is one, creating and
364 	inserting (and moving to root) one if there is not.
365 */
366 const fz_css_style *fz_css_enlist(fz_context *ctx, const fz_css_style *style, fz_css_style_splay **tree, fz_pool *pool);
367 
368 float fz_from_css_number(fz_css_number number, float em, float percent_value, float auto_value);
369 float fz_from_css_number_scale(fz_css_number number, float scale);
370 
371 fz_html_font_set *fz_new_html_font_set(fz_context *ctx);
372 void fz_add_html_font_face(fz_context *ctx, fz_html_font_set *set,
373 	const char *family, int is_bold, int is_italic, int is_small_caps, const char *src, fz_font *font);
374 fz_font *fz_load_html_font(fz_context *ctx, fz_html_font_set *set, const char *family, int is_bold, int is_italic, int is_small_caps);
375 void fz_drop_html_font_set(fz_context *ctx, fz_html_font_set *htx);
376 
377 void fz_add_css_font_faces(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css);
378 
379 fz_html *fz_parse_fb2(fz_context *ctx, fz_html_font_set *htx, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css);
380 fz_html *fz_parse_html5(fz_context *ctx, fz_html_font_set *htx, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css);
381 fz_html *fz_parse_xhtml(fz_context *ctx, fz_html_font_set *htx, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css);
382 
383 void fz_layout_html(fz_context *ctx, fz_html *html, float w, float h, float em);
384 void fz_draw_html(fz_context *ctx, fz_device *dev, fz_matrix ctm, fz_html *html, int page);
385 fz_outline *fz_load_html_outline(fz_context *ctx, fz_html *node);
386 
387 float fz_find_html_target(fz_context *ctx, fz_html *html, const char *id);
388 fz_link *fz_load_html_links(fz_context *ctx, fz_html *html, int page, const char *base_uri, void *doc);
389 fz_html *fz_keep_html(fz_context *ctx, fz_html *html);
390 void fz_drop_html(fz_context *ctx, fz_html *html);
391 fz_bookmark fz_make_html_bookmark(fz_context *ctx, fz_html *html, int page);
392 int fz_lookup_html_bookmark(fz_context *ctx, fz_html *html, fz_bookmark mark);
393 void fz_debug_html(fz_context *ctx, fz_html_box *box);
394 
395 fz_html *fz_store_html(fz_context *ctx, fz_html *html, void *doc, int chapter);
396 fz_html *fz_find_html(fz_context *ctx, void *doc, int chapter);
397 void fz_purge_stored_html(fz_context *ctx, void *doc);
398 
#endif