xref: /dragonfly/contrib/mdocml/tbl_layout.c (revision 25a2db75)
1 /*	$Id: tbl_layout.c,v 1.22 2011/09/18 14:14:15 schwarze Exp $ */
2 /*
3  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <time.h>
26 
27 #include "mandoc.h"
28 #include "libmandoc.h"
29 #include "libroff.h"
30 
31 struct	tbl_phrase {
32 	char		 name;
33 	enum tbl_cellt	 key;
34 };
35 
36 /*
37  * FIXME: we can make this parse a lot nicer by, when an error is
38  * encountered in a layout key, bailing to the next key (i.e. to the
39  * next whitespace then continuing).
40  */
41 
42 #define	KEYS_MAX	 11
43 
44 static	const struct tbl_phrase keys[KEYS_MAX] = {
45 	{ 'c',		 TBL_CELL_CENTRE },
46 	{ 'r',		 TBL_CELL_RIGHT },
47 	{ 'l',		 TBL_CELL_LEFT },
48 	{ 'n',		 TBL_CELL_NUMBER },
49 	{ 's',		 TBL_CELL_SPAN },
50 	{ 'a',		 TBL_CELL_LONG },
51 	{ '^',		 TBL_CELL_DOWN },
52 	{ '-',		 TBL_CELL_HORIZ },
53 	{ '_',		 TBL_CELL_HORIZ },
54 	{ '=',		 TBL_CELL_DHORIZ },
55 	{ '|',		 TBL_CELL_VERT }
56 };
57 
/* Forward declarations of the file-local layout parsing helpers. */
static	int		 mods(struct tbl_node *tbl, struct tbl_cell *cp,
				int ln, const char *p, int *pos);
static	int		 cell(struct tbl_node *tbl, struct tbl_row *rp,
				int ln, const char *p, int *pos);
static	void		 row(struct tbl_node *tbl, int ln,
				const char *p, int *pos);
static	struct tbl_cell *cell_alloc(struct tbl_node *tbl,
				struct tbl_row *rp, enum tbl_cellt pos);
static	void		 head_adjust(const struct tbl_cell *cellp,
				struct tbl_head *head);
67 
68 static int
69 mods(struct tbl_node *tbl, struct tbl_cell *cp,
70 		int ln, const char *p, int *pos)
71 {
72 	char		 buf[5];
73 	int		 i;
74 
75 	/* Not all types accept modifiers. */
76 
77 	switch (cp->pos) {
78 	case (TBL_CELL_DOWN):
79 		/* FALLTHROUGH */
80 	case (TBL_CELL_HORIZ):
81 		/* FALLTHROUGH */
82 	case (TBL_CELL_DHORIZ):
83 		/* FALLTHROUGH */
84 	case (TBL_CELL_VERT):
85 		/* FALLTHROUGH */
86 	case (TBL_CELL_DVERT):
87 		return(1);
88 	default:
89 		break;
90 	}
91 
92 mod:
93 	/*
94 	 * XXX: since, at least for now, modifiers are non-conflicting
95 	 * (are separable by value, regardless of position), we let
96 	 * modifiers come in any order.  The existing tbl doesn't let
97 	 * this happen.
98 	 */
99 	switch (p[*pos]) {
100 	case ('\0'):
101 		/* FALLTHROUGH */
102 	case (' '):
103 		/* FALLTHROUGH */
104 	case ('\t'):
105 		/* FALLTHROUGH */
106 	case (','):
107 		/* FALLTHROUGH */
108 	case ('.'):
109 		return(1);
110 	default:
111 		break;
112 	}
113 
114 	/* Throw away parenthesised expression. */
115 
116 	if ('(' == p[*pos]) {
117 		(*pos)++;
118 		while (p[*pos] && ')' != p[*pos])
119 			(*pos)++;
120 		if (')' == p[*pos]) {
121 			(*pos)++;
122 			goto mod;
123 		}
124 		mandoc_msg(MANDOCERR_TBLLAYOUT,
125 				tbl->parse, ln, *pos, NULL);
126 		return(0);
127 	}
128 
129 	/* Parse numerical spacing from modifier string. */
130 
131 	if (isdigit((unsigned char)p[*pos])) {
132 		for (i = 0; i < 4; i++) {
133 			if ( ! isdigit((unsigned char)p[*pos + i]))
134 				break;
135 			buf[i] = p[*pos + i];
136 		}
137 		buf[i] = '\0';
138 
139 		/* No greater than 4 digits. */
140 
141 		if (4 == i) {
142 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
143 					ln, *pos, NULL);
144 			return(0);
145 		}
146 
147 		*pos += i;
148 		cp->spacing = (size_t)atoi(buf);
149 
150 		goto mod;
151 		/* NOTREACHED */
152 	}
153 
154 	/* TODO: GNU has many more extensions. */
155 
156 	switch (tolower((unsigned char)p[(*pos)++])) {
157 	case ('z'):
158 		cp->flags |= TBL_CELL_WIGN;
159 		goto mod;
160 	case ('u'):
161 		cp->flags |= TBL_CELL_UP;
162 		goto mod;
163 	case ('e'):
164 		cp->flags |= TBL_CELL_EQUAL;
165 		goto mod;
166 	case ('t'):
167 		cp->flags |= TBL_CELL_TALIGN;
168 		goto mod;
169 	case ('d'):
170 		cp->flags |= TBL_CELL_BALIGN;
171 		goto mod;
172 	case ('w'):  /* XXX for now, ignore minimal column width */
173 		goto mod;
174 	case ('f'):
175 		break;
176 	case ('r'):
177 		/* FALLTHROUGH */
178 	case ('b'):
179 		/* FALLTHROUGH */
180 	case ('i'):
181 		(*pos)--;
182 		break;
183 	default:
184 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
185 				ln, *pos - 1, NULL);
186 		return(0);
187 	}
188 
189 	switch (tolower((unsigned char)p[(*pos)++])) {
190 	case ('3'):
191 		/* FALLTHROUGH */
192 	case ('b'):
193 		cp->flags |= TBL_CELL_BOLD;
194 		goto mod;
195 	case ('2'):
196 		/* FALLTHROUGH */
197 	case ('i'):
198 		cp->flags |= TBL_CELL_ITALIC;
199 		goto mod;
200 	case ('1'):
201 		/* FALLTHROUGH */
202 	case ('r'):
203 		goto mod;
204 	default:
205 		break;
206 	}
207 
208 	mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
209 			ln, *pos - 1, NULL);
210 	return(0);
211 }
212 
213 static int
214 cell(struct tbl_node *tbl, struct tbl_row *rp,
215 		int ln, const char *p, int *pos)
216 {
217 	int		 i;
218 	enum tbl_cellt	 c;
219 
220 	/* Parse the column position (`r', `R', `|', ...). */
221 
222 	for (i = 0; i < KEYS_MAX; i++)
223 		if (tolower((unsigned char)p[*pos]) == keys[i].name)
224 			break;
225 
226 	if (KEYS_MAX == i) {
227 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
228 				ln, *pos, NULL);
229 		return(0);
230 	}
231 
232 	c = keys[i].key;
233 
234 	/*
235 	 * If a span cell is found first, raise a warning and abort the
236 	 * parse.  If a span cell is found and the last layout element
237 	 * isn't a "normal" layout, bail.
238 	 *
239 	 * FIXME: recover from this somehow?
240 	 */
241 
242 	if (TBL_CELL_SPAN == c) {
243 		if (NULL == rp->first) {
244 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
245 					ln, *pos, NULL);
246 			return(0);
247 		} else if (rp->last)
248 			switch (rp->last->pos) {
249 			case (TBL_CELL_VERT):
250 			case (TBL_CELL_DVERT):
251 			case (TBL_CELL_HORIZ):
252 			case (TBL_CELL_DHORIZ):
253 				mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
254 						ln, *pos, NULL);
255 				return(0);
256 			default:
257 				break;
258 			}
259 	}
260 
261 	/*
262 	 * If a vertical spanner is found, we may not be in the first
263 	 * row.
264 	 */
265 
266 	if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
267 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
268 		return(0);
269 	}
270 
271 	(*pos)++;
272 
273 	/* Extra check for the double-vertical. */
274 
275 	if (TBL_CELL_VERT == c && '|' == p[*pos]) {
276 		(*pos)++;
277 		c = TBL_CELL_DVERT;
278 	}
279 
280 	/* Disallow adjacent spacers. */
281 
282 	if (rp->last && (TBL_CELL_VERT == c || TBL_CELL_DVERT == c) &&
283 			(TBL_CELL_VERT == rp->last->pos ||
284 			 TBL_CELL_DVERT == rp->last->pos)) {
285 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
286 		return(0);
287 	}
288 
289 	/* Allocate cell then parse its modifiers. */
290 
291 	return(mods(tbl, cell_alloc(tbl, rp, c), ln, p, pos));
292 }
293 
294 
295 static void
296 row(struct tbl_node *tbl, int ln, const char *p, int *pos)
297 {
298 	struct tbl_row	*rp;
299 
300 row:	/*
301 	 * EBNF describing this section:
302 	 *
303 	 * row		::= row_list [:space:]* [.]?[\n]
304 	 * row_list	::= [:space:]* row_elem row_tail
305 	 * row_tail	::= [:space:]*[,] row_list |
306 	 *                  epsilon
307 	 * row_elem	::= [\t\ ]*[:alpha:]+
308 	 */
309 
310 	rp = mandoc_calloc(1, sizeof(struct tbl_row));
311 	if (tbl->last_row) {
312 		tbl->last_row->next = rp;
313 		tbl->last_row = rp;
314 	} else
315 		tbl->last_row = tbl->first_row = rp;
316 
317 cell:
318 	while (isspace((unsigned char)p[*pos]))
319 		(*pos)++;
320 
321 	/* Safely exit layout context. */
322 
323 	if ('.' == p[*pos]) {
324 		tbl->part = TBL_PART_DATA;
325 		if (NULL == tbl->first_row)
326 			mandoc_msg(MANDOCERR_TBLNOLAYOUT, tbl->parse,
327 					ln, *pos, NULL);
328 		(*pos)++;
329 		return;
330 	}
331 
332 	/* End (and possibly restart) a row. */
333 
334 	if (',' == p[*pos]) {
335 		(*pos)++;
336 		goto row;
337 	} else if ('\0' == p[*pos])
338 		return;
339 
340 	if ( ! cell(tbl, rp, ln, p, pos))
341 		return;
342 
343 	goto cell;
344 	/* NOTREACHED */
345 }
346 
/*
 * Entry point for parsing one line of table layout.  The line p is
 * parsed from its beginning; ln is the line number used in messages.
 * Problems are reported while parsing, so this function always
 * returns 1.
 */
int
tbl_layout(struct tbl_node *tbl, int ln, const char *p)
{
	int		 offs = 0;

	row(tbl, ln, p, &offs);

	/* Always succeed. */
	return(1);
}
358 
359 static struct tbl_cell *
360 cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos)
361 {
362 	struct tbl_cell	*p, *pp;
363 	struct tbl_head	*h, *hp;
364 
365 	p = mandoc_calloc(1, sizeof(struct tbl_cell));
366 
367 	if (NULL != (pp = rp->last)) {
368 		rp->last->next = p;
369 		rp->last = p;
370 	} else
371 		rp->last = rp->first = p;
372 
373 	p->pos = pos;
374 
375 	/*
376 	 * This is a little bit complicated.  Here we determine the
377 	 * header the corresponds to a cell.  We add headers dynamically
378 	 * when need be or re-use them, otherwise.  As an example, given
379 	 * the following:
380 	 *
381 	 * 	1  c || l
382 	 * 	2  | c | l
383 	 * 	3  l l
384 	 * 	3  || c | l |.
385 	 *
386 	 * We first add the new headers (as there are none) in (1); then
387 	 * in (2) we insert the first spanner (as it doesn't match up
388 	 * with the header); then we re-use the prior data headers,
389 	 * skipping over the spanners; then we re-use everything and add
390 	 * a last spanner.  Note that VERT headers are made into DVERT
391 	 * ones.
392 	 */
393 
394 	h = pp ? pp->head->next : tbl->first_head;
395 
396 	if (h) {
397 		/* Re-use data header. */
398 		if (TBL_HEAD_DATA == h->pos &&
399 				(TBL_CELL_VERT != p->pos &&
400 				 TBL_CELL_DVERT != p->pos)) {
401 			p->head = h;
402 			return(p);
403 		}
404 
405 		/* Re-use spanner header. */
406 		if (TBL_HEAD_DATA != h->pos &&
407 				(TBL_CELL_VERT == p->pos ||
408 				 TBL_CELL_DVERT == p->pos)) {
409 			head_adjust(p, h);
410 			p->head = h;
411 			return(p);
412 		}
413 
414 		/* Right-shift headers with a new spanner. */
415 		if (TBL_HEAD_DATA == h->pos &&
416 				(TBL_CELL_VERT == p->pos ||
417 				 TBL_CELL_DVERT == p->pos)) {
418 			hp = mandoc_calloc(1, sizeof(struct tbl_head));
419 			hp->ident = tbl->opts.cols++;
420 			hp->prev = h->prev;
421 			if (h->prev)
422 				h->prev->next = hp;
423 			if (h == tbl->first_head)
424 				tbl->first_head = hp;
425 			h->prev = hp;
426 			hp->next = h;
427 			head_adjust(p, hp);
428 			p->head = hp;
429 			return(p);
430 		}
431 
432 		if (NULL != (h = h->next)) {
433 			head_adjust(p, h);
434 			p->head = h;
435 			return(p);
436 		}
437 
438 		/* Fall through to default case... */
439 	}
440 
441 	hp = mandoc_calloc(1, sizeof(struct tbl_head));
442 	hp->ident = tbl->opts.cols++;
443 
444 	if (tbl->last_head) {
445 		hp->prev = tbl->last_head;
446 		tbl->last_head->next = hp;
447 		tbl->last_head = hp;
448 	} else
449 		tbl->last_head = tbl->first_head = hp;
450 
451 	head_adjust(p, hp);
452 	p->head = hp;
453 	return(p);
454 }
455 
456 static void
457 head_adjust(const struct tbl_cell *cellp, struct tbl_head *head)
458 {
459 	if (TBL_CELL_VERT != cellp->pos &&
460 			TBL_CELL_DVERT != cellp->pos) {
461 		head->pos = TBL_HEAD_DATA;
462 		return;
463 	}
464 
465 	if (TBL_CELL_VERT == cellp->pos)
466 		if (TBL_HEAD_DVERT != head->pos)
467 			head->pos = TBL_HEAD_VERT;
468 
469 	if (TBL_CELL_DVERT == cellp->pos)
470 		head->pos = TBL_HEAD_DVERT;
471 }
472 
473