xref: /netbsd/external/bsd/mdocml/dist/tbl_layout.c (revision 6550d01e)
1 /*	$Vendor-Id: tbl_layout.c,v 1.13 2011/01/09 05:38:23 joerg Exp $ */
2 /*
3  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 
23 #include "mandoc.h"
24 #include "libmandoc.h"
25 #include "libroff.h"
26 
27 struct	tbl_phrase {
28 	char		 name;
29 	enum tbl_cellt	 key;
30 };
31 
32 /*
33  * FIXME: we can make this parse a lot nicer by, when an error is
34  * encountered in a layout key, bailing to the next key (i.e. to the
35  * next whitespace then continuing).
36  */
37 
38 #define	KEYS_MAX	 11
39 
40 static	const struct tbl_phrase keys[KEYS_MAX] = {
41 	{ 'c',		 TBL_CELL_CENTRE },
42 	{ 'r',		 TBL_CELL_RIGHT },
43 	{ 'l',		 TBL_CELL_LEFT },
44 	{ 'n',		 TBL_CELL_NUMBER },
45 	{ 's',		 TBL_CELL_SPAN },
46 	{ 'a',		 TBL_CELL_LONG },
47 	{ '^',		 TBL_CELL_DOWN },
48 	{ '-',		 TBL_CELL_HORIZ },
49 	{ '_',		 TBL_CELL_HORIZ },
50 	{ '=',		 TBL_CELL_DHORIZ },
51 	{ '|',		 TBL_CELL_VERT }
52 };
53 
/* Forward declarations of the file-local layout parsing helpers. */
static	int		 mods(struct tbl_node *tbl, struct tbl_cell *cp,
				int ln, const char *p, int *pos);
static	int		 cell(struct tbl_node *tbl, struct tbl_row *rp,
				int ln, const char *p, int *pos);
static	void		 row(struct tbl_node *tbl, int ln,
				const char *p, int *pos);
static	struct tbl_cell *cell_alloc(struct tbl_node *tbl,
				struct tbl_row *rp, enum tbl_cellt pos);
static	void		 head_adjust(const struct tbl_cell *cell,
				struct tbl_head *head);
63 
64 static int
65 mods(struct tbl_node *tbl, struct tbl_cell *cp,
66 		int ln, const char *p, int *pos)
67 {
68 	char		 buf[5];
69 	int		 i;
70 
71 mod:
72 	/*
73 	 * XXX: since, at least for now, modifiers are non-conflicting
74 	 * (are separable by value, regardless of position), we let
75 	 * modifiers come in any order.  The existing tbl doesn't let
76 	 * this happen.
77 	 */
78 	switch (p[*pos]) {
79 	case ('\0'):
80 		/* FALLTHROUGH */
81 	case (' '):
82 		/* FALLTHROUGH */
83 	case ('\t'):
84 		/* FALLTHROUGH */
85 	case (','):
86 		/* FALLTHROUGH */
87 	case ('.'):
88 		return(1);
89 	default:
90 		break;
91 	}
92 
93 	/* Throw away parenthesised expression. */
94 
95 	if ('(' == p[*pos]) {
96 		(*pos)++;
97 		while (p[*pos] && ')' != p[*pos])
98 			(*pos)++;
99 		if (')' == p[*pos]) {
100 			(*pos)++;
101 			goto mod;
102 		}
103 		TBL_MSG(tbl, MANDOCERR_TBLLAYOUT, ln, *pos);
104 		return(0);
105 	}
106 
107 	/* Parse numerical spacing from modifier string. */
108 
109 	if (isdigit((unsigned char)p[*pos])) {
110 		for (i = 0; i < 4; i++) {
111 			if ( ! isdigit((unsigned char)p[*pos + i]))
112 				break;
113 			buf[i] = p[*pos + i];
114 		}
115 		buf[i] = '\0';
116 
117 		/* No greater than 4 digits. */
118 
119 		if (4 == i) {
120 			TBL_MSG(tbl, MANDOCERR_TBLLAYOUT, ln, *pos);
121 			return(0);
122 		}
123 
124 		*pos += i;
125 		cp->spacing = atoi(buf);
126 
127 		goto mod;
128 		/* NOTREACHED */
129 	}
130 
131 	/* TODO: GNU has many more extensions. */
132 
133 	switch (tolower((unsigned char)p[(*pos)++])) {
134 	case ('z'):
135 		cp->flags |= TBL_CELL_WIGN;
136 		goto mod;
137 	case ('u'):
138 		cp->flags |= TBL_CELL_UP;
139 		goto mod;
140 	case ('e'):
141 		cp->flags |= TBL_CELL_EQUAL;
142 		goto mod;
143 	case ('t'):
144 		cp->flags |= TBL_CELL_TALIGN;
145 		goto mod;
146 	case ('d'):
147 		cp->flags |= TBL_CELL_BALIGN;
148 		goto mod;
149 	case ('w'):  /* XXX for now, ignore minimal column width */
150 		goto mod;
151 	case ('f'):
152 		break;
153 	case ('b'):
154 		/* FALLTHROUGH */
155 	case ('i'):
156 		(*pos)--;
157 		break;
158 	default:
159 		TBL_MSG(tbl, MANDOCERR_TBLLAYOUT, ln, *pos - 1);
160 		return(0);
161 	}
162 
163 	switch (tolower((unsigned char)p[(*pos)++])) {
164 	case ('b'):
165 		cp->flags |= TBL_CELL_BOLD;
166 		goto mod;
167 	case ('i'):
168 		cp->flags |= TBL_CELL_ITALIC;
169 		goto mod;
170 	default:
171 		break;
172 	}
173 
174 	TBL_MSG(tbl, MANDOCERR_TBLLAYOUT, ln, *pos - 1);
175 	return(0);
176 }
177 
178 static int
179 cell(struct tbl_node *tbl, struct tbl_row *rp,
180 		int ln, const char *p, int *pos)
181 {
182 	int		 i;
183 	enum tbl_cellt	 c;
184 
185 	/* Parse the column position (`r', `R', `|', ...). */
186 
187 	for (i = 0; i < KEYS_MAX; i++)
188 		if (tolower((unsigned char)p[*pos]) == keys[i].name)
189 			break;
190 
191 	if (KEYS_MAX == i) {
192 		TBL_MSG(tbl, MANDOCERR_TBLLAYOUT, ln, *pos);
193 		return(0);
194 	}
195 
196 	c = keys[i].key;
197 
198 	/*
199 	 * If a span cell is found first, raise a warning and abort the
200 	 * parse.  FIXME: recover from this somehow?
201 	 */
202 
203 	if (NULL == rp->first && TBL_CELL_SPAN == c) {
204 		TBL_MSG(tbl, MANDOCERR_TBLLAYOUT, ln, *pos);
205 		return(0);
206 	}
207 
208 	(*pos)++;
209 
210 	/* Extra check for the double-vertical. */
211 
212 	if (TBL_CELL_VERT == c && '|' == p[*pos]) {
213 		(*pos)++;
214 		c = TBL_CELL_DVERT;
215 	}
216 
217 	/* Disallow adjacent spacers. */
218 
219 	if (rp->last && (TBL_CELL_VERT == c || TBL_CELL_DVERT == c) &&
220 			(TBL_CELL_VERT == rp->last->pos ||
221 			 TBL_CELL_DVERT == rp->last->pos)) {
222 		TBL_MSG(tbl, MANDOCERR_TBLLAYOUT, ln, *pos - 1);
223 		return(0);
224 	}
225 
226 	/* Allocate cell then parse its modifiers. */
227 
228 	return(mods(tbl, cell_alloc(tbl, rp, c), ln, p, pos));
229 }
230 
231 
232 static void
233 row(struct tbl_node *tbl, int ln, const char *p, int *pos)
234 {
235 	struct tbl_row	*rp;
236 
237 row:	/*
238 	 * EBNF describing this section:
239 	 *
240 	 * row		::= row_list [:space:]* [.]?[\n]
241 	 * row_list	::= [:space:]* row_elem row_tail
242 	 * row_tail	::= [:space:]*[,] row_list |
243 	 *                  epsilon
244 	 * row_elem	::= [\t\ ]*[:alpha:]+
245 	 */
246 
247 	rp = mandoc_calloc(1, sizeof(struct tbl_row));
248 	if (tbl->last_row) {
249 		tbl->last_row->next = rp;
250 		tbl->last_row = rp;
251 	} else
252 		tbl->last_row = tbl->first_row = rp;
253 
254 cell:
255 	while (isspace((unsigned char)p[*pos]))
256 		(*pos)++;
257 
258 	/* Safely exit layout context. */
259 
260 	if ('.' == p[*pos]) {
261 		tbl->part = TBL_PART_DATA;
262 		if (NULL == tbl->first_row)
263 			TBL_MSG(tbl, MANDOCERR_TBLNOLAYOUT, ln, *pos);
264 		(*pos)++;
265 		return;
266 	}
267 
268 	/* End (and possibly restart) a row. */
269 
270 	if (',' == p[*pos]) {
271 		(*pos)++;
272 		goto row;
273 	} else if ('\0' == p[*pos])
274 		return;
275 
276 	if ( ! cell(tbl, rp, ln, p, pos))
277 		return;
278 
279 	goto cell;
280 	/* NOTREACHED */
281 }
282 
/*
 * Entry point for one line of table layout: hand the line `p' (source
 * line number `ln') to row(), starting at offset zero.
 *
 * Layout problems are reported by the helpers; this function itself
 * always returns 1.
 */
int
tbl_layout(struct tbl_node *tbl, int ln, const char *p)
{
	int		 pos = 0;

	row(tbl, ln, p, &pos);

	/* Always succeed. */
	return(1);
}
294 
295 static struct tbl_cell *
296 cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos)
297 {
298 	struct tbl_cell	*p, *pp;
299 	struct tbl_head	*h, *hp;
300 
301 	p = mandoc_calloc(1, sizeof(struct tbl_cell));
302 
303 	if (NULL != (pp = rp->last)) {
304 		rp->last->next = p;
305 		rp->last = p;
306 	} else
307 		rp->last = rp->first = p;
308 
309 	p->pos = pos;
310 
311 	/*
312 	 * This is a little bit complicated.  Here we determine the
313 	 * header the corresponds to a cell.  We add headers dynamically
314 	 * when need be or re-use them, otherwise.  As an example, given
315 	 * the following:
316 	 *
317 	 * 	1  c || l
318 	 * 	2  | c | l
319 	 * 	3  l l
320 	 * 	3  || c | l |.
321 	 *
322 	 * We first add the new headers (as there are none) in (1); then
323 	 * in (2) we insert the first spanner (as it doesn't match up
324 	 * with the header); then we re-use the prior data headers,
325 	 * skipping over the spanners; then we re-use everything and add
326 	 * a last spanner.  Note that VERT headers are made into DVERT
327 	 * ones.
328 	 */
329 
330 	h = pp ? pp->head->next : tbl->first_head;
331 
332 	if (h) {
333 		/* Re-use data header. */
334 		if (TBL_HEAD_DATA == h->pos &&
335 				(TBL_CELL_VERT != p->pos &&
336 				 TBL_CELL_DVERT != p->pos)) {
337 			p->head = h;
338 			return(p);
339 		}
340 
341 		/* Re-use spanner header. */
342 		if (TBL_HEAD_DATA != h->pos &&
343 				(TBL_CELL_VERT == p->pos ||
344 				 TBL_CELL_DVERT == p->pos)) {
345 			head_adjust(p, h);
346 			p->head = h;
347 			return(p);
348 		}
349 
350 		/* Right-shift headers with a new spanner. */
351 		if (TBL_HEAD_DATA == h->pos &&
352 				(TBL_CELL_VERT == p->pos ||
353 				 TBL_CELL_DVERT == p->pos)) {
354 			hp = mandoc_calloc(1, sizeof(struct tbl_head));
355 			hp->ident = tbl->opts.cols++;
356 			hp->prev = h->prev;
357 			if (h->prev)
358 				h->prev->next = hp;
359 			if (h == tbl->first_head)
360 				tbl->first_head = hp;
361 			h->prev = hp;
362 			hp->next = h;
363 			head_adjust(p, hp);
364 			p->head = hp;
365 			return(p);
366 		}
367 
368 		if (NULL != (h = h->next)) {
369 			head_adjust(p, h);
370 			p->head = h;
371 			return(p);
372 		}
373 
374 		/* Fall through to default case... */
375 	}
376 
377 	hp = mandoc_calloc(1, sizeof(struct tbl_head));
378 	hp->ident = tbl->opts.cols++;
379 
380 	if (tbl->last_head) {
381 		hp->prev = tbl->last_head;
382 		tbl->last_head->next = hp;
383 		tbl->last_head = hp;
384 	} else
385 		tbl->last_head = tbl->first_head = hp;
386 
387 	head_adjust(p, hp);
388 	p->head = hp;
389 	return(p);
390 }
391 
392 static void
393 head_adjust(const struct tbl_cell *cell, struct tbl_head *head)
394 {
395 	if (TBL_CELL_VERT != cell->pos &&
396 			TBL_CELL_DVERT != cell->pos) {
397 		head->pos = TBL_HEAD_DATA;
398 		return;
399 	}
400 
401 	if (TBL_CELL_VERT == cell->pos)
402 		if (TBL_HEAD_DVERT != head->pos)
403 			head->pos = TBL_HEAD_VERT;
404 
405 	if (TBL_CELL_DVERT == cell->pos)
406 		head->pos = TBL_HEAD_DVERT;
407 }
408 
409