xref: /openbsd/usr.bin/mandoc/tag.c (revision 09467b48)
1 /* $OpenBSD: tag.c,v 1.36 2020/04/19 16:26:11 schwarze Exp $ */
2 /*
3  * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * Functions to tag syntax tree nodes.
18  * For internal use by mandoc(1) validation modules only.
19  */
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <limits.h>
24 #include <stddef.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 #include "mandoc_aux.h"
30 #include "mandoc_ohash.h"
31 #include "roff.h"
32 #include "mdoc.h"
33 #include "roff_int.h"
34 #include "tag.h"
35 
/*
 * One record of the tag table: a tag name together with
 * the syntax tree nodes currently registered for that name.
 * The name is stored inline after the fixed members, so records
 * are allocated with room for the string and its terminating NUL.
 */
struct tag_entry {
	struct roff_node	**nodes;	/* registered nodes */
	size_t			  maxnodes;	/* allocated slots in nodes */
	size_t			  nnodes;	/* used slots in nodes */
	int			  prio;		/* priority of these nodes */
	char			  s[];		/* NUL-terminated tag name */
};
43 
44 static void		 tag_move_href(struct roff_man *,
45 				struct roff_node *, const char *);
46 static void		 tag_move_id(struct roff_node *);
47 
48 static struct ohash	 tag_data;
49 
50 
51 /*
52  * Set up the ohash table to collect nodes
53  * where various marked-up terms are documented.
54  */
55 void
56 tag_alloc(void)
57 {
58 	mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
59 }
60 
61 void
62 tag_free(void)
63 {
64 	struct tag_entry	*entry;
65 	unsigned int		 slot;
66 
67 	if (tag_data.info.free == NULL)
68 		return;
69 	entry = ohash_first(&tag_data, &slot);
70 	while (entry != NULL) {
71 		free(entry->nodes);
72 		free(entry);
73 		entry = ohash_next(&tag_data, &slot);
74 	}
75 	ohash_delete(&tag_data);
76 	tag_data.info.free = NULL;
77 }
78 
79 /*
80  * Set a node where a term is defined,
81  * unless it is already defined at a lower priority.
82  */
83 void
84 tag_put(const char *s, int prio, struct roff_node *n)
85 {
86 	struct tag_entry	*entry;
87 	struct roff_node	*nold;
88 	const char		*se;
89 	size_t			 len;
90 	unsigned int		 slot;
91 
92 	assert(prio <= TAG_FALLBACK);
93 
94 	if (s == NULL) {
95 		if (n->child == NULL || n->child->type != ROFFT_TEXT)
96 			return;
97 		s = n->child->string;
98 		switch (s[0]) {
99 		case '-':
100 			s++;
101 			break;
102 		case '\\':
103 			switch (s[1]) {
104 			case '&':
105 			case '-':
106 			case 'e':
107 				s += 2;
108 				break;
109 			default:
110 				break;
111 			}
112 			break;
113 		default:
114 			break;
115 		}
116 	}
117 
118 	/*
119 	 * Skip whitespace and escapes and whatever follows,
120 	 * and if there is any, downgrade the priority.
121 	 */
122 
123 	len = strcspn(s, " \t\\");
124 	if (len == 0)
125 		return;
126 
127 	se = s + len;
128 	if (*se != '\0' && prio < TAG_WEAK)
129 		prio = TAG_WEAK;
130 
131 	slot = ohash_qlookupi(&tag_data, s, &se);
132 	entry = ohash_find(&tag_data, slot);
133 
134 	/* Build a new entry. */
135 
136 	if (entry == NULL) {
137 		entry = mandoc_malloc(sizeof(*entry) + len + 1);
138 		memcpy(entry->s, s, len);
139 		entry->s[len] = '\0';
140 		entry->nodes = NULL;
141 		entry->maxnodes = entry->nnodes = 0;
142 		ohash_insert(&tag_data, slot, entry);
143 	}
144 
145 	/*
146 	 * Lower priority numbers take precedence.
147 	 * If a better entry is already present, ignore the new one.
148 	 */
149 
150 	else if (entry->prio < prio)
151 			return;
152 
153 	/*
154 	 * If the existing entry is worse, clear it.
155 	 * In addition, a tag with priority TAG_FALLBACK
156 	 * is only used if the tag occurs exactly once.
157 	 */
158 
159 	else if (entry->prio > prio || prio == TAG_FALLBACK) {
160 		while (entry->nnodes > 0) {
161 			nold = entry->nodes[--entry->nnodes];
162 			nold->flags &= ~NODE_ID;
163 			free(nold->tag);
164 			nold->tag = NULL;
165 		}
166 		if (prio == TAG_FALLBACK) {
167 			entry->prio = TAG_DELETE;
168 			return;
169 		}
170 	}
171 
172 	/* Remember the new node. */
173 
174 	if (entry->maxnodes == entry->nnodes) {
175 		entry->maxnodes += 4;
176 		entry->nodes = mandoc_reallocarray(entry->nodes,
177 		    entry->maxnodes, sizeof(*entry->nodes));
178 	}
179 	entry->nodes[entry->nnodes++] = n;
180 	entry->prio = prio;
181 	n->flags |= NODE_ID;
182 	if (n->child == NULL || n->child->string != s || *se != '\0') {
183 		assert(n->tag == NULL);
184 		n->tag = mandoc_strndup(s, len);
185 	}
186 }
187 
188 int
189 tag_exists(const char *tag)
190 {
191 	return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
192 }
193 
194 /*
195  * For in-line elements, move the link target
196  * to the enclosing paragraph when appropriate.
197  */
198 static void
199 tag_move_id(struct roff_node *n)
200 {
201 	struct roff_node *np;
202 
203 	np = n;
204 	for (;;) {
205 		if (np->prev != NULL)
206 			np = np->prev;
207 		else if ((np = np->parent) == NULL)
208 			return;
209 		switch (np->tok) {
210 		case MDOC_It:
211 			switch (np->parent->parent->norm->Bl.type) {
212 			case LIST_column:
213 				/* Target the ROFFT_BLOCK = <tr>. */
214 				np = np->parent;
215 				break;
216 			case LIST_diag:
217 			case LIST_hang:
218 			case LIST_inset:
219 			case LIST_ohang:
220 			case LIST_tag:
221 				/* Target the ROFFT_HEAD = <dt>. */
222 				np = np->parent->head;
223 				break;
224 			default:
225 				/* Target the ROFF_BODY = <li>. */
226 				break;
227 			}
228 			/* FALLTHROUGH */
229 		case MDOC_Pp:	/* Target the ROFFT_ELEM = <p>. */
230 			if (np->tag == NULL) {
231 				np->tag = mandoc_strdup(n->tag == NULL ?
232 				    n->child->string : n->tag);
233 				np->flags |= NODE_ID;
234 				n->flags &= ~NODE_ID;
235 			}
236 			return;
237 		case MDOC_Sh:
238 		case MDOC_Ss:
239 		case MDOC_Bd:
240 		case MDOC_Bl:
241 		case MDOC_D1:
242 		case MDOC_Dl:
243 		case MDOC_Rs:
244 			/* Do not move past major blocks. */
245 			return;
246 		default:
247 			/*
248 			 * Move past in-line content and partial
249 			 * blocks, for example .It Xo or .It Bq Er.
250 			 */
251 			break;
252 		}
253 	}
254 }
255 
256 /*
257  * When a paragraph is tagged and starts with text,
258  * move the permalink to the first few words.
259  */
260 static void
261 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
262 {
263 	char	*cp;
264 
265 	if (n == NULL || n->type != ROFFT_TEXT ||
266 	    *n->string == '\0' || *n->string == ' ')
267 		return;
268 
269 	cp = n->string;
270 	while (cp != NULL && cp - n->string < 5)
271 		cp = strchr(cp + 1, ' ');
272 
273 	/* If the first text node is longer, split it. */
274 
275 	if (cp != NULL && cp[1] != '\0') {
276 		man->last = n;
277 		man->next = ROFF_NEXT_SIBLING;
278 		roff_word_alloc(man, n->line,
279 		    n->pos + (cp - n->string), cp + 1);
280 		man->last->flags = n->flags & ~NODE_LINE;
281 		*cp = '\0';
282 	}
283 
284 	assert(n->tag == NULL);
285 	n->tag = mandoc_strdup(tag);
286 	n->flags |= NODE_HREF;
287 }
288 
289 /*
290  * When all tags have been set, decide where to put
291  * the associated permalinks, and maybe move some tags
292  * to the beginning of the respective paragraphs.
293  */
294 void
295 tag_postprocess(struct roff_man *man, struct roff_node *n)
296 {
297 	if (n->flags & NODE_ID) {
298 		switch (n->tok) {
299 		case MDOC_Pp:
300 			tag_move_href(man, n->next, n->tag);
301 			break;
302 		case MDOC_Bd:
303 		case MDOC_D1:
304 		case MDOC_Dl:
305 			tag_move_href(man, n->child, n->tag);
306 			break;
307 		case MDOC_Bl:
308 			/* XXX No permalink for now. */
309 			break;
310 		default:
311 			if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
312 				tag_move_id(n);
313 			if (n->tok != MDOC_Tg)
314 				n->flags |= NODE_HREF;
315 			else if ((n->flags & NODE_ID) == 0) {
316 				n->flags |= NODE_NOPRT;
317 				free(n->tag);
318 				n->tag = NULL;
319 			}
320 			break;
321 		}
322 	}
323 	for (n = n->child; n != NULL; n = n->next)
324 		tag_postprocess(man, n);
325 }
326