xref: /openbsd/usr.bin/mandoc/tag.c (revision 2b14f697)
1 /* $OpenBSD: tag.c,v 1.38 2023/11/24 04:48:02 schwarze Exp $ */
2 /*
3  * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
4  *               Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Functions to tag syntax tree nodes.
19  * For internal use by mandoc(1) validation modules only.
20  */
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "roff_int.h"
37 #include "tag.h"
38 
/*
 * One record of the tag table: a tag string together with
 * the syntax tree nodes currently registered for it.
 */
struct tag_entry {
	struct roff_node	**nodes;	/* Array of registered nodes. */
	size_t			  maxnodes;	/* Allocated slots in nodes. */
	size_t			  nnodes;	/* Slots of nodes in use. */
	int			  prio;		/* Priority of these nodes. */
	char			  s[];		/* The tag, NUL-terminated. */
};
46 
47 static void		 tag_move_href(struct roff_man *,
48 				struct roff_node *, const char *);
49 static void		 tag_move_id(struct roff_node *);
50 
51 static struct ohash	 tag_data;
52 
53 
54 /*
55  * Set up the ohash table to collect nodes
56  * where various marked-up terms are documented.
57  */
58 void
tag_alloc(void)59 tag_alloc(void)
60 {
61 	mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
62 }
63 
64 void
tag_free(void)65 tag_free(void)
66 {
67 	struct tag_entry	*entry;
68 	unsigned int		 slot;
69 
70 	if (tag_data.info.free == NULL)
71 		return;
72 	entry = ohash_first(&tag_data, &slot);
73 	while (entry != NULL) {
74 		free(entry->nodes);
75 		free(entry);
76 		entry = ohash_next(&tag_data, &slot);
77 	}
78 	ohash_delete(&tag_data);
79 	tag_data.info.free = NULL;
80 }
81 
82 /*
83  * Set a node where a term is defined,
84  * unless the term is already defined at a lower priority.
85  */
86 void
tag_put(const char * s,int prio,struct roff_node * n)87 tag_put(const char *s, int prio, struct roff_node *n)
88 {
89 	struct tag_entry	*entry;
90 	struct roff_node	*nold;
91 	const char		*se, *src;
92 	char			*cpy;
93 	size_t			 len;
94 	unsigned int		 slot;
95 	int			 changed;
96 
97 	assert(prio <= TAG_FALLBACK);
98 
99 	/*
100 	 * If the node is already tagged, the existing tag is
101 	 * explicit and we are now about to add an implicit tag.
102 	 * Don't do that; just skip implicit tagging if the author
103 	 * specified an explicit tag.
104 	 */
105 
106 	if (n->flags & NODE_ID)
107 		return;
108 
109 	/* Determine the implicit tag. */
110 
111 	changed = 1;
112 	if (s == NULL) {
113 		if (n->child == NULL || n->child->type != ROFFT_TEXT)
114 			return;
115 		s = n->child->string;
116 		switch (s[0]) {
117 		case '-':
118 			s++;
119 			break;
120 		case '\\':
121 			switch (s[1]) {
122 			case '&':
123 			case '-':
124 			case 'e':
125 				s += 2;
126 				break;
127 			default:
128 				return;
129 			}
130 			break;
131 		default:
132 			changed = 0;
133 			break;
134 		}
135 	}
136 
137 	/*
138 	 * Translate \- and ASCII_HYPH to plain '-'.
139 	 * Skip whitespace and escapes and whatever follows,
140 	 * and if there is any, downgrade the priority.
141 	 */
142 
143 	cpy = mandoc_malloc(strlen(s) + 1);
144 	for (src = s, len = 0; *src != '\0'; src++, len++) {
145 		switch (*src) {
146 		case '\t':
147 		case ' ':
148 			changed = 1;
149 			break;
150 		case ASCII_HYPH:
151 			cpy[len] = '-';
152 			changed = 1;
153 			continue;
154 		case '\\':
155 			if (src[1] != '-')
156 				break;
157 			src++;
158 			changed = 1;
159 			/* FALLTHROUGH */
160 		default:
161 			cpy[len] = *src;
162 			continue;
163 		}
164 		break;
165 	}
166 	if (len == 0)
167 		goto out;
168 	cpy[len] = '\0';
169 
170 	if (*src != '\0' && prio < TAG_WEAK)
171 		prio = TAG_WEAK;
172 
173 	s = cpy;
174 	se = cpy + len;
175 	slot = ohash_qlookupi(&tag_data, s, &se);
176 	entry = ohash_find(&tag_data, slot);
177 
178 	/* Build a new entry. */
179 
180 	if (entry == NULL) {
181 		entry = mandoc_malloc(sizeof(*entry) + len + 1);
182 		memcpy(entry->s, s, len + 1);
183 		entry->nodes = NULL;
184 		entry->maxnodes = entry->nnodes = 0;
185 		ohash_insert(&tag_data, slot, entry);
186 	}
187 
188 	/*
189 	 * Lower priority numbers take precedence.
190 	 * If a better entry is already present, ignore the new one.
191 	 */
192 
193 	else if (entry->prio < prio)
194 		goto out;
195 
196 	/*
197 	 * If the existing entry is worse, clear it.
198 	 * In addition, a tag with priority TAG_FALLBACK
199 	 * is only used if the tag occurs exactly once.
200 	 */
201 
202 	else if (entry->prio > prio || prio == TAG_FALLBACK) {
203 		while (entry->nnodes > 0) {
204 			nold = entry->nodes[--entry->nnodes];
205 			nold->flags &= ~NODE_ID;
206 			free(nold->tag);
207 			nold->tag = NULL;
208 		}
209 		if (prio == TAG_FALLBACK) {
210 			entry->prio = TAG_DELETE;
211 			goto out;
212 		}
213 	}
214 
215 	/* Remember the new node. */
216 
217 	if (entry->maxnodes == entry->nnodes) {
218 		entry->maxnodes += 4;
219 		entry->nodes = mandoc_reallocarray(entry->nodes,
220 		    entry->maxnodes, sizeof(*entry->nodes));
221 	}
222 	entry->nodes[entry->nnodes++] = n;
223 	entry->prio = prio;
224 	n->flags |= NODE_ID;
225 	if (changed) {
226 		assert(n->tag == NULL);
227 		n->tag = mandoc_strndup(s, len);
228 	}
229 
230  out:
231 	free(cpy);
232 }
233 
234 int
tag_exists(const char * tag)235 tag_exists(const char *tag)
236 {
237 	return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
238 }
239 
240 /*
241  * For in-line elements, move the link target
242  * to the enclosing paragraph when appropriate.
243  */
244 static void
tag_move_id(struct roff_node * n)245 tag_move_id(struct roff_node *n)
246 {
247 	struct roff_node *np;
248 
249 	np = n;
250 	for (;;) {
251 		if (np->prev != NULL)
252 			np = np->prev;
253 		else if ((np = np->parent) == NULL)
254 			return;
255 		switch (np->tok) {
256 		case MDOC_It:
257 			switch (np->parent->parent->norm->Bl.type) {
258 			case LIST_column:
259 				/* Target the ROFFT_BLOCK = <tr>. */
260 				np = np->parent;
261 				break;
262 			case LIST_diag:
263 			case LIST_hang:
264 			case LIST_inset:
265 			case LIST_ohang:
266 			case LIST_tag:
267 				/* Target the ROFFT_HEAD = <dt>. */
268 				np = np->parent->head;
269 				break;
270 			default:
271 				/* Target the ROFF_BODY = <li>. */
272 				break;
273 			}
274 			/* FALLTHROUGH */
275 		case MDOC_Pp:	/* Target the ROFFT_ELEM = <p>. */
276 			if (np->tag == NULL) {
277 				np->tag = mandoc_strdup(n->tag == NULL ?
278 				    n->child->string : n->tag);
279 				np->flags |= NODE_ID;
280 				n->flags &= ~NODE_ID;
281 			}
282 			return;
283 		case MDOC_Sh:
284 		case MDOC_Ss:
285 		case MDOC_Bd:
286 		case MDOC_Bl:
287 		case MDOC_D1:
288 		case MDOC_Dl:
289 		case MDOC_Rs:
290 			/* Do not move past major blocks. */
291 			return;
292 		default:
293 			/*
294 			 * Move past in-line content and partial
295 			 * blocks, for example .It Xo or .It Bq Er.
296 			 */
297 			break;
298 		}
299 	}
300 }
301 
302 /*
303  * When a paragraph is tagged and starts with text,
304  * move the permalink to the first few words.
305  */
306 static void
tag_move_href(struct roff_man * man,struct roff_node * n,const char * tag)307 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
308 {
309 	char	*cp;
310 
311 	if (n == NULL || n->type != ROFFT_TEXT ||
312 	    *n->string == '\0' || *n->string == ' ')
313 		return;
314 
315 	cp = n->string;
316 	while (cp != NULL && cp - n->string < 5)
317 		cp = strchr(cp + 1, ' ');
318 
319 	/* If the first text node is longer, split it. */
320 
321 	if (cp != NULL && cp[1] != '\0') {
322 		man->last = n;
323 		man->next = ROFF_NEXT_SIBLING;
324 		roff_word_alloc(man, n->line,
325 		    n->pos + (cp - n->string), cp + 1);
326 		man->last->flags = n->flags & ~NODE_LINE;
327 		*cp = '\0';
328 	}
329 
330 	assert(n->tag == NULL);
331 	n->tag = mandoc_strdup(tag);
332 	n->flags |= NODE_HREF;
333 }
334 
335 /*
336  * When all tags have been set, decide where to put
337  * the associated permalinks, and maybe move some tags
338  * to the beginning of the respective paragraphs.
339  */
340 void
tag_postprocess(struct roff_man * man,struct roff_node * n)341 tag_postprocess(struct roff_man *man, struct roff_node *n)
342 {
343 	if (n->flags & NODE_ID) {
344 		switch (n->tok) {
345 		case MDOC_Pp:
346 			tag_move_href(man, n->next, n->tag);
347 			break;
348 		case MDOC_Bd:
349 		case MDOC_D1:
350 		case MDOC_Dl:
351 			tag_move_href(man, n->child, n->tag);
352 			break;
353 		case MDOC_Bl:
354 			/* XXX No permalink for now. */
355 			break;
356 		default:
357 			if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
358 				tag_move_id(n);
359 			if (n->tok != MDOC_Tg)
360 				n->flags |= NODE_HREF;
361 			else if ((n->flags & NODE_ID) == 0) {
362 				n->flags |= NODE_NOPRT;
363 				free(n->tag);
364 				n->tag = NULL;
365 			}
366 			break;
367 		}
368 	}
369 	for (n = n->child; n != NULL; n = n->next)
370 		tag_postprocess(man, n);
371 }
372