1 /* $OpenBSD: tag.c,v 1.38 2023/11/24 04:48:02 schwarze Exp $ */
2 /*
3 * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
4 * Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Functions to tag syntax tree nodes.
19 * For internal use by mandoc(1) validation modules only.
20 */
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "roff_int.h"
37 #include "tag.h"
38
/*
 * One hash table entry: a tag string together with
 * the syntax tree nodes currently registered for it.
 */
struct tag_entry {
	/* Array of registered nodes; enlarged in steps of four by tag_put(). */
	struct roff_node **nodes;
	/* Number of slots allocated in nodes[]. */
	size_t maxnodes;
	/* Number of slots of nodes[] that are in use. */
	size_t nnodes;
	/* Priority of the registered nodes; lower numbers take precedence. */
	int prio;
	/* The NUL-terminated tag; its offset is given to mandoc_ohash_init(). */
	char s[];
};
46
/* File-local helpers, both called from tag_postprocess(). */
static void tag_move_href(struct roff_man *,
    struct roff_node *, const char *);
static void tag_move_id(struct roff_node *);

/*
 * The table of struct tag_entry objects: set up by tag_alloc(),
 * filled by tag_put(), queried by tag_exists(), released by tag_free().
 */
static struct ohash tag_data;
52
53
54 /*
55 * Set up the ohash table to collect nodes
56 * where various marked-up terms are documented.
57 */
58 void
tag_alloc(void)59 tag_alloc(void)
60 {
61 mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
62 }
63
64 void
tag_free(void)65 tag_free(void)
66 {
67 struct tag_entry *entry;
68 unsigned int slot;
69
70 if (tag_data.info.free == NULL)
71 return;
72 entry = ohash_first(&tag_data, &slot);
73 while (entry != NULL) {
74 free(entry->nodes);
75 free(entry);
76 entry = ohash_next(&tag_data, &slot);
77 }
78 ohash_delete(&tag_data);
79 tag_data.info.free = NULL;
80 }
81
82 /*
83 * Set a node where a term is defined,
84 * unless the term is already defined at a lower priority.
85 */
86 void
tag_put(const char * s,int prio,struct roff_node * n)87 tag_put(const char *s, int prio, struct roff_node *n)
88 {
89 struct tag_entry *entry;
90 struct roff_node *nold;
91 const char *se, *src;
92 char *cpy;
93 size_t len;
94 unsigned int slot;
95 int changed;
96
97 assert(prio <= TAG_FALLBACK);
98
99 /*
100 * If the node is already tagged, the existing tag is
101 * explicit and we are now about to add an implicit tag.
102 * Don't do that; just skip implicit tagging if the author
103 * specified an explicit tag.
104 */
105
106 if (n->flags & NODE_ID)
107 return;
108
109 /* Determine the implicit tag. */
110
111 changed = 1;
112 if (s == NULL) {
113 if (n->child == NULL || n->child->type != ROFFT_TEXT)
114 return;
115 s = n->child->string;
116 switch (s[0]) {
117 case '-':
118 s++;
119 break;
120 case '\\':
121 switch (s[1]) {
122 case '&':
123 case '-':
124 case 'e':
125 s += 2;
126 break;
127 default:
128 return;
129 }
130 break;
131 default:
132 changed = 0;
133 break;
134 }
135 }
136
137 /*
138 * Translate \- and ASCII_HYPH to plain '-'.
139 * Skip whitespace and escapes and whatever follows,
140 * and if there is any, downgrade the priority.
141 */
142
143 cpy = mandoc_malloc(strlen(s) + 1);
144 for (src = s, len = 0; *src != '\0'; src++, len++) {
145 switch (*src) {
146 case '\t':
147 case ' ':
148 changed = 1;
149 break;
150 case ASCII_HYPH:
151 cpy[len] = '-';
152 changed = 1;
153 continue;
154 case '\\':
155 if (src[1] != '-')
156 break;
157 src++;
158 changed = 1;
159 /* FALLTHROUGH */
160 default:
161 cpy[len] = *src;
162 continue;
163 }
164 break;
165 }
166 if (len == 0)
167 goto out;
168 cpy[len] = '\0';
169
170 if (*src != '\0' && prio < TAG_WEAK)
171 prio = TAG_WEAK;
172
173 s = cpy;
174 se = cpy + len;
175 slot = ohash_qlookupi(&tag_data, s, &se);
176 entry = ohash_find(&tag_data, slot);
177
178 /* Build a new entry. */
179
180 if (entry == NULL) {
181 entry = mandoc_malloc(sizeof(*entry) + len + 1);
182 memcpy(entry->s, s, len + 1);
183 entry->nodes = NULL;
184 entry->maxnodes = entry->nnodes = 0;
185 ohash_insert(&tag_data, slot, entry);
186 }
187
188 /*
189 * Lower priority numbers take precedence.
190 * If a better entry is already present, ignore the new one.
191 */
192
193 else if (entry->prio < prio)
194 goto out;
195
196 /*
197 * If the existing entry is worse, clear it.
198 * In addition, a tag with priority TAG_FALLBACK
199 * is only used if the tag occurs exactly once.
200 */
201
202 else if (entry->prio > prio || prio == TAG_FALLBACK) {
203 while (entry->nnodes > 0) {
204 nold = entry->nodes[--entry->nnodes];
205 nold->flags &= ~NODE_ID;
206 free(nold->tag);
207 nold->tag = NULL;
208 }
209 if (prio == TAG_FALLBACK) {
210 entry->prio = TAG_DELETE;
211 goto out;
212 }
213 }
214
215 /* Remember the new node. */
216
217 if (entry->maxnodes == entry->nnodes) {
218 entry->maxnodes += 4;
219 entry->nodes = mandoc_reallocarray(entry->nodes,
220 entry->maxnodes, sizeof(*entry->nodes));
221 }
222 entry->nodes[entry->nnodes++] = n;
223 entry->prio = prio;
224 n->flags |= NODE_ID;
225 if (changed) {
226 assert(n->tag == NULL);
227 n->tag = mandoc_strndup(s, len);
228 }
229
230 out:
231 free(cpy);
232 }
233
234 int
tag_exists(const char * tag)235 tag_exists(const char *tag)
236 {
237 return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
238 }
239
240 /*
241 * For in-line elements, move the link target
242 * to the enclosing paragraph when appropriate.
243 */
244 static void
tag_move_id(struct roff_node * n)245 tag_move_id(struct roff_node *n)
246 {
247 struct roff_node *np;
248
249 np = n;
250 for (;;) {
251 if (np->prev != NULL)
252 np = np->prev;
253 else if ((np = np->parent) == NULL)
254 return;
255 switch (np->tok) {
256 case MDOC_It:
257 switch (np->parent->parent->norm->Bl.type) {
258 case LIST_column:
259 /* Target the ROFFT_BLOCK = <tr>. */
260 np = np->parent;
261 break;
262 case LIST_diag:
263 case LIST_hang:
264 case LIST_inset:
265 case LIST_ohang:
266 case LIST_tag:
267 /* Target the ROFFT_HEAD = <dt>. */
268 np = np->parent->head;
269 break;
270 default:
271 /* Target the ROFF_BODY = <li>. */
272 break;
273 }
274 /* FALLTHROUGH */
275 case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */
276 if (np->tag == NULL) {
277 np->tag = mandoc_strdup(n->tag == NULL ?
278 n->child->string : n->tag);
279 np->flags |= NODE_ID;
280 n->flags &= ~NODE_ID;
281 }
282 return;
283 case MDOC_Sh:
284 case MDOC_Ss:
285 case MDOC_Bd:
286 case MDOC_Bl:
287 case MDOC_D1:
288 case MDOC_Dl:
289 case MDOC_Rs:
290 /* Do not move past major blocks. */
291 return;
292 default:
293 /*
294 * Move past in-line content and partial
295 * blocks, for example .It Xo or .It Bq Er.
296 */
297 break;
298 }
299 }
300 }
301
302 /*
303 * When a paragraph is tagged and starts with text,
304 * move the permalink to the first few words.
305 */
306 static void
tag_move_href(struct roff_man * man,struct roff_node * n,const char * tag)307 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
308 {
309 char *cp;
310
311 if (n == NULL || n->type != ROFFT_TEXT ||
312 *n->string == '\0' || *n->string == ' ')
313 return;
314
315 cp = n->string;
316 while (cp != NULL && cp - n->string < 5)
317 cp = strchr(cp + 1, ' ');
318
319 /* If the first text node is longer, split it. */
320
321 if (cp != NULL && cp[1] != '\0') {
322 man->last = n;
323 man->next = ROFF_NEXT_SIBLING;
324 roff_word_alloc(man, n->line,
325 n->pos + (cp - n->string), cp + 1);
326 man->last->flags = n->flags & ~NODE_LINE;
327 *cp = '\0';
328 }
329
330 assert(n->tag == NULL);
331 n->tag = mandoc_strdup(tag);
332 n->flags |= NODE_HREF;
333 }
334
335 /*
336 * When all tags have been set, decide where to put
337 * the associated permalinks, and maybe move some tags
338 * to the beginning of the respective paragraphs.
339 */
340 void
tag_postprocess(struct roff_man * man,struct roff_node * n)341 tag_postprocess(struct roff_man *man, struct roff_node *n)
342 {
343 if (n->flags & NODE_ID) {
344 switch (n->tok) {
345 case MDOC_Pp:
346 tag_move_href(man, n->next, n->tag);
347 break;
348 case MDOC_Bd:
349 case MDOC_D1:
350 case MDOC_Dl:
351 tag_move_href(man, n->child, n->tag);
352 break;
353 case MDOC_Bl:
354 /* XXX No permalink for now. */
355 break;
356 default:
357 if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
358 tag_move_id(n);
359 if (n->tok != MDOC_Tg)
360 n->flags |= NODE_HREF;
361 else if ((n->flags & NODE_ID) == 0) {
362 n->flags |= NODE_NOPRT;
363 free(n->tag);
364 n->tag = NULL;
365 }
366 break;
367 }
368 }
369 for (n = n->child; n != NULL; n = n->next)
370 tag_postprocess(man, n);
371 }
372