1 /* $OpenBSD: tag.c,v 1.36 2020/04/19 16:26:11 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 * 17 * Functions to tag syntax tree nodes. 18 * For internal use by mandoc(1) validation modules only. 19 */ 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <limits.h> 24 #include <stddef.h> 25 #include <stdint.h> 26 #include <stdlib.h> 27 #include <string.h> 28 29 #include "mandoc_aux.h" 30 #include "mandoc_ohash.h" 31 #include "roff.h" 32 #include "mdoc.h" 33 #include "roff_int.h" 34 #include "tag.h" 35 36 struct tag_entry { 37 struct roff_node **nodes; 38 size_t maxnodes; 39 size_t nnodes; 40 int prio; 41 char s[]; 42 }; 43 44 static void tag_move_href(struct roff_man *, 45 struct roff_node *, const char *); 46 static void tag_move_id(struct roff_node *); 47 48 static struct ohash tag_data; 49 50 51 /* 52 * Set up the ohash table to collect nodes 53 * where various marked-up terms are documented. 54 */ 55 void 56 tag_alloc(void) 57 { 58 mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); 59 } 60 61 void 62 tag_free(void) 63 { 64 struct tag_entry *entry; 65 unsigned int slot; 66 67 if (tag_data.info.free == NULL) 68 return; 69 entry = ohash_first(&tag_data, &slot); 70 while (entry != NULL) { 71 free(entry->nodes); 72 free(entry); 73 entry = ohash_next(&tag_data, &slot); 74 } 75 ohash_delete(&tag_data); 76 tag_data.info.free = NULL; 77 } 78 79 /* 80 * Set a node where a term is defined, 81 * unless it is already defined at a lower priority. 82 */ 83 void 84 tag_put(const char *s, int prio, struct roff_node *n) 85 { 86 struct tag_entry *entry; 87 struct roff_node *nold; 88 const char *se; 89 size_t len; 90 unsigned int slot; 91 92 assert(prio <= TAG_FALLBACK); 93 94 if (s == NULL) { 95 if (n->child == NULL || n->child->type != ROFFT_TEXT) 96 return; 97 s = n->child->string; 98 switch (s[0]) { 99 case '-': 100 s++; 101 break; 102 case '\\': 103 switch (s[1]) { 104 case '&': 105 case '-': 106 case 'e': 107 s += 2; 108 break; 109 default: 110 break; 111 } 112 break; 113 default: 114 break; 115 } 116 } 117 118 /* 119 * Skip whitespace and escapes and whatever follows, 120 * and if there is any, downgrade the priority. 121 */ 122 123 len = strcspn(s, " \t\\"); 124 if (len == 0) 125 return; 126 127 se = s + len; 128 if (*se != '\0' && prio < TAG_WEAK) 129 prio = TAG_WEAK; 130 131 slot = ohash_qlookupi(&tag_data, s, &se); 132 entry = ohash_find(&tag_data, slot); 133 134 /* Build a new entry. */ 135 136 if (entry == NULL) { 137 entry = mandoc_malloc(sizeof(*entry) + len + 1); 138 memcpy(entry->s, s, len); 139 entry->s[len] = '\0'; 140 entry->nodes = NULL; 141 entry->maxnodes = entry->nnodes = 0; 142 ohash_insert(&tag_data, slot, entry); 143 } 144 145 /* 146 * Lower priority numbers take precedence. 147 * If a better entry is already present, ignore the new one. 148 */ 149 150 else if (entry->prio < prio) 151 return; 152 153 /* 154 * If the existing entry is worse, clear it. 155 * In addition, a tag with priority TAG_FALLBACK 156 * is only used if the tag occurs exactly once. 157 */ 158 159 else if (entry->prio > prio || prio == TAG_FALLBACK) { 160 while (entry->nnodes > 0) { 161 nold = entry->nodes[--entry->nnodes]; 162 nold->flags &= ~NODE_ID; 163 free(nold->tag); 164 nold->tag = NULL; 165 } 166 if (prio == TAG_FALLBACK) { 167 entry->prio = TAG_DELETE; 168 return; 169 } 170 } 171 172 /* Remember the new node. */ 173 174 if (entry->maxnodes == entry->nnodes) { 175 entry->maxnodes += 4; 176 entry->nodes = mandoc_reallocarray(entry->nodes, 177 entry->maxnodes, sizeof(*entry->nodes)); 178 } 179 entry->nodes[entry->nnodes++] = n; 180 entry->prio = prio; 181 n->flags |= NODE_ID; 182 if (n->child == NULL || n->child->string != s || *se != '\0') { 183 assert(n->tag == NULL); 184 n->tag = mandoc_strndup(s, len); 185 } 186 } 187 188 int 189 tag_exists(const char *tag) 190 { 191 return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL; 192 } 193 194 /* 195 * For in-line elements, move the link target 196 * to the enclosing paragraph when appropriate. 197 */ 198 static void 199 tag_move_id(struct roff_node *n) 200 { 201 struct roff_node *np; 202 203 np = n; 204 for (;;) { 205 if (np->prev != NULL) 206 np = np->prev; 207 else if ((np = np->parent) == NULL) 208 return; 209 switch (np->tok) { 210 case MDOC_It: 211 switch (np->parent->parent->norm->Bl.type) { 212 case LIST_column: 213 /* Target the ROFFT_BLOCK = <tr>. */ 214 np = np->parent; 215 break; 216 case LIST_diag: 217 case LIST_hang: 218 case LIST_inset: 219 case LIST_ohang: 220 case LIST_tag: 221 /* Target the ROFFT_HEAD = <dt>. */ 222 np = np->parent->head; 223 break; 224 default: 225 /* Target the ROFF_BODY = <li>. */ 226 break; 227 } 228 /* FALLTHROUGH */ 229 case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */ 230 if (np->tag == NULL) { 231 np->tag = mandoc_strdup(n->tag == NULL ? 232 n->child->string : n->tag); 233 np->flags |= NODE_ID; 234 n->flags &= ~NODE_ID; 235 } 236 return; 237 case MDOC_Sh: 238 case MDOC_Ss: 239 case MDOC_Bd: 240 case MDOC_Bl: 241 case MDOC_D1: 242 case MDOC_Dl: 243 case MDOC_Rs: 244 /* Do not move past major blocks. */ 245 return; 246 default: 247 /* 248 * Move past in-line content and partial 249 * blocks, for example .It Xo or .It Bq Er. 250 */ 251 break; 252 } 253 } 254 } 255 256 /* 257 * When a paragraph is tagged and starts with text, 258 * move the permalink to the first few words. 259 */ 260 static void 261 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag) 262 { 263 char *cp; 264 265 if (n == NULL || n->type != ROFFT_TEXT || 266 *n->string == '\0' || *n->string == ' ') 267 return; 268 269 cp = n->string; 270 while (cp != NULL && cp - n->string < 5) 271 cp = strchr(cp + 1, ' '); 272 273 /* If the first text node is longer, split it. */ 274 275 if (cp != NULL && cp[1] != '\0') { 276 man->last = n; 277 man->next = ROFF_NEXT_SIBLING; 278 roff_word_alloc(man, n->line, 279 n->pos + (cp - n->string), cp + 1); 280 man->last->flags = n->flags & ~NODE_LINE; 281 *cp = '\0'; 282 } 283 284 assert(n->tag == NULL); 285 n->tag = mandoc_strdup(tag); 286 n->flags |= NODE_HREF; 287 } 288 289 /* 290 * When all tags have been set, decide where to put 291 * the associated permalinks, and maybe move some tags 292 * to the beginning of the respective paragraphs. 293 */ 294 void 295 tag_postprocess(struct roff_man *man, struct roff_node *n) 296 { 297 if (n->flags & NODE_ID) { 298 switch (n->tok) { 299 case MDOC_Pp: 300 tag_move_href(man, n->next, n->tag); 301 break; 302 case MDOC_Bd: 303 case MDOC_D1: 304 case MDOC_Dl: 305 tag_move_href(man, n->child, n->tag); 306 break; 307 case MDOC_Bl: 308 /* XXX No permalink for now. */ 309 break; 310 default: 311 if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo) 312 tag_move_id(n); 313 if (n->tok != MDOC_Tg) 314 n->flags |= NODE_HREF; 315 else if ((n->flags & NODE_ID) == 0) { 316 n->flags |= NODE_NOPRT; 317 free(n->tag); 318 n->tag = NULL; 319 } 320 break; 321 } 322 } 323 for (n = n->child; n != NULL; n = n->next) 324 tag_postprocess(man, n); 325 } 326