1 /*
2 * Generate unique IDs.
3 *
4 * TO DO: Also generate "readable" IDs if the text uses non-ASCII
5 * characters.
6 *
7 * Copyright © 2000 World Wide Web Consortium
8 * See http://www.w3.org/Consortium/Legal/copyright-software
9 *
10 * Author: Bert Bos <bert@w3.org>
11 * Created: 4 August 2000
12 **/
13
14 #include "config.h"
15 #include <assert.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #ifdef HAVE_ERRNO_H
19 # include <errno.h>
20 #endif
21 #include <ctype.h>
22
23 #ifdef HAVE_SEARCH_H
24 # include <search.h>
25 #else
26 # include "search-freebsd.h"
27 #endif
28
29 #if STDC_HEADERS
30 # include <string.h>
31 #else
32 # ifndef HAVE_STRCHR
33 # define strchr index
34 # define strrchr rindex
35 # endif
36 # ifndef HAVE_STRSTR
37 # include "strstr.e"
38 # endif
39 #endif
40 #include <export.h>
41 #include "heap.e"
42 #include "types.e"
43 #include "tree.e"
44 #include "errexit.e"
45
46
#define MAXIDLEN 45				/* Max len of a generated ID */

/* gen_id() sets aside 4 characters of every ID for a numeric suffix, so
 * a smaller limit would leave no room for any text. Checked at compile
 * time so that a bad value cannot reach a running program. */
#if MAXIDLEN <= 4
# error "MAXIDLEN must be greater than 4"
#endif

/* Type of the comparison callback passed to tsearch() and tfind() */
typedef int(*compar_fn_t)(const void *, const void *);

static void *idtree = NULL;			/* Sorted tree of IDs */
52
53
54 /* storeID -- remember the existence of an ID (allocates a copy of the ID) */
storeID(conststring id)55 EXPORT void storeID(conststring id)
56 {
57 /* Case-insensitive: necessary for HTML, only a little wasteful for XML */
58 (void) tsearch(newstring(id), &idtree, (compar_fn_t)strcasecmp);
59 }
60
61
62 /* gen_id_r -- find some text suitable for an ID recursively */
gen_id_r(Tree t,string s,int * len,int maxlen)63 static void gen_id_r(Tree t, string s, int *len, int maxlen)
64 {
65 int i;
66 Tree h;
67
68 assert(s); /* s at least maxlen long */
69
70 /* Loop over children looking for useful text */
71 for (h = t->children; h && *len < maxlen - 1; h = h->sister) {
72 switch (h->tp) {
73 case Text:
74 for (i = 0; *len < maxlen - 1 && h->text[i]; i++)
75 if (isalpha(h->text[i])) s[(*len)++] = tolower(h->text[i]);
76 else if (h->text[i] == '@') {s[(*len)++] = 'a'; s[(*len)++] = 't';}
77 else if (*len == 0) ; /* Wait for a letter first */
78 else if (h->text[i]=='-') s[(*len)++] = h->text[i];
79 else if (h->text[i]=='.') s[(*len)++] = h->text[i];
80 else if (h->text[i]=='_') s[(*len)++] = h->text[i];
81 else if (isdigit(h->text[i])) s[(*len)++] = h->text[i];
82 else if (isspace(h->text[i]) && s[*len-1] != '-') s[(*len)++]='-';
83 break;
84 case Element: /* Recursive */
85 gen_id_r(h, s, len, maxlen);
86 break;
87 default:
88 break;
89 }
90 }
91 #if 0
92 /* Look for a nice break, i.e., just before a '-' */
93 while (*len > 0 && s[(*len)-1] != '-') (*len)--;
94 if (*len > 0) (*len)--;
95 #endif
96 s[*len] = '\0';
97 }
98
99 /* gen_id -- try some heuristics to generate an ID for element t */
gen_id(Tree t)100 EXPORT string gen_id(Tree t)
101 {
102 string s;
103 int len = 0;
104
105 if (! (s = malloc(MAXIDLEN + 1))) errexit("Out of memory\n");
106
107 assert(MAXIDLEN > 4);
108 gen_id_r(t, s, &len, MAXIDLEN - 4);
109 if (len == 0) {
110 s[len++] = 'x'; /* At least one character */
111 s[len] = '\0';
112 }
113 if (tfind(s, &idtree, (compar_fn_t)strcasecmp)) {
114 /* No suitable text found or text is already used elsewhere */
115 int seqno = 0;
116 do { /* Try adding digits */
117 sprintf(s + len, "%d", seqno);
118 seqno++;
119 } while (seqno != 10000 && tfind(s, &idtree, (compar_fn_t)strcasecmp));
120 if (seqno == 10000) { /* 10000 tried, giving up... */
121 free(s);
122 return NULL;
123 }
124 }
125 (void) tsearch(s, &idtree, (compar_fn_t)strcasecmp); /* Store it */
126 return s;
127 }
128
129