1 /*
2  * Generate unique IDs.
3  *
4  * TO DO: Also generate "readable" IDs if the text uses non-ASCII
5  * characters.
6  *
7  * Copyright © 2000 World Wide Web Consortium
8  * See http://www.w3.org/Consortium/Legal/copyright-software
9  *
10  * Author: Bert Bos <bert@w3.org>
11  * Created: 4 August 2000
12  **/
13 
14 #include "config.h"
15 #include <assert.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #ifdef HAVE_ERRNO_H
19 #  include <errno.h>
20 #endif
21 #include <ctype.h>
22 
23 #ifdef HAVE_SEARCH_H
24 #  include <search.h>
25 #else
26 #  include "search-freebsd.h"
27 #endif
28 
29 #if STDC_HEADERS
30 # include <string.h>
31 #else
32 # ifndef HAVE_STRCHR
33 #  define strchr index
34 #  define strrchr rindex
35 # endif
36 # ifndef HAVE_STRSTR
37 #  include "strstr.e"
38 # endif
39 #endif
40 #include <export.h>
41 #include "heap.e"
42 #include "types.e"
43 #include "tree.e"
44 #include "errexit.e"
45 
46 
/* Upper bound on the length of a generated ID */
enum { MAXIDLEN = 45 };

/* Shape of the comparison callback handed to tsearch() and tfind() */
typedef int (*compar_fn_t)(const void *, const void *);

/* Root of the sorted tree of IDs; NULL until the first ID is added */
static void *idtree = NULL;
52 
53 
54 /* storeID -- remember the existence of an ID (allocates a copy of the ID) */
storeID(conststring id)55 EXPORT void storeID(conststring id)
56 {
57   /* Case-insensitive: necessary for HTML, only a little wasteful for XML */
58   (void) tsearch(newstring(id), &idtree, (compar_fn_t)strcasecmp);
59 }
60 
61 
62 /* gen_id_r -- find some text suitable for an ID recursively */
gen_id_r(Tree t,string s,int * len,int maxlen)63 static void gen_id_r(Tree t, string s, int *len, int maxlen)
64 {
65   int i;
66   Tree h;
67 
68   assert(s);					/* s at least maxlen long */
69 
70   /* Loop over children looking for useful text */
71   for (h = t->children; h && *len < maxlen - 1; h = h->sister) {
72     switch (h->tp) {
73       case Text:
74 	for (i = 0; *len < maxlen - 1 && h->text[i]; i++)
75 	  if (isalpha(h->text[i])) s[(*len)++] = tolower(h->text[i]);
76 	  else if (h->text[i] == '@') {s[(*len)++] = 'a'; s[(*len)++] = 't';}
77 	  else if (*len == 0) ;			/* Wait for a letter first */
78 	  else if (h->text[i]=='-') s[(*len)++] = h->text[i];
79 	  else if (h->text[i]=='.') s[(*len)++] = h->text[i];
80 	  else if (h->text[i]=='_') s[(*len)++] = h->text[i];
81 	  else if (isdigit(h->text[i])) s[(*len)++] = h->text[i];
82 	  else if (isspace(h->text[i]) && s[*len-1] != '-') s[(*len)++]='-';
83 	break;
84       case Element:				/* Recursive */
85 	gen_id_r(h, s, len, maxlen);
86 	break;
87       default:
88 	break;
89     }
90   }
91 #if 0
92   /* Look for a nice break, i.e., just before a '-' */
93   while (*len > 0 && s[(*len)-1] != '-') (*len)--;
94   if (*len > 0) (*len)--;
95 #endif
96   s[*len] = '\0';
97 }
98 
99 /* gen_id -- try some heuristics to generate an ID for element t */
gen_id(Tree t)100 EXPORT string gen_id(Tree t)
101 {
102   string s;
103   int len = 0;
104 
105   if (! (s = malloc(MAXIDLEN + 1))) errexit("Out of memory\n");
106 
107   assert(MAXIDLEN > 4);
108   gen_id_r(t, s, &len, MAXIDLEN - 4);
109   if (len == 0) {
110     s[len++] = 'x';		/* At least one character */
111     s[len] = '\0';
112   }
113   if (tfind(s, &idtree, (compar_fn_t)strcasecmp)) {
114     /* No suitable text found or text is already used elsewhere */
115     int seqno = 0;
116     do {					/* Try adding digits */
117       sprintf(s + len, "%d", seqno);
118       seqno++;
119     } while (seqno != 10000 && tfind(s, &idtree, (compar_fn_t)strcasecmp));
120     if (seqno == 10000) {			/* 10000 tried, giving up... */
121       free(s);
122       return NULL;
123     }
124   }
125   (void) tsearch(s, &idtree, (compar_fn_t)strcasecmp); /* Store it */
126   return s;
127 }
128 
129