1 /****************************************************************************
2  * Copyright 2018-2019,2020 Thomas E. Dickey                                *
3  * Copyright 2009-2013,2017 Free Software Foundation, Inc.                  *
4  *                                                                          *
5  * Permission is hereby granted, free of charge, to any person obtaining a  *
6  * copy of this software and associated documentation files (the            *
7  * "Software"), to deal in the Software without restriction, including      *
8  * without limitation the rights to use, copy, modify, merge, publish,      *
9  * distribute, distribute with modifications, sublicense, and/or sell       *
10  * copies of the Software, and to permit persons to whom the Software is    *
11  * furnished to do so, subject to the following conditions:                 *
12  *                                                                          *
13  * The above copyright notice and this permission notice shall be included  *
14  * in all copies or substantial portions of the Software.                   *
15  *                                                                          *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS  *
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF               *
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.   *
19  * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,   *
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR    *
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR    *
22  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.                               *
23  *                                                                          *
24  * Except as contained in this notice, the name(s) of the above copyright   *
25  * holders shall not be used in advertising or otherwise to promote the     *
26  * sale, use or other dealings in this Software without prior written       *
27  * authorization.                                                           *
28  ****************************************************************************/
29 
30 /****************************************************************************
31  *  Author: Zeyd M. Ben-Halim <zmbenhal@netcom.com> 1992,1995               *
32  *     and: Eric S. Raymond <esr@snark.thyrsus.com>                         *
33  *     and: Thomas E. Dickey                        1996-on                 *
34  ****************************************************************************/
35 
36 /*
37  *	make_hash.c --- build-time program for constructing comp_captab.c
38  */
39 
40 #include <build.priv.h>
41 
42 #include <tic.h>
43 #include <hashsize.h>
44 
45 #include <ctype.h>
46 
47 MODULE_ID("$Id: make_hash.c,v 1.33 2020/02/02 23:34:34 tom Exp $")
48 
49 /*
50  *	_nc_make_hash_table()
51  *
52  *	Takes the entries in table[] and hashes them into hash_table[]
53  *	by name.  There are CAPTABSIZE entries in the predefined table[]
54  *	and HASHTABSIZE slots in hash_table[].
55  *
56  */
57 
58 #undef MODULE_ID
59 #define MODULE_ID(id)		/*nothing */
60 #include <tinfo/doalloc.c>
61 
62 #define L_PAREN "("
63 #define R_PAREN ")"
64 #define L_BRACE "{"
65 #define R_BRACE "}"
66 
67 static const char *typenames[] =
68 {"BOOLEAN", "NUMBER", "STRING"};
69 
70 static void
71 failed(const char *s)
72 {
73     perror(s);
74     exit(EXIT_FAILURE);
75 }
76 
77 static char *
78 strmalloc(char *s)
79 {
80     size_t need = strlen(s) + 1;
81     char *result = malloc(need);
82     if (result == 0)
83 	failed("strmalloc");
84     _nc_STRCPY(result, s, need);
85     return result;
86 }
87 
88 /*
89  *	int hash_function(string)
90  *
91  *	Computes the hashing function on the given string.
92  *
93  *	The current hash function is the sum of each consecutive pair
94  *	of characters, taken as two-byte integers, mod HASHTABSIZE.
95  *
96  */
97 
98 static int
99 hash_function(const char *string)
100 {
101     long sum = 0;
102 
103     while (*string) {
104 	sum += (long) (UChar(*string) + (UChar(*(string + 1)) << 8));
105 	string++;
106     }
107 
108     return (int) (sum % HASHTABSIZE);
109 }
110 
111 #define UNUSED -1
112 
113 static void
114 _nc_make_hash_table(struct user_table_entry *table,
115 		    HashValue * hash_table,
116 		    unsigned tablesize)
117 {
118     unsigned i;
119     int hashvalue;
120     int collisions = 0;
121 
122     for (i = 0; i < HASHTABSIZE; i++) {
123 	hash_table[i] = UNUSED;
124     }
125     for (i = 0; i < tablesize; i++) {
126 	hashvalue = hash_function(table[i].ute_name);
127 
128 	if (hash_table[hashvalue] >= 0)
129 	    collisions++;
130 
131 	if (hash_table[hashvalue] != UNUSED) {
132 	    table[i].ute_link = hash_table[hashvalue];
133 	}
134 	hash_table[hashvalue] = (HashValue) i;
135     }
136 
137     printf("/* %d collisions out of %d entries */\n", collisions, tablesize);
138 }
139 
140 /*
141  * This filter reads from standard input a list of tab-delimited columns,
142  * (e.g., from Caps.filtered) computes the hash-value of a specified column and
143  * writes the hashed tables to standard output.
144  *
145  * By compiling the hash table at build time, we're able to make the entire
146  * set of terminfo and termcap tables readonly (and also provide some runtime
147  * performance enhancement).
148  */
149 
150 #define MAX_COLUMNS BUFSIZ	/* this _has_ to be worst-case */
151 
152 static int
153 count_columns(char **list)
154 {
155     int result = 0;
156     if (list != 0) {
157 	while (*list++) {
158 	    ++result;
159 	}
160     }
161     return result;
162 }
163 
164 static char **
165 parse_columns(char *buffer)
166 {
167     static char **list;
168 
169     int col = 0;
170 
171     if (buffer == 0) {
172 	free(list);
173 	list = 0;
174 	return 0;
175     }
176 
177     if (*buffer != '#') {
178 	if (list == 0) {
179 	    list = typeCalloc(char *, (MAX_COLUMNS + 1));
180 	    if (list == 0)
181 		return (0);
182 	}
183 	while (*buffer != '\0') {
184 	    char *s;
185 	    for (s = buffer; (*s != '\0') && !isspace(UChar(*s)); s++)
186 		/*EMPTY */ ;
187 	    if (s != buffer) {
188 		char mark = *s;
189 		*s = '\0';
190 		if ((s - buffer) > 1
191 		    && (*buffer == '"')
192 		    && (s[-1] == '"')) {	/* strip the quotes */
193 		    assert(s > buffer + 1);
194 		    s[-1] = '\0';
195 		    buffer++;
196 		}
197 		list[col] = buffer;
198 		col++;
199 		if (mark == '\0')
200 		    break;
201 		while (*++s && isspace(UChar(*s)))
202 		    /*EMPTY */ ;
203 		buffer = s;
204 	    } else
205 		break;
206 	}
207     }
208     return col ? list : 0;
209 }
210 
211 #define SetType(n,t) \
212 	if (is_user) \
213 	    name_table[n].ute_type |= (int)(1 << (t)); \
214 	else \
215 	    name_table[n].ute_type = (t)
216 
217 #define GetType(n) \
218 	(is_user \
219 	 ? get_type(name_table[n].ute_type) \
220 	 : typenames[name_table[n].ute_type])
221 
222 static char *
223 get_type(int type_mask)
224 {
225     static char result[80];
226     unsigned n;
227     _nc_STRCPY(result, L_PAREN, sizeof(result));
228     for (n = 0; n < 3; ++n) {
229 	if ((1 << n) & type_mask) {
230 	    size_t want = 5 + strlen(typenames[n]);
231 	    if (want > sizeof(result)) {
232 		fprintf(stderr, "Buffer is not large enough for %s + %s\n",
233 			result, typenames[n]);
234 		exit(EXIT_FAILURE);
235 	    }
236 	    if (result[1])
237 		_nc_STRCAT(result, "|", sizeof(result));
238 	    _nc_STRCAT(result, "1<<", sizeof(result));
239 	    _nc_STRCAT(result, typenames[n], sizeof(result));
240 	}
241     }
242     _nc_STRCAT(result, R_PAREN, sizeof(result));
243     return result;
244 }
245 
246 int
247 main(int argc, char **argv)
248 {
249     unsigned tablesize = CAPTABSIZE;
250     struct user_table_entry *name_table = typeCalloc(struct
251 						     user_table_entry, tablesize);
252     HashValue *hash_table = typeCalloc(HashValue, HASHTABSIZE);
253     const char *root_name = "";
254     int column = 0;
255     int bigstring = 0;
256     unsigned n;
257     unsigned nn;
258     unsigned tableused = 0;
259     bool is_user;
260     const char *table_name;
261     char buffer[BUFSIZ];
262 
263     short BoolCount = 0;
264     short NumCount = 0;
265     short StrCount = 0;
266 
267     /* The first argument is the column-number (starting with 0).
268      * The second is the root name of the tables to generate.
269      */
270     if (argc <= 3
271 	|| (column = atoi(argv[1])) <= 0
272 	|| (column >= MAX_COLUMNS)
273 	|| *(root_name = argv[2]) == 0
274 	|| (bigstring = atoi(argv[3])) < 0
275 	|| name_table == 0
276 	|| hash_table == 0) {
277 	fprintf(stderr, "usage: make_hash column root_name bigstring\n");
278 	exit(EXIT_FAILURE);
279     }
280     is_user = (*root_name == 'u');
281     table_name = (is_user ? "user" : "name");
282 
283     /*
284      * Read the table into our arrays.
285      */
286     for (n = 0; (n < tablesize) && fgets(buffer, BUFSIZ, stdin);) {
287 	char **list;
288 	char *nlp = strchr(buffer, '\n');
289 	if (nlp)
290 	    *nlp = '\0';
291 	else
292 	    buffer[sizeof(buffer) - 2] = '\0';
293 	list = parse_columns(buffer);
294 	if (list == 0)		/* blank or comment */
295 	    continue;
296 	if (is_user) {
297 	    if (strcmp(list[0], "userdef"))
298 		continue;
299 	} else if (!strcmp(list[0], "userdef")) {
300 	    continue;
301 	}
302 	if (column < 0 || column > count_columns(list)) {
303 	    fprintf(stderr, "expected %d columns, have %d:\n%s\n",
304 		    column,
305 		    count_columns(list),
306 		    buffer);
307 	    exit(EXIT_FAILURE);
308 	}
309 	nn = tableused;
310 	if (is_user) {
311 	    unsigned j;
312 	    for (j = 0; j < tableused; ++j) {
313 		if (!strcmp(list[column], name_table[j].ute_name)) {
314 		    nn = j;
315 		    break;
316 		}
317 	    }
318 	}
319 	if (nn == tableused) {
320 	    name_table[nn].ute_link = -1;	/* end-of-hash */
321 	    name_table[nn].ute_name = strmalloc(list[column]);
322 	    ++tableused;
323 	}
324 
325 	if (!strcmp(list[2], "bool")) {
326 	    SetType(nn, BOOLEAN);
327 	    name_table[nn].ute_index = BoolCount++;
328 	} else if (!strcmp(list[2], "num")) {
329 	    SetType(nn, NUMBER);
330 	    name_table[nn].ute_index = NumCount++;
331 	} else if (!strcmp(list[2], "str")) {
332 	    SetType(nn, STRING);
333 	    name_table[nn].ute_index = StrCount++;
334 	    if (is_user) {
335 		if (*list[3] != '-') {
336 		    unsigned j;
337 		    name_table[nn].ute_argc = (unsigned) strlen(list[3]);
338 		    for (j = 0; j < name_table[nn].ute_argc; ++j) {
339 			if (list[3][j] == 's') {
340 			    name_table[nn].ute_args |= (1U << j);
341 			}
342 		    }
343 		}
344 	    }
345 	} else {
346 	    fprintf(stderr, "Unknown type: %s\n", list[2]);
347 	    exit(EXIT_FAILURE);
348 	}
349 	n++;
350     }
351     if (tablesize > tableused)
352 	tablesize = tableused;
353     _nc_make_hash_table(name_table, hash_table, tablesize);
354 
355     /*
356      * Write the compiled tables to standard output
357      */
358     if (bigstring) {
359 	int len = 0;
360 	int nxt;
361 
362 	printf("static const char %s_names_text[] = \\\n", root_name);
363 	for (n = 0; n < tablesize; n++) {
364 	    nxt = (int) strlen(name_table[n].ute_name) + 5;
365 	    if (nxt + len > 72) {
366 		printf("\\\n");
367 		len = 0;
368 	    }
369 	    printf("\"%s\\0\" ", name_table[n].ute_name);
370 	    len += nxt;
371 	}
372 	printf(";\n\n");
373 
374 	len = 0;
375 	printf("static %s_table_data const %s_names_data[] =\n",
376 	       table_name,
377 	       root_name);
378 	printf("%s\n", L_BRACE);
379 	for (n = 0; n < tablesize; n++) {
380 	    printf("\t%s %15d,\t%10s,", L_BRACE, len, GetType(n));
381 	    if (is_user)
382 		printf("\t%d,%d,",
383 		       name_table[n].ute_argc,
384 		       name_table[n].ute_args);
385 	    printf("\t%3d, %3d %s%c\n",
386 		   name_table[n].ute_index,
387 		   name_table[n].ute_link,
388 		   R_BRACE,
389 		   n < tablesize - 1 ? ',' : ' ');
390 	    len += (int) strlen(name_table[n].ute_name) + 1;
391 	}
392 	printf("%s;\n\n", R_BRACE);
393 	printf("static struct %s_table_entry *_nc_%s_table = 0;\n\n",
394 	       table_name,
395 	       root_name);
396     } else {
397 
398 	printf("static struct %s_table_entry const _nc_%s_table[] =\n",
399 	       table_name,
400 	       root_name);
401 	printf("%s\n", L_BRACE);
402 	for (n = 0; n < tablesize; n++) {
403 	    _nc_SPRINTF(buffer, _nc_SLIMIT(sizeof(buffer)) "\"%s\"",
404 			name_table[n].ute_name);
405 	    printf("\t%s %15s,\t%10s,", L_BRACE, buffer, GetType(n));
406 	    if (is_user)
407 		printf("\t%d,%d,",
408 		       name_table[n].ute_argc,
409 		       name_table[n].ute_args);
410 	    printf("\t%3d, %3d %s%c\n",
411 		   name_table[n].ute_index,
412 		   name_table[n].ute_link,
413 		   R_BRACE,
414 		   n < tablesize - 1 ? ',' : ' ');
415 	}
416 	printf("%s;\n\n", R_BRACE);
417     }
418 
419     printf("static const HashValue _nc_%s_hash_table[%d] =\n",
420 	   root_name,
421 	   HASHTABSIZE + 1);
422     printf("%s\n", L_BRACE);
423     for (n = 0; n < HASHTABSIZE; n++) {
424 	printf("\t%3d,\n", hash_table[n]);
425     }
426     printf("\t0\t/* base-of-table */\n");
427     printf("%s;\n\n", R_BRACE);
428 
429     if (!is_user) {
430 	printf("#if (BOOLCOUNT!=%d)||(NUMCOUNT!=%d)||(STRCOUNT!=%d)\n",
431 	       BoolCount, NumCount, StrCount);
432 	printf("#error\t--> term.h and comp_captab.c disagree about the <--\n");
433 	printf("#error\t--> numbers of booleans, numbers and/or strings <--\n");
434 	printf("#endif\n\n");
435     }
436 
437     free(hash_table);
438     for (n = 0; (n < tablesize); ++n) {
439 	free((void *) name_table[n].ute_name);
440     }
441     free(name_table);
442     parse_columns(0);
443 
444     return EXIT_SUCCESS;
445 }
446